Commit c452aa55 authored by Bharath Ramsundar
Browse files

Cleanup

parent 96c21502
Loading
Loading
Loading
Loading
+0 −6
Original line number Diff line number Diff line
@@ -116,12 +116,6 @@ class Dataset(object):
    out_ids = "%s-ids.joblib" % basename

    if X is not None:
      ############################################## DEBUG
      print("X.shape")
      print(X.shape)
      print("os.path.join(data_dir, out_X)")
      print(os.path.join(data_dir, out_X))
      ############################################## DEBUG
      save_to_disk(X, os.path.join(data_dir, out_X))
      save_to_disk(X, os.path.join(data_dir, out_X_transformed))
      X_sums, X_sum_squares, X_n = compute_sums_and_nb_sample(X)
+0 −8
Original line number Diff line number Diff line
@@ -72,14 +72,6 @@ class TestReload(TestAPI):
    # TODO(rbharath): Transformers don't play nice with reload! Namely,
    # reloading will cause the transform to be reapplied. This is undesirable in
    # almost all cases. Need to understand a method to fix this.
    ##################################### DEBUG
    
    print("_run_muv_experiment()")
    print("train_dataset.get_labels()")
    print(train_dataset.get_labels())
    print("train_dataset.get_weights()")
    print(train_dataset.get_weights())
    ##################################### DEBUG
    transformers = [
        BalancingTransformer(transform_w=True, dataset=train_dataset)]
    print("Transforming datasets")
+0 −22
Original line number Diff line number Diff line
@@ -95,28 +95,6 @@ def _get_input_type(input_file):
  else:
    raise ValueError("Unrecognized extension %s" % file_extension)

#def _get_fields(input_file):
#  """Get the names of fields and field_types for input data."""
#  # If CSV input, assume that first row contains labels
#  input_type = _get_input_type(input_file)
#  if input_type == "csv":
#    with open(input_file, "rb") as inp_file_obj:
#      return csv.reader(inp_file_obj).next()
#  elif input_type == "pandas-joblib":
#    df = load_from_disk(input_file)
#    return df.keys()
#  elif input_type == "pandas-pickle":
#    df = load_pickle_from_disk(input_file)
#    return df.keys()
#  # If SDF input, assume that .sdf.csv file contains labels 
#  elif input_type == "sdf":
#    label_file = input_file + ".csv"
#    print("Reading labels from %s" % label_file)
#    with open(label_file, "rb") as inp_file_obj:
#      return inp_file_obj.readline()
#  else:
#    raise ValueError("Unrecognized extension for %s" % input_file)

class DataFeaturizer(object):
  """
  Handles loading/featurizing of chemical samples (datapoints).
+0 −6
Original line number Diff line number Diff line
@@ -59,20 +59,14 @@ class SingletaskToMultitask(Model):
          [task], {task: self.task_types[task]}, self.model_params,
          self.task_model_dirs[task],
          verbosity=self.verbosity)
      #################################### DEBUG
      if y_task.size > 0:
      #################################### DEBUG
        task_model.raw_model.fit(X_task, y_task)
      #################################### DEBUG
      else:
        print("No labels for task %s" % task)
        print("Fitting on dummy dataset.")
        X_task_fake = np.zeros_like(X)
        y_task_fake = np.zeros_like(w_task)
        print("X.shape, y.shape, w.shape, y_task.shape, w_task.shape, y_task_fake.shape")
        print(X.shape, y.shape, w.shape, y_task.shape, w_task.shape, y_task_fake.shape)
        task_model.raw_model.fit(X_task_fake, y_task_fake)
      #################################### DEBUG
      task_model.save()

  def predict_on_batch(self, X):