Commit a6a537b2 authored by Bharath Ramsundar
Browse files

Cleanup

parent 4344e3e3
Loading
Loading
Loading
Loading
+0 −20
Original line number Diff line number Diff line
@@ -214,26 +214,6 @@ class Dataset(object):
          os.path.join(self.data_dir, row['ids'])), dtype=object)
      yield (X, y, w, ids)

  #def det_iterbatches(self, batch_size=None, epoch=0):
  #  """
  #  Returns minibatches from dataset.
  #  """
  #  for i, (X, y, w, ids) in enumerate(self.itershards()):
  #    nb_sample = np.shape(X)[0]
  #    if batch_size is None:
  #      shard_batch_size = nb_sample
  #    else:
  #      shard_batch_size = batch_size 
  #    interval_points = np.linspace(
  #        0, nb_sample, np.ceil(float(nb_sample)/shard_batch_size)+1, dtype=int)
  #    for j in range(len(interval_points)-1):
  #      indices = range(interval_points[j], interval_points[j+1])
  #      X_batch = X[indices, :]
  #      y_batch = y[indices]
  #      w_batch = w[indices]
  #      ids_batch = ids[indices]
  #      yield (X_batch, y_batch, w_batch, ids_batch)

  def iterbatches(self, batch_size=None, epoch=0, deterministic=False):
    """Returns minibatches from dataset randomly."""
    num_shards = self.get_number_shards()
+0 −4
Original line number Diff line number Diff line
@@ -222,10 +222,6 @@ class DataLoader(object):

  def _featurize_shard(self, df_shard, write_fn, shard_num, input_type):
    """Featurizes a shard of an input dataframe."""
    #################################################################### DEBUG
    print("input_type, self.smiles_field")
    print(input_type, self.smiles_field)
    #################################################################### DEBUG
    field = self.mol_field if input_type == "sdf" else self.smiles_field 
    field_type = "mol" if input_type == "sdf" else "smiles" 
    log("Currently featurizing feature_type: %s"
+0 −10
Original line number Diff line number Diff line
@@ -72,21 +72,11 @@ class SingletaskToMultitask(Model):
    task_datasets = self._create_task_datasets(dataset)
    for ind, task in enumerate(self.tasks):
      log("Fitting model for task %s" % task, self.verbosity, "high")
      #y_task = task_datasets[ind].get_labels()
      #X_task, y_task, w_task, ids_task = task_datasets[ind].to_numpy()
      task_model = self.model_builder(
          [task], {task: self.task_types[task]}, self.model_params,
          self.task_model_dirs[task],
          verbosity=self.verbosity)
      #if y_task.size > 0:
        #task_model.raw_model.fit(X_task, np.ravel(y_task))
      task_model.fit(task_datasets[ind])
      #else:
      #  print("No labels for task %s" % task)
      #  print("Fitting on dummy dataset.")
      #  X_task_fake = np.zeros_like(X)
      #  y_task_fake = np.zeros_like(w_task)
      #  task_model.raw_model.fit(X_task_fake, y_task_fake)
      task_model.save()
      if self.store_in_memory:
        self.task_models[task] = task_model