Commit c5ac7e0c authored by Bharath Ramsundar
Browse files

Cleanup

parent 162042c5
Loading
Loading
Loading
Loading
+0 −6
Original line number Diff line number Diff line
@@ -478,9 +478,6 @@ class Dataset(object):
                   metadata_rows=metadata_rows,
                   verbosity=self.verbosity)

  # TODO(rbharath): A subtle bug seems to arise with this. When splitting data,
  # we seem to get many shards that have no data. Can we make it so that empty
  # shards are removed?
  def to_singletask(self, task_dirs):
    """Transforms multitask dataset in collection of singletask datasets."""
    tasks = self.get_task_names()
@@ -502,10 +499,7 @@ class Dataset(object):
        w_nonzero = np.reshape(w_task[w_task != 0], (num_datapoints, 1))
        ids_nonzero = ids[w_task != 0]

        ######################################################################## DEBUG
        # Control for case when this task has no data in this shard.
        if X_nonzero.size > 0: 
        ######################################################################## DEBUG
          task_metadata_rows[task].append(
            Dataset.write_data_to_disk(
                task_dirs[task_num], basename, [task],
+0 −9
Original line number Diff line number Diff line
@@ -51,16 +51,7 @@ class SingletaskToMultitask(Model):
        shutil.rmtree(task_data_dir)
      os.makedirs(task_data_dir)
      task_data_dirs.append(task_data_dir)
    ############################################################# DEBUG
    print("_create_task_datasets")
    print("task_data_dirs")
    print(task_data_dirs)
    ############################################################# DEBUG
    task_datasets = dataset.to_singletask(task_data_dirs)
    ############################################################# DEBUG
    print("[task_dataset.data_dir for task_dataset in task_datasets]")
    print([task_dataset.data_dir for task_dataset in task_datasets])
    ############################################################# DEBUG
    if self.verbosity is not None:
      for task, task_dataset in zip(self.tasks, task_datasets):
        log("Dataset for task %s has shape %s"