Commit 1b7140fe authored by Bharath
Browse files

Some test fixes

parent e86b1733
Loading
Loading
Loading
Loading
+9 −0
Original line number Diff line number Diff line
@@ -598,7 +598,16 @@ def compute_sums_and_nb_sample(tensor, W=None):
# make it easy to use multiprocessing.
def convert_df_to_numpy(df, feature_type, tasks, mol_id_field):
  """Transforms a featurized dataset df into standard set of numpy arrays"""
  ############################################################## DEBUG
  print("SLKFJD:LSKJF:SLFKJ:SLDFKJSD:LKFJDSLKFJSDKFJSLKFJS:LFJSDLKJ")
  ############################################################## DEBUG
  if feature_type not in df.keys():
    ############################################################## DEBUG
    print("SLKFJD:LSKJF:SLFKJ:SLDFKJSD:LKFJDSLKFJSDKFJSLKFJS:LFJSDLKJ")
    print("feature_type")
    print(feature_type)
    ############################################################## DEBUG

    raise ValueError(
        "Featurized data does not support requested feature_type.")
  # perform common train/test split across all tasks
+6 −6
Original line number Diff line number Diff line
@@ -29,14 +29,14 @@ class TestDatasetAPI(TestAPI):
    """Loads solubility data from example.csv"""
    if os.path.exists(self.data_dir):
      shutil.rmtree(self.data_dir)
    featurizers = [CircularFingerprint(size=1024)]
    featurizer = CircularFingerprint(size=1024)
    tasks = ["log-solubility"]
    task_type = "regression"
    input_file = os.path.join(self.current_dir, "../../models/tests/example.csv")
    featurizer = DataFeaturizer(
        tasks=tasks,
        smiles_field=self.smiles_field,
        featurizers=featurizers,
        featurizer=featurizer,
        verbosity="low")

    return featurizer.featurize(input_file, self.data_dir)
@@ -45,7 +45,7 @@ class TestDatasetAPI(TestAPI):
    """Loads classification data from example.csv"""
    if os.path.exists(self.data_dir):
      shutil.rmtree(self.data_dir)
    featurizers = [CircularFingerprint(size=1024)]
    featurizer = [CircularFingerprint(size=1024)]
    tasks = ["outcome"]
    task_type = "classification"
    input_file = os.path.join(
@@ -53,7 +53,7 @@ class TestDatasetAPI(TestAPI):
    featurizer = DataFeaturizer(
        tasks=tasks,
        smiles_field=self.smiles_field,
        featurizers=featurizers,
        featurizer=featurizer,
        verbosity="low")
    return featurizer.featurize(input_file, self.data_dir)

@@ -61,7 +61,7 @@ class TestDatasetAPI(TestAPI):
    """Load example multitask data."""
    if os.path.exists(self.data_dir):
      shutil.rmtree(self.data_dir)
    featurizers = [CircularFingerprint(size=1024)]
    featurizer = CircularFingerprint(size=1024)
    tasks = ["task0", "task1", "task2", "task3", "task4", "task5", "task6",
             "task7", "task8", "task9", "task10", "task11", "task12",
             "task13", "task14", "task15", "task16"]
@@ -70,6 +70,6 @@ class TestDatasetAPI(TestAPI):
    featurizer = DataFeaturizer(
        tasks=tasks,
        smiles_field=self.smiles_field,
        featurizers=featurizers,
        featurizer=featurizer,
        verbosity="low")
    return featurizer.featurize(input_file, self.data_dir)
+2 −2
Original line number Diff line number Diff line
@@ -34,12 +34,12 @@ class TestDrop(TestAPI):

    # Featurize emols dataset
    print("About to featurize datasets.")
    featurizers = [CircularFingerprint(size=1024)]
    featurizer = CircularFingerprint(size=1024)
    emols_tasks = ['activity']

    featurizer = DataFeaturizer(tasks=emols_tasks,
                                smiles_field="smiles",
                                featurizers=featurizers,
                                featurizer=featurizer,
                                verbosity=verbosity)
    dataset = featurizer.featurize(dataset_file, data_dir)

+7 −7
Original line number Diff line number Diff line
@@ -35,11 +35,11 @@ class TestLoad(TestAPI):
    dataset_file = os.path.join(
        current_dir, "../../models/tests/example.csv")

    featurizers = [CircularFingerprint(size=1024)]
    featurizer = CircularFingerprint(size=1024)
    tasks = ["log-solubility"]
    featurizer = DataFeaturizer(tasks=tasks,
                                smiles_field="smiles",
                                featurizers=featurizers,
                                featurizer=featurizer,
                                verbosity=verbosity)
    dataset = featurizer.featurize(
        dataset_file, data_dir)
@@ -89,13 +89,13 @@ class TestLoad(TestAPI):

    # Featurize tox21 dataset
    print("About to featurize dataset.")
    featurizers = [CircularFingerprint(size=1024)]
    featurizer = [CircularFingerprint(size=1024)]
    all_tasks = ["task%d"%i for i in range(17)] 

    ####### Do featurization
    featurizer = DataFeaturizer(tasks=all_tasks,
                                smiles_field="smiles",
                                featurizers=featurizers,
                                featurizer=featurizer,
                                verbosity=verbosity)
    dataset = featurizer.featurize(
        dataset_file, data_dir)
@@ -154,7 +154,7 @@ class TestLoad(TestAPI):

    # Featurize tox21 dataset
    print("About to featurize dataset.")
    featurizers = [CircularFingerprint(size=1024)]
    featurizer = CircularFingerprint(size=1024)
    all_tasks = ["task%d"%i for i in range(17)] 
    # For debugging purposes
    n_tasks = 17 
@@ -163,7 +163,7 @@ class TestLoad(TestAPI):
    ####### Do multitask load
    featurizer = DataFeaturizer(tasks=tasks,
                                smiles_field="smiles",
                                featurizers=featurizers,
                                featurizer=featurizer,
                                verbosity=verbosity)
    dataset = featurizer.featurize(dataset_file, data_dir)

@@ -179,7 +179,7 @@ class TestLoad(TestAPI):
        shutil.rmtree(data_dir)
      featurizer = DataFeaturizer(tasks=[task],
                                  smiles_field="smiles",
                                  featurizers=featurizers,
                                  featurizer=featurizer,
                                  verbosity=verbosity)
      dataset = featurizer.featurize(dataset_file, data_dir)

+4 −4
Original line number Diff line number Diff line
@@ -34,11 +34,11 @@ class TestMerge(TestAPI):
    dataset_file = os.path.join(
        current_dir, "../../models/tests/example.csv")

    featurizers = [CircularFingerprint(size=1024)]
    featurizer = CircularFingerprint(size=1024)
    tasks = ["log-solubility"]
    featurizer = DataFeaturizer(tasks=tasks,
                                smiles_field="smiles",
                                featurizers=featurizers,
                                featurizer=featurizer,
                                verbosity=verbosity)
    first_dataset = featurizer.featurize(
        dataset_file, first_data_dir)
@@ -60,11 +60,11 @@ class TestMerge(TestAPI):
    dataset_file = os.path.join(
        current_dir, "../../models/tests/example.csv")

    featurizers = [CircularFingerprint(size=1024)]
    featurizer = CircularFingerprint(size=1024)
    tasks = ["log-solubility"]
    featurizer = DataFeaturizer(tasks=tasks,
                                smiles_field="smiles",
                                featurizers=featurizers,
                                featurizer=featurizer,
                                verbosity=verbosity)
    dataset = featurizer.featurize(
        dataset_file, data_dir, shard_size=2)