Commit 7bdc2a80 authored by Bharath
Browse files

Debugging broken tests

parent 1b7140fe
Loading
Loading
Loading
Loading
+6 −1
Original line number Diff line number Diff line
@@ -13,6 +13,9 @@ from functools import partial
from deepchem.utils.save import save_to_disk
from deepchem.utils.save import load_from_disk
from deepchem.utils.save import log
####################################################### DEBUG
import sys
####################################################### DEBUG

__author__ = "Bharath Ramsundar"
__copyright__ = "Copyright 2016, Stanford University"
@@ -77,6 +80,7 @@ class Dataset(object):
      ids, X, y, w = convert_df_to_numpy(df, feature_type, tasks, mol_id_field)
      ############################################################## DEBUG
      print("convert_df_to_numpy returned successfully")
      sys.stdout.flush()
      ############################################################## DEBUG
    else:
      ids, X, y, w = raw_data
@@ -606,10 +610,11 @@ def convert_df_to_numpy(df, feature_type, tasks, mol_id_field):
    print("SLKFJD:LSKJF:SLFKJ:SLDFKJSD:LKFJDSLKFJSDKFJSLKFJS:LFJSDLKJ")
    print("feature_type")
    print(feature_type)
    sys.stdout.flush()
    ############################################################## DEBUG

    raise ValueError(
        "Featurized data does not support requested feature_type.")
        "Featurized data does not support requested feature_type %s." % feature_type)
  # perform common train/test split across all tasks
  n_samples = df.shape[0]
  n_tasks = len(tasks)
+15 −16
Original line number Diff line number Diff line
@@ -37,12 +37,11 @@ class TestLoad(TestAPI):

    featurizer = CircularFingerprint(size=1024)
    tasks = ["log-solubility"]
    featurizer = DataFeaturizer(tasks=tasks,
    loader = DataFeaturizer(tasks=tasks,
                                smiles_field="smiles",
                                featurizer=featurizer,
                                verbosity=verbosity)
    dataset = featurizer.featurize(
        dataset_file, data_dir)
    dataset = loader.featurize(dataset_file, data_dir)

    X, y, w, ids = dataset.to_numpy()
    shutil.move(data_dir, moved_data_dir)
@@ -89,15 +88,15 @@ class TestLoad(TestAPI):

    # Featurize tox21 dataset
    print("About to featurize dataset.")
    featurizer = [CircularFingerprint(size=1024)]
    featurizer = CircularFingerprint(size=1024)
    all_tasks = ["task%d"%i for i in range(17)] 

    ####### Do featurization
    featurizer = DataFeaturizer(tasks=all_tasks,
    loader = DataFeaturizer(tasks=all_tasks,
                                smiles_field="smiles",
                                featurizer=featurizer,
                                verbosity=verbosity)
    dataset = featurizer.featurize(
    dataset = loader.featurize(
        dataset_file, data_dir)

    # Do train/valid split.
@@ -161,11 +160,11 @@ class TestLoad(TestAPI):
    tasks = all_tasks[0:n_tasks]

    ####### Do multitask load
    featurizer = DataFeaturizer(tasks=tasks,
    loader = DataFeaturizer(tasks=tasks,
                            smiles_field="smiles",
                            featurizer=featurizer,
                            verbosity=verbosity)
    dataset = featurizer.featurize(dataset_file, data_dir)
    dataset = loader.featurize(dataset_file, data_dir)

    # Do train/valid split.
    X_multi, y_multi, w_multi, ids_multi = dataset.to_numpy()
@@ -177,11 +176,11 @@ class TestLoad(TestAPI):
      print("Processing task %s" % task)
      if os.path.exists(data_dir):
        shutil.rmtree(data_dir)
      featurizer = DataFeaturizer(tasks=[task],
      loader = DataFeaturizer(tasks=[task],
                              smiles_field="smiles",
                              featurizer=featurizer,
                              verbosity=verbosity)
      dataset = featurizer.featurize(dataset_file, data_dir)
      dataset = loader.featurize(dataset_file, data_dir)

      X_task, y_task, w_task, ids_task = dataset.to_numpy()
      y_tasks.append(y_task)
+4 −4
Original line number Diff line number Diff line
@@ -40,16 +40,16 @@ class TestReload(TestAPI):
    print("Number of examples in dataset: %s" % str(raw_dataset.shape[0]))

    print("About to featurize compounds")
    featurizers = [CircularFingerprint(size=1024)]
    featurizer = CircularFingerprint(size=1024)
    MUV_tasks = ['MUV-692', 'MUV-689', 'MUV-846', 'MUV-859', 'MUV-644',
                 'MUV-548', 'MUV-852', 'MUV-600', 'MUV-810', 'MUV-712',
                 'MUV-737', 'MUV-858', 'MUV-713', 'MUV-733', 'MUV-652',
                 'MUV-466', 'MUV-832']
    featurizer = DataFeaturizer(tasks=MUV_tasks,
    loader = DataFeaturizer(tasks=MUV_tasks,
                                smiles_field="smiles",
                                featurizers=featurizers,
                                featurizer=featurizer,
                                verbosity=verbosity)
    dataset = featurizer.featurize(dataset_file, self.data_dir)
    dataset = loader.featurize(dataset_file, self.data_dir)
    assert len(dataset) == len(raw_dataset)

    print("About to split compounds into train/valid/test")
+6 −6
Original line number Diff line number Diff line
@@ -32,13 +32,13 @@ class TestShuffle(TestAPI):
    dataset_file = os.path.join(
        current_dir, "../../models/tests/example.csv")

    featurizers = [CircularFingerprint(size=1024)]
    featurizer = CircularFingerprint(size=1024)
    tasks = ["log-solubility"]
    featurizer = DataFeaturizer(tasks=tasks,
    loader = DataFeaturizer(tasks=tasks,
                            smiles_field="smiles",
                                featurizers=featurizers,
                            featurizer=featurizer,
                            verbosity=verbosity)
    dataset = featurizer.featurize(
    dataset = loader.featurize(
        dataset_file, data_dir, shard_size=2)

    X_orig, y_orig, w_orig, orig_ids = dataset.to_numpy()
+30 −30
Original line number Diff line number Diff line
@@ -37,15 +37,15 @@ class TestFeaturizedSamples(TestAPI):
    task_type = "regression"
    task_types = {task: task_type for task in tasks}
    input_file = os.path.join(self.current_dir, "example.csv")
    featurizers = [CircularFingerprint(size=1024)]
    featurizer = CircularFingerprint(size=1024)

    input_file = os.path.join(self.current_dir, input_file)
    featurizer = DataFeaturizer(tasks=tasks,
    loader = DataFeaturizer(tasks=tasks,
                            smiles_field=self.smiles_field,
                                featurizers=featurizers,
                            featurizer=featurizer,
                            verbosity="low")

    dataset = featurizer.featurize(input_file, self.data_dir)
    dataset = loader.featurize(input_file, self.data_dir)

    # Splits featurized samples into train/test
    splitter = ScaffoldSplitter()
@@ -65,15 +65,15 @@ class TestFeaturizedSamples(TestAPI):
    task_type = "regression"
    task_types = {task: task_type for task in tasks}
    input_file = os.path.join(self.current_dir, "example.csv")
    featurizers = [CircularFingerprint(size=1024)]
    featurizer = CircularFingerprint(size=1024)

    input_file = os.path.join(self.current_dir, input_file)
    featurizer = DataFeaturizer(tasks=tasks,
    loader = DataFeaturizer(tasks=tasks,
                            smiles_field=self.smiles_field,
                                featurizers=featurizers,
                            featurizer=featurizer,
                            verbosity="low")

    dataset = featurizer.featurize(input_file, self.data_dir)
    dataset = loader.featurize(input_file, self.data_dir)

    # Splits featurized samples into train/test
    splitter = ScaffoldSplitter()
@@ -91,15 +91,15 @@ class TestFeaturizedSamples(TestAPI):
    task_type = "regression"
    task_types = {task: task_type for task in tasks}
    input_file = os.path.join(self.current_dir, "example.csv")
    featurizers = [CircularFingerprint(size=1024)]
    featurizer = CircularFingerprint(size=1024)

    input_file = os.path.join(self.current_dir, input_file)
    featurizer = DataFeaturizer(tasks=tasks,
    loader = DataFeaturizer(tasks=tasks,
                            smiles_field=self.smiles_field,
                                featurizers=featurizers,
                            featurizer=featurizer,
                            verbosity="low")

    dataset = featurizer.featurize(input_file, self.data_dir)
    dataset = loader.featurize(input_file, self.data_dir)

    # Splits featurized samples into train/test
    splitter = RandomSplitter()
@@ -117,13 +117,13 @@ class TestFeaturizedSamples(TestAPI):
    task_type = "regression"
    task_types = {task: task_type for task in tasks}
    input_file = os.path.join(self.current_dir, "example.csv")
    featurizers = [CircularFingerprint(size=1024)]
    featurizer = DataFeaturizer(tasks=tasks,
    featurizer = CircularFingerprint(size=1024)
    loader = DataFeaturizer(tasks=tasks,
                            smiles_field=self.smiles_field,
                                featurizers=featurizers,
                            featurizer=featurizer,
                            verbosity="low")

    dataset = featurizer.featurize(input_file, self.data_dir)
    dataset = loader.featurize(input_file, self.data_dir)

    # Splits featurized samples into train/test
    splitter = RandomSplitter()
@@ -140,13 +140,13 @@ class TestFeaturizedSamples(TestAPI):
    dataset_file = os.path.join(
        self.current_dir, "example.csv")

    featurizers = [CircularFingerprint(size=1024)]
    featurizer = CircularFingerprint(size=1024)
    tasks = ["log-solubility"]
    featurizer = DataFeaturizer(tasks=tasks,
    loader = DataFeaturizer(tasks=tasks,
                            smiles_field="smiles",
                                featurizers=featurizers,
                            featurizer=featurizer,
                            verbosity=verbosity)
    featurized_dataset = featurizer.featurize(
    featurized_dataset = loader.featurize(
        dataset_file, data_dir)
    n_dataset = len(featurized_dataset)
  
Loading