Commit f7afbc25 authored by Vignesh
Browse files

Added yapf formatting

parent 1f2102f0
Loading
Loading
Loading
Loading
+15 −10
Original line number Diff line number Diff line
@@ -25,7 +25,6 @@ TEST_FILENAME = "FACTORS_test2_disguised_combined_full.csv.gz"


def remove_missing_entries(dataset):

  """Remove missing entries.

  Some of the datasets have missing entries that sneak in as zero'd out
@@ -43,7 +42,6 @@ def remove_missing_entries(dataset):


def get_transformers(train_dataset):

  """Gets transformers applied to the dataset"""

  transformers = list()
@@ -52,8 +50,12 @@ def get_transformers(train_dataset):
  return transformers


def gen_factors(FACTORS_tasks, data_dir, train_dir, valid_dir, test_dir, shard_size=2000):

def gen_factors(FACTORS_tasks,
                data_dir,
                train_dir,
                valid_dir,
                test_dir,
                shard_size=2000):
  """Loads the FACTORS dataset; does not do train/test split"""

  time1 = time.time()
@@ -78,7 +80,8 @@ def gen_factors(FACTORS_tasks, data_dir, train_dir, valid_dir, test_dir, shard_s
  # Featurize the FACTORS dataset
  logger.info("About to featurize the FACTORS dataset")
  featurizer = deepchem.feat.UserDefinedFeaturizer(merck_descriptors)
  loader = deepchem.data.UserCSVLoader(tasks=FACTORS_tasks, id_field="Molecule", featurizer=featurizer)
  loader = deepchem.data.UserCSVLoader(
      tasks=FACTORS_tasks, id_field="Molecule", featurizer=featurizer)

  logger.info("Featurizing the train dataset...")
  train_dataset = loader.featurize(train_files, shard_size=shard_size)
@@ -103,7 +106,8 @@ def gen_factors(FACTORS_tasks, data_dir, train_dir, valid_dir, test_dir, shard_s
  transformers = get_transformers(train_dataset)

  for transformer in transformers:
    logger.info("Performing transformations with {}".format(transformer.__class__.__name__))
    logger.info("Performing transformations with {}".format(
        transformer.__class__.__name__))

    logger.info("Transforming the training dataset...")
    train_dataset = transformer.transform(train_dataset)
@@ -135,11 +139,12 @@ def gen_factors(FACTORS_tasks, data_dir, train_dir, valid_dir, test_dir, shard_s


def load_factors(shard_size=2000, featurizer=None, split=None, reload=True):

  """Loads FACTOR dataset; does not do train/test split"""

  FACTORS_tasks = ['T_00001', 'T_00002', 'T_00003', 'T_00004', 'T_00005', 'T_00006',
                   'T_00007', 'T_00008', 'T_00009', 'T_00010', 'T_00011', 'T_00012']
  FACTORS_tasks = [
      'T_00001', 'T_00002', 'T_00003', 'T_00004', 'T_00005', 'T_00006',
      'T_00007', 'T_00008', 'T_00009', 'T_00010', 'T_00011', 'T_00012'
  ]

  data_dir = deepchem.utils.get_data_dir()
  data_dir = os.path.join(data_dir, "factors")
+33 −26
Original line number Diff line number Diff line
@@ -21,12 +21,10 @@ TRAIN_FILENAME = "KINASE_training_disguised_combined_full.csv.gz"
VALID_FILENAME = "KINASE_test1_disguised_combined_full.csv.gz"
TEST_FILENAME = "KINASE_test2_disguised_combined_full.csv.gz"


logger = logging.getLogger(__name__)


def remove_missing_entries(dataset):

  """Remove missing entries.

  Some of the datasets have missing entries that sneak in as zero'd out
@@ -44,7 +42,6 @@ def remove_missing_entries(dataset):


def get_transformers(train_dataset):

  """Gets transformers applied to the dataset"""
  #TODO: Check for this

@@ -53,7 +50,12 @@ def get_transformers(train_dataset):
  return transformers


def gen_kinase(KINASE_tasks, train_dir, valid_dir, test_dir, data_dir, shard_size=2000):
def gen_kinase(KINASE_tasks,
               train_dir,
               valid_dir,
               test_dir,
               data_dir,
               shard_size=2000):

  time1 = time.time()

@@ -85,10 +87,12 @@ def gen_kinase(KINASE_tasks, train_dir, valid_dir, test_dir, data_dir, shard_siz
      tasks=KINASE_tasks, id_field="Molecule", featurizer=featurizer)

  logger.info("Featurizing train datasets...")
  train_dataset = loader.featurize(input_files=train_files, shard_size=shard_size)
  train_dataset = loader.featurize(
      input_files=train_files, shard_size=shard_size)

  logger.info("Featurizing validation datasets...")
  valid_dataset = loader.featurize(input_files=valid_files, shard_size=shard_size)
  valid_dataset = loader.featurize(
      input_files=valid_files, shard_size=shard_size)

  logger.info("Featurizing test datasets....")
  test_dataset = loader.featurize(input_files=test_files, shard_size=shard_size)
@@ -107,7 +111,8 @@ def gen_kinase(KINASE_tasks, train_dir, valid_dir, test_dir, data_dir, shard_siz
  transformers = get_transformers(train_dataset)

  for transformer in transformers:
    logger.info("Performing transformations with {}".format(transformer.__class__.__name__))
    logger.info("Performing transformations with {}".format(
        transformer.__class__.__name__))

    logger.info("Transforming the training dataset...")
    train_dataset = transformer.transform(train_dataset)
@@ -143,7 +148,8 @@ def load_kinase(shard_size=2000, featurizer=None, split=None, reload=True):

  "Loads kinase datasets, does not do train/test split"

  KINASE_tasks = ['T_00013', 'T_00014', 'T_00015', 'T_00016', 'T_00017', 'T_00018',
  KINASE_tasks = [
      'T_00013', 'T_00014', 'T_00015', 'T_00016', 'T_00017', 'T_00018',
      'T_00019', 'T_00020', 'T_00021', 'T_00022', 'T_00023', 'T_00024',
      'T_00025', 'T_00026', 'T_00027', 'T_00028', 'T_00029', 'T_00030',
      'T_00031', 'T_00032', 'T_00033', 'T_00034', 'T_00035', 'T_00036',
@@ -159,7 +165,8 @@ def load_kinase(shard_size=2000, featurizer=None, split=None, reload=True):
      'T_00091', 'T_00092', 'T_00093', 'T_00094', 'T_00095', 'T_00096',
      'T_00097', 'T_00098', 'T_00099', 'T_00100', 'T_00101', 'T_00102',
      'T_00103', 'T_00104', 'T_00105', 'T_00106', 'T_00107', 'T_00108',
                  'T_00109', 'T_00110', 'T_00111']
      'T_00109', 'T_00110', 'T_00111'
  ]

  data_dir = deepchem.utils.get_data_dir()
  data_dir = os.path.join(data_dir, "kinase")
+10 −10
Original line number Diff line number Diff line
@@ -52,7 +52,6 @@ def get_transformers(train_dataset):


def gen_uv(UV_tasks, data_dir, train_dir, valid_dir, test_dir, shard_size=2000):

  """Loading the UV dataset; does not do train/test split"""

  time1 = time.time()
@@ -84,10 +83,12 @@ def gen_uv(UV_tasks, data_dir, train_dir, valid_dir, test_dir, shard_size=2000):
      tasks=UV_tasks, id_field="Molecule", featurizer=featurizer)

  logger.info("Featurizing train datasets...")
  train_dataset = loader.featurize(input_files=train_files, shard_size=shard_size)
  train_dataset = loader.featurize(
      input_files=train_files, shard_size=shard_size)

  logger.info("Featurizing validation datasets...")
  valid_dataset = loader.featurize(input_files=valid_files, shard_size=shard_size)
  valid_dataset = loader.featurize(
      input_files=valid_files, shard_size=shard_size)

  logger.info("Featurizing test datasets....")
  test_dataset = loader.featurize(input_files=test_files, shard_size=shard_size)
@@ -107,7 +108,8 @@ def gen_uv(UV_tasks, data_dir, train_dir, valid_dir, test_dir, shard_size=2000):
  transformers = get_transformers(train_dataset)

  for transformer in transformers:
    logger.info("Performing transformations with {}".format(transformer.__class__.__name__))
    logger.info("Performing transformations with {}".format(
        transformer.__class__.__name__))

    logger.info("Transforming the training dataset...")
    train_dataset = transformer.transform(train_dataset)
@@ -139,7 +141,6 @@ def gen_uv(UV_tasks, data_dir, train_dir, valid_dir, test_dir, shard_size=2000):


def load_uv(shard_size=2000, featurizer=None, split=None, reload=True):

  """Load UV dataset; does not do train/test split"""

  data_dir = deepchem.utils.get_data_dir()
@@ -168,5 +169,4 @@ def load_uv(shard_size=2000, featurizer=None, split=None, reload=True):

  transformers = get_transformers(train_dataset)

  return UV_tasks, (train_dataset, valid_dataset,
                    test_dataset), transformers
  return UV_tasks, (train_dataset, valid_dataset, test_dataset), transformers
+30 −28
Original line number Diff line number Diff line
UV_tasks = ['w__210', 'w__211', 'w__212', 'w__213', 'w__214', 'w__215', 'w__216',
UV_tasks = [
    'w__210', 'w__211', 'w__212', 'w__213', 'w__214', 'w__215', 'w__216',
    'w__217', 'w__218', 'w__219', 'w__220', 'w__221', 'w__222', 'w__223',
    'w__224', 'w__225', 'w__226', 'w__227', 'w__228', 'w__229', 'w__230',
    'w__231', 'w__232', 'w__233', 'w__234', 'w__235', 'w__236', 'w__237',
@@ -25,4 +26,5 @@ UV_tasks = ['w__210', 'w__211', 'w__212', 'w__213', 'w__214', 'w__215', 'w__216'
    'w__378', 'w__379', 'w__380', 'w__381', 'w__382', 'w__383', 'w__384',
    'w__385', 'w__386', 'w__387', 'w__388', 'w__389', 'w__390', 'w__391',
    'w__392', 'w__393', 'w__394', 'w__395', 'w__396', 'w__397', 'w__398',
            'w__399', 'w__400']
    'w__399', 'w__400'
]