Unverified commit 30f2dd5e, authored by Bharath Ramsundar and committed by GitHub
Browse files

Merge pull request #1251 from miaecle/move_mean

Performance fix for qm7
parents e3b63891 9ef09f2a
Loading
Loading
Loading
Loading
+10 −3
Original line number Diff line number Diff line
@@ -12,13 +12,20 @@ from deepchem.molnet.load_function.bace_features import bace_user_specified_feat
logger = logging.getLogger(__name__)


def load_bace_regression(featurizer='ECFP', split='random', reload=True):
def load_bace_regression(featurizer='ECFP',
                         split='random',
                         reload=True,
                         move_mean=True):
  """Load bace datasets."""
  # Featurize bace dataset
  logger.info("About to featurize bace dataset.")
  data_dir = deepchem.utils.get_data_dir()
  if reload:
    save_dir = os.path.join(data_dir, "bace_r/" + featurizer + "/" + str(split))
    if move_mean:
      dir_name = "bace_r/" + featurizer + "/" + str(split)
    else:
      dir_name = "bace_r/" + featurizer + "_mean_unmoved/" + str(split)
    save_dir = os.path.join(data_dir, dir_name)

  dataset_file = os.path.join(data_dir, "bace.csv")

@@ -53,7 +60,7 @@ def load_bace_regression(featurizer='ECFP', split='random', reload=True):
  # Initialize transformers
  transformers = [
      deepchem.trans.NormalizationTransformer(
          transform_y=True, dataset=dataset)
          transform_y=True, dataset=dataset, move_mean=move_mean)
  ]

  logger.info("About to transform data")
+10 −4
Original line number Diff line number Diff line
@@ -11,15 +11,21 @@ import deepchem
logger = logging.getLogger(__name__)


def load_clearance(featurizer='ECFP', split='random', reload=True):
def load_clearance(featurizer='ECFP',
                   split='random',
                   reload=True,
                   move_mean=True):
  """Load clearance datasets."""
  # Featurize clearance dataset
  logger.info("About to featurize clearance dataset.")
  logger.info("About to load clearance dataset.")
  data_dir = deepchem.utils.get_data_dir()
  if reload:
    save_dir = os.path.join(data_dir,
                            "clearance/" + featurizer + "/" + str(split))
    if move_mean:
      dir_name = "clearance/" + featurizer + "/" + str(split)
    else:
      dir_name = "clearance/" + featurizer + "_mean_unmoved/" + str(split)
    save_dir = os.path.join(data_dir, dir_name)

  dataset_file = os.path.join(data_dir, "clearance.csv")
  if not os.path.exists(dataset_file):
@@ -51,7 +57,7 @@ def load_clearance(featurizer='ECFP', split='random', reload=True):
  # Initialize transformers
  transformers = [
      deepchem.trans.NormalizationTransformer(
          transform_y=True, dataset=dataset)
          transform_y=True, dataset=dataset, move_mean=move_mean)
  ]

  logger.info("About to transform data")
+7 −4
Original line number Diff line number Diff line
@@ -11,14 +11,17 @@ import deepchem
logger = logging.getLogger(__name__)


def load_delaney(featurizer='ECFP', split='index', reload=True):
def load_delaney(featurizer='ECFP', split='index', reload=True, move_mean=True):
  """Load delaney datasets."""
  # Featurize Delaney dataset
  logger.info("About to featurize Delaney dataset.")
  data_dir = deepchem.utils.get_data_dir()
  if reload:
    save_dir = os.path.join(data_dir,
                            "delaney/" + featurizer + "/" + str(split))
    if move_mean:
      dir_name = "delaney/" + featurizer + "/" + str(split)
    else:
      dir_name = "delaney/" + featurizer + "_mean_unmoved/" + str(split)
    save_dir = os.path.join(data_dir, dir_name)

  dataset_file = os.path.join(data_dir, "delaney-processed.csv")

@@ -50,7 +53,7 @@ def load_delaney(featurizer='ECFP', split='index', reload=True):
  # Initialize transformers
  transformers = [
      deepchem.trans.NormalizationTransformer(
          transform_y=True, dataset=dataset)
          transform_y=True, dataset=dataset, move_mean=move_mean)
  ]

  logger.info("About to transform data")
+7 −3
Original line number Diff line number Diff line
@@ -11,14 +11,18 @@ import deepchem
logger = logging.getLogger(__name__)


def load_lipo(featurizer='ECFP', split='index', reload=True):
def load_lipo(featurizer='ECFP', split='index', reload=True, move_mean=True):
  """Load Lipophilicity datasets."""
  # Featurize Lipophilicity dataset
  logger.info("About to featurize Lipophilicity dataset.")
  logger.info("About to load Lipophilicity dataset.")
  data_dir = deepchem.utils.get_data_dir()
  if reload:
    save_dir = os.path.join(data_dir, "lipo/" + featurizer + "/" + str(split))
    if move_mean:
      dir_name = "lipo/" + featurizer + "/" + str(split)
    else:
      dir_name = "lipo/" + featurizer + "_mean_unmoved/" + str(split)
    save_dir = os.path.join(data_dir, dir_name)

  dataset_file = os.path.join(data_dir, "Lipophilicity.csv")
  if not os.path.exists(dataset_file):
@@ -50,7 +54,7 @@ def load_lipo(featurizer='ECFP', split='index', reload=True):
  # Initialize transformers
  transformers = [
      deepchem.trans.NormalizationTransformer(
          transform_y=True, dataset=dataset)
          transform_y=True, dataset=dataset, move_mean=move_mean)
  ]

  logger.info("About to transform data")
+16 −7
Original line number Diff line number Diff line
@@ -12,10 +12,15 @@ import scipy.io

def load_qm7_from_mat(featurizer='CoulombMatrix',
                      split='stratified',
                      reload=True):
                      reload=True,
                      move_mean=True):
  data_dir = deepchem.utils.get_data_dir()
  if reload:
    save_dir = os.path.join(data_dir, "qm7/" + featurizer + "/" + str(split))
    if move_mean:
      dir_name = "qm7/" + featurizer + "/" + str(split)
    else:
      dir_name = "qm7/" + featurizer + "_mean_unmoved/" + str(split)
    save_dir = os.path.join(data_dir, dir_name)

  qm7_tasks = ["u0_atom"]

@@ -83,7 +88,7 @@ def load_qm7_from_mat(featurizer='CoulombMatrix',

    transformers = [
        deepchem.trans.NormalizationTransformer(
            transform_y=True, dataset=train_dataset)
            transform_y=True, dataset=train_dataset, move_mean=move_mean)
    ]

    for transformer in transformers:
@@ -99,7 +104,8 @@ def load_qm7_from_mat(featurizer='CoulombMatrix',

def load_qm7b_from_mat(featurizer='CoulombMatrix',
                       split='stratified',
                       reload=True):
                       reload=True,
                       move_mean=True):
  data_dir = deepchem.utils.get_data_dir()
  dataset_file = os.path.join(data_dir, "qm7b.mat")

@@ -129,7 +135,7 @@ def load_qm7b_from_mat(featurizer='CoulombMatrix',

    transformers = [
        deepchem.trans.NormalizationTransformer(
            transform_y=True, dataset=train_dataset)
            transform_y=True, dataset=train_dataset, move_mean=move_mean)
    ]

    for transformer in transformers:
@@ -141,7 +147,10 @@ def load_qm7b_from_mat(featurizer='CoulombMatrix',
    return qm7_tasks, (train_dataset, valid_dataset, test_dataset), transformers


def load_qm7(featurizer='CoulombMatrix', split='random', reload=True):
def load_qm7(featurizer='CoulombMatrix',
             split='random',
             reload=True,
             move_mean=True):
  """Load qm7 datasets."""
  # Featurize qm7 dataset
  print("About to featurize qm7 dataset.")
@@ -178,7 +187,7 @@ def load_qm7(featurizer='CoulombMatrix', split='random', reload=True):

  transformers = [
      deepchem.trans.NormalizationTransformer(
          transform_y=True, dataset=train_dataset)
          transform_y=True, dataset=train_dataset, move_mean=move_mean)
  ]

  for transformer in transformers:
Loading