Commit 4a827ea8 authored by miaecle's avatar miaecle
Browse files

low data benchmark

parent 155e3fba
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -26,3 +26,4 @@ from deepchem.molnet.load_function.tox21_datasets import load_tox21
from deepchem.molnet.load_function.toxcast_datasets import load_toxcast

from deepchem.molnet.run_benchmark import run_benchmark
from deepchem.molnet.run_benchmark_low_data import run_benchmark_low_data
+12 −3
Original line number Diff line number Diff line
@@ -29,6 +29,9 @@ CheckFeaturizer = {
('muv', 'rf'):            ['ECFP', 1024],
('muv', 'irv'):           ['ECFP', 1024],
('muv', 'graphconv'):     ['GraphConv', 75],
('muv', 'siamese'):       ['GraphConv', 75],
('muv', 'attn'):          ['GraphConv', 75],
('muv', 'res'):           ['GraphConv', 75],
('pcba', 'logreg'):       ['ECFP', 1024],
('pcba', 'tf'):           ['ECFP', 1024],
('pcba', 'tf_robust'):    ['ECFP', 1024],
@@ -41,12 +44,18 @@ CheckFeaturizer = {
('sider', 'rf'):          ['ECFP', 1024],
('sider', 'irv'):         ['ECFP', 1024],
('sider', 'graphconv'):   ['GraphConv', 75],
('sider', 'siamese'):     ['GraphConv', 75],
('sider', 'attn'):        ['GraphConv', 75],
('sider', 'res'):         ['GraphConv', 75],
('tox21', 'logreg'):      ['ECFP', 1024],
('tox21', 'tf'):          ['ECFP', 1024],
('tox21', 'tf_robust'):   ['ECFP', 1024],
('tox21', 'rf'):          ['ECFP', 1024],
('tox21', 'irv'):         ['ECFP', 1024],
('tox21', 'graphconv'):   ['GraphConv', 75],
('tox21', 'siamese'):     ['GraphConv', 75],
('tox21', 'attn'):        ['GraphConv', 75],
('tox21', 'res'):         ['GraphConv', 75],
('toxcast', 'logreg'):    ['ECFP', 1024],
('toxcast', 'tf'):        ['ECFP', 1024],
('toxcast', 'tf_robust'): ['ECFP', 1024],
@@ -104,7 +113,7 @@ CheckSplit = {
'hopv':    ['index', 'random', 'scaffold', 'butina'],
'kaggle': ['index'], # already split, no splitter required
'lipo':    ['index', 'random', 'scaffold'],
'muv':    ['index', 'random', 'scaffold'], 
'muv':    ['index', 'random', 'scaffold', 'task'], 
'nci':    ['index', 'random', 'scaffold'],
'pcba':   ['index', 'random', 'scaffold'],
'pdbbind':['index', 'random'],
@@ -114,7 +123,7 @@ CheckSplit = {
'qm8':    ['index', 'random', 'stratified'],
'qm9':    ['index', 'random', 'stratified'],
'sampl':  ['index', 'random', 'scaffold'],
'sider':  ['index', 'random', 'scaffold'],
'tox21':  ['index', 'random', 'scaffold', 'butina'],
'sider':  ['index', 'random', 'scaffold', 'task'],
'tox21':  ['index', 'random', 'scaffold', 'butina', 'task'],
'toxcast':['index', 'random', 'scaffold']
}
+10 −4
Original line number Diff line number Diff line
@@ -9,7 +9,7 @@ import os
import deepchem


def load_muv(featurizer='ECFP', split='index'):
def load_muv(featurizer='ECFP', split='index', K=4):
  """Load MUV datasets. Does not do train/test split"""
  # Load MUV dataset
  print("About to load MUV dataset.")
@@ -56,8 +56,14 @@ def load_muv(featurizer='ECFP', split='index'):
  splitters = {
      'index': deepchem.splits.IndexSplitter(),
      'random': deepchem.splits.RandomSplitter(),
      'scaffold': deepchem.splits.ScaffoldSplitter()
      'scaffold': deepchem.splits.ScaffoldSplitter(),
      'task': deepchem.splits.TaskSplitter()
  }
  splitter = splitters[split]
  if split == 'task':
    fold_datasets = splitter.k_fold_split(dataset, K)
    all_dataset = fold_datasets
  else:
    train, valid, test = splitter.train_valid_test_split(dataset)
  return MUV_tasks, (train, valid, test), transformers
    all_dataset = (train, valid, test)
  return MUV_tasks, all_dataset, transformers
+10 −5
Original line number Diff line number Diff line
@@ -9,7 +9,7 @@ import os
import deepchem


def load_sider(featurizer='ECFP', split='index'):
def load_sider(featurizer='ECFP', split='index', K=4):
  print("About to load MUV dataset.")
  if "DEEPCHEM_DATA_DIR" in os.environ:
    data_dir = os.environ["DEEPCHEM_DATA_DIR"]
@@ -56,9 +56,14 @@ def load_sider(featurizer='ECFP', split='index'):
  splitters = {
      'index': deepchem.splits.IndexSplitter(),
      'random': deepchem.splits.RandomSplitter(),
      'scaffold': deepchem.splits.ScaffoldSplitter()
      'scaffold': deepchem.splits.ScaffoldSplitter(),
      'task': deepchem.splits.TaskSplitter()
  }
  splitter = splitters[split]
  if split == 'task':
    fold_datasets = splitter.k_fold_split(dataset, K)
    all_dataset = fold_datasets
  else:
    train, valid, test = splitter.train_valid_test_split(dataset)

  return SIDER_tasks, (train, valid, test), transformers
    all_dataset = (train, valid, test)
  return SIDER_tasks, all_dataset, transformers
+10 −4
Original line number Diff line number Diff line
@@ -9,7 +9,7 @@ import os
import deepchem


def load_tox21(featurizer='ECFP', split='index'):
def load_tox21(featurizer='ECFP', split='index', K=4):
  """Load Tox21 datasets. Does not do train/test split"""
  # Featurize Tox21 dataset
  if "DEEPCHEM_DATA_DIR" in os.environ:
@@ -52,8 +52,14 @@ def load_tox21(featurizer='ECFP', split='index'):
      'index': deepchem.splits.IndexSplitter(),
      'random': deepchem.splits.RandomSplitter(),
      'scaffold': deepchem.splits.ScaffoldSplitter(),
      'butina': deepchem.splits.ButinaSplitter()
      'butina': deepchem.splits.ButinaSplitter(),
      'task': deepchem.splits.TaskSplitter()
  }
  splitter = splitters[split]
  if split == 'task':
    fold_datasets = splitter.k_fold_split(dataset, K)
    all_dataset = fold_datasets
  else:
    train, valid, test = splitter.train_valid_test_split(dataset)
  return tox21_tasks, (train, valid, test), transformers
    all_dataset = (train, valid, test)
  return tox21_tasks, all_dataset, transformers
Loading