Commit 8cf6fc06 authored by Bharath Ramsundar, committed by GitHub
Browse files

Merge pull request #454 from miaecle/molnet

quick fix for dc->deepchem
parents cada73f0 770cca00
Loading
Loading
Loading
Loading
+4 −0
Original line number Diff line number Diff line
from __future__ import print_function
from __future__ import division
from __future__ import unicode_literals

from deepchem.molnet.load_function.chembl_datasets import load_chembl
from deepchem.molnet.load_function.clintox_datasets import load_clintox
from deepchem.molnet.load_function.delaney_datasets import load_delaney
+11 −10
Original line number Diff line number Diff line
@@ -6,7 +6,7 @@ from __future__ import print_function
from __future__ import unicode_literals

import os
-import deepchem as dc
+import deepchem

from deepchem.molnet.load_function.chembl_tasks import chembl_tasks

@@ -68,13 +68,13 @@ def load_chembl(shard_size=2000,
  # Featurize ChEMBL dataset
  print("About to featurize ChEMBL dataset.")
  if featurizer == 'ECFP':
-    featurizer = dc.feat.CircularFingerprint(size=1024)
+    featurizer = deepchem.feat.CircularFingerprint(size=1024)
  elif featurizer == 'GraphConv':
-    featurizer = dc.feat.ConvMolFeaturizer()
+    featurizer = deepchem.feat.ConvMolFeaturizer()
  elif featurizer == 'Raw':
-    featurizer = dc.feat.RawFeaturizer()
+    featurizer = deepchem.feat.RawFeaturizer()

-  loader = dc.data.CSVLoader(
+  loader = deepchem.data.CSVLoader(
      tasks=chembl_tasks, smiles_field="smiles", featurizer=featurizer)

  if split == "year":
@@ -90,7 +90,7 @@ def load_chembl(shard_size=2000,
  print("About to transform data")
  if split == "year":
    transformers = [
-        dc.trans.NormalizationTransformer(
+        deepchem.trans.NormalizationTransformer(
            transform_y=True, dataset=train_dataset)
    ]
    for transformer in transformers:
@@ -99,15 +99,16 @@ def load_chembl(shard_size=2000,
      test = transformer.transform(test_dataset)
  else:
    transformers = [
-        dc.trans.NormalizationTransformer(transform_y=True, dataset=dataset)
+        deepchem.trans.NormalizationTransformer(
+            transform_y=True, dataset=dataset)
    ]
    for transformer in transformers:
      dataset = transformer.transform(dataset)

  splitters = {
-      'index': dc.splits.IndexSplitter(),
-      'random': dc.splits.RandomSplitter(),
-      'scaffold': dc.splits.ScaffoldSplitter()
+      'index': deepchem.splits.IndexSplitter(),
+      'random': deepchem.splits.RandomSplitter(),
+      'scaffold': deepchem.splits.ScaffoldSplitter()
  }

  if split in splitters:
+10 −10
Original line number Diff line number Diff line
@@ -8,7 +8,7 @@ from __future__ import division
from __future__ import unicode_literals

import os
-import deepchem as dc
+import deepchem


def load_clintox(featurizer='ECFP', split='index'):
@@ -27,7 +27,7 @@ def load_clintox(featurizer='ECFP', split='index'):
    )

  print("About to load clintox dataset.")
-  dataset = dc.utils.save.load_from_disk(dataset_file)
+  dataset = deepchem.utils.save.load_from_disk(dataset_file)
  clintox_tasks = dataset.columns.values[1:].tolist()
  print("Tasks in dataset: %s" % (clintox_tasks))
  print("Number of tasks in dataset: %s" % str(len(clintox_tasks)))
@@ -36,20 +36,20 @@ def load_clintox(featurizer='ECFP', split='index'):
  # Featurize clintox dataset
  print("About to featurize clintox dataset.")
  if featurizer == 'ECFP':
-    featurizer = dc.feat.CircularFingerprint(size=1024)
+    featurizer = deepchem.feat.CircularFingerprint(size=1024)
  elif featurizer == 'GraphConv':
-    featurizer = dc.feat.ConvMolFeaturizer()
+    featurizer = deepchem.feat.ConvMolFeaturizer()
  elif featurizer == 'Raw':
-    featurizer = dc.feat.RawFeaturizer()
+    featurizer = deepchem.feat.RawFeaturizer()

-  loader = dc.data.CSVLoader(
+  loader = deepchem.data.CSVLoader(
      tasks=clintox_tasks, smiles_field="smiles", featurizer=featurizer)
  dataset = loader.featurize(dataset_file, shard_size=8192)

  # Transform clintox dataset
  print("About to transform clintox dataset.")
  transformers = [
-      dc.trans.BalancingTransformer(transform_w=True, dataset=dataset)
+      deepchem.trans.BalancingTransformer(transform_w=True, dataset=dataset)
  ]
  for transformer in transformers:
    dataset = transformer.transform(dataset)
@@ -57,9 +57,9 @@ def load_clintox(featurizer='ECFP', split='index'):
  # Split clintox dataset
  print("About to split clintox dataset.")
  splitters = {
-      'index': dc.splits.IndexSplitter(),
-      'random': dc.splits.RandomSplitter(),
-      'scaffold': dc.splits.ScaffoldSplitter()
+      'index': deepchem.splits.IndexSplitter(),
+      'random': deepchem.splits.RandomSplitter(),
+      'scaffold': deepchem.splits.ScaffoldSplitter()
  }
  splitter = splitters[split]
  train, valid, test = splitter.train_valid_test_split(dataset)
+10 −9
Original line number Diff line number Diff line
@@ -6,7 +6,7 @@ from __future__ import division
from __future__ import unicode_literals

import os
-import deepchem as dc
+import deepchem


def load_delaney(featurizer='ECFP', split='index'):
@@ -28,19 +28,20 @@ def load_delaney(featurizer='ECFP', split='index'):

  delaney_tasks = ['measured log solubility in mols per litre']
  if featurizer == 'ECFP':
-    featurizer = dc.feat.CircularFingerprint(size=1024)
+    featurizer = deepchem.feat.CircularFingerprint(size=1024)
  elif featurizer == 'GraphConv':
-    featurizer = dc.feat.ConvMolFeaturizer()
+    featurizer = deepchem.feat.ConvMolFeaturizer()
  elif featurizer == 'Raw':
-    featurizer = dc.feat.RawFeaturizer()
+    featurizer = deepchem.feat.RawFeaturizer()

-  loader = dc.data.CSVLoader(
+  loader = deepchem.data.CSVLoader(
      tasks=delaney_tasks, smiles_field="smiles", featurizer=featurizer)
  dataset = loader.featurize(dataset_file, shard_size=8192)

  # Initialize transformers 
  transformers = [
-      dc.trans.NormalizationTransformer(transform_y=True, dataset=dataset)
+      deepchem.trans.NormalizationTransformer(
+          transform_y=True, dataset=dataset)
  ]

  print("About to transform data")
@@ -48,9 +49,9 @@ def load_delaney(featurizer='ECFP', split='index'):
    dataset = transformer.transform(dataset)

  splitters = {
-      'index': dc.splits.IndexSplitter(),
-      'random': dc.splits.RandomSplitter(),
-      'scaffold': dc.splits.ScaffoldSplitter()
+      'index': deepchem.splits.IndexSplitter(),
+      'random': deepchem.splits.RandomSplitter(),
+      'scaffold': deepchem.splits.ScaffoldSplitter()
  }
  splitter = splitters[split]
  train, valid, test = splitter.train_valid_test_split(dataset)
+10 −10
Original line number Diff line number Diff line
@@ -6,7 +6,7 @@ from __future__ import division
from __future__ import unicode_literals

import os
-import deepchem as dc
+import deepchem


def load_hiv(featurizer='ECFP', split='index'):
@@ -27,18 +27,18 @@ def load_hiv(featurizer='ECFP', split='index'):

  hiv_tasks = ["HIV_active"]
  if featurizer == 'ECFP':
-    featurizer = dc.feat.CircularFingerprint(size=1024)
+    featurizer = deepchem.feat.CircularFingerprint(size=1024)
  elif featurizer == 'GraphConv':
-    featurizer = dc.feat.ConvMolFeaturizer()
+    featurizer = deepchem.feat.ConvMolFeaturizer()
  elif featurizer == 'Raw':
-    featurizer = dc.feat.RawFeaturizer()
+    featurizer = deepchem.feat.RawFeaturizer()

-  loader = dc.data.CSVLoader(
+  loader = deepchem.data.CSVLoader(
      tasks=hiv_tasks, smiles_field="smiles", featurizer=featurizer)
  dataset = loader.featurize(dataset_file, shard_size=8192)
  # Initialize transformers 
  transformers = [
-      dc.trans.BalancingTransformer(transform_w=True, dataset=dataset)
+      deepchem.trans.BalancingTransformer(transform_w=True, dataset=dataset)
  ]

  print("About to transform data")
@@ -46,10 +46,10 @@ def load_hiv(featurizer='ECFP', split='index'):
    dataset = transformer.transform(dataset)

  splitters = {
-      'index': dc.splits.IndexSplitter(),
-      'random': dc.splits.RandomSplitter(),
-      'scaffold': dc.splits.ScaffoldSplitter(),
-      'butina': dc.splits.ButinaSplitter()
+      'index': deepchem.splits.IndexSplitter(),
+      'random': deepchem.splits.RandomSplitter(),
+      'scaffold': deepchem.splits.ScaffoldSplitter(),
+      'butina': deepchem.splits.ButinaSplitter()
  }
  splitter = splitters[split]
  train, valid, test = splitter.train_valid_test_split(dataset)
Loading