Commit 7d0bad83 authored by Bharath Ramsundar
Browse files

Merge branch 'master' of https://github.com/deepchem/deepchem into trim

parents e392e133 f6e75cf0
Loading
Loading
Loading
Loading
+127 −115
Original line number Diff line number Diff line
@@ -338,6 +338,18 @@ class DTNNTensorGraph(TensorGraph):

        yield feed_dict

  def predict(self, dataset, transformers=[], outputs=None):
    """Predict on `dataset`, undoing `transformers` on multi-output results.

    Parameters
    ----------
    dataset:
      Data to predict on; forwarded unchanged to the parent class's `predict`.
    transformers: list
      Transformers handed to `undo_transforms` together with the concatenated
      predictions. The default list is never mutated by this method.
    outputs:
      A single output object or a sequence of them. Defaults to
      `self.outputs` when None.

    Returns
    -------
    The parent's result unchanged when `outputs` is not a sequence; otherwise
    the parent's results concatenated along the last axis and passed through
    `undo_transforms`.

    Raises
    ------
    ValueError
      If transformers are supplied together with a non-sequence `outputs`.
    """
    # `collections.Sequence` no longer exists on Python 3.10+; the ABC lives in
    # `collections.abc`. Fall back to the old location for Python 2.
    try:
      from collections.abc import Sequence
    except ImportError:
      from collections import Sequence

    if outputs is None:
      outputs = self.outputs
    single_output = not isinstance(outputs, Sequence)
    # Truthiness (rather than `!= []`) so that None or an empty tuple also
    # count as "no transformers".
    if transformers and single_output:
      raise ValueError(
          "DTNN does not support single tensor output with transformers")
    retval = super(DTNNTensorGraph, self).predict(dataset, outputs=outputs)
    if single_output:
      return retval
    retval = np.concatenate(retval, axis=-1)
    return undo_transforms(retval, transformers)


class DAGTensorGraph(TensorGraph):

examples/bace/bace_dnn.py

deleted 100644 → 0
+0 −101
Original line number Diff line number Diff line
import sys
import os
import deepchem
import tempfile, shutil
import numpy as np
import numpy.random
from bace_datasets import load_bace
from deepchem.utils.save import load_from_disk
from deepchem.splits import SpecifiedSplitter
from deepchem.data import Dataset
from deepchem.hyper import HyperparamOpt
from deepchem import metrics
from deepchem.metrics import Metric
from deepchem.utils.evaluate import Evaluator
from deepchem.models.keras_models.fcnet import MultiTaskDNN
from deepchem.models.keras_models import KerasModel


def bace_dnn_model(mode="classification", verbosity="high", split="20-80"):
  """Train fully-connected DNNs on BACE dataset.

  Loads the BACE splits, runs a hyperparameter search over learning rate and
  decay, then evaluates the best model on every non-empty split (train,
  valid, test, crystal), passing per-split CSV and stats file names to the
  evaluator and printing the resulting scores.

  Parameters
  ----------
  mode: str
    Either "classification" or "regression". Selects the metrics and is
    passed to the model as its task type.
  verbosity: str
    Verbosity passed to each `Metric`.
  split: str
    Split name passed to `load_bace` and embedded in the output file names.

  Raises
  ------
  ValueError
    If `mode` is neither "regression" nor "classification".
  """
  (bace_tasks, train_dataset, valid_dataset, test_dataset, crystal_dataset,
   transformers) = load_bace(mode=mode, transform=True, split=split)

  if mode == "regression":
    r2_metric = Metric(metrics.r2_score, verbosity=verbosity)
    rms_metric = Metric(metrics.rms_score, verbosity=verbosity)
    mae_metric = Metric(metrics.mae_score, verbosity=verbosity)
    all_metrics = [r2_metric, rms_metric, mae_metric]
    metric = r2_metric
  elif mode == "classification":
    roc_auc_metric = Metric(metrics.roc_auc_score, verbosity=verbosity)
    accuracy_metric = Metric(metrics.accuracy_score, verbosity=verbosity)
    mcc_metric = Metric(metrics.matthews_corrcoef, verbosity=verbosity)
    # Note sensitivity = recall
    recall_metric = Metric(metrics.recall_score, verbosity=verbosity)
    all_metrics = [accuracy_metric, mcc_metric, recall_metric, roc_auc_metric]
    metric = roc_auc_metric
  else:
    raise ValueError("Invalid mode %s" % mode)

  # Five random samples per hyperparameter, drawn on a log10 scale.
  params_dict = {
      "learning_rate": np.power(10., np.random.uniform(-5, -3, size=5)),
      "decay": np.power(10., np.random.uniform(-6, -4, size=5)),
      "nb_epoch": [40],
  }

  n_features = train_dataset.get_data_shape()[0]

  def model_builder(model_params, model_dir):
    # Use the requested mode as the task type; this was previously hard-coded
    # to "classification", so regression runs built the wrong kind of model.
    keras_model = MultiTaskDNN(
        len(bace_tasks), n_features, mode, dropout=.5, **model_params)
    return KerasModel(keras_model, model_dir)

  optimizer = HyperparamOpt(model_builder, verbosity="low")
  best_dnn, _, _ = optimizer.hyperparam_search(
      params_dict, train_dataset, valid_dataset, transformers, metric=metric)

  # Evaluate the best model on each non-empty split, in a fixed order.
  named_splits = [("Train", train_dataset), ("Valid", valid_dataset),
                  ("Test", test_dataset), ("Crystal", crystal_dataset)]
  for label, dataset in named_splits:
    if len(dataset) == 0:
      continue
    tag = label.lower()
    evaluator = Evaluator(best_dnn, dataset, transformers)
    csv_out = "dnn_%s_%s_%s.csv" % (mode, split, tag)
    stats_out = "dnn_%s_%s_%s_stats.txt" % (mode, split, tag)
    score = evaluator.compute_model_performance(
        all_metrics, csv_out=csv_out, stats_out=stats_out)
    print("DNN %s set %s: %s" % (label, metric.name, str(score)))

if __name__ == "__main__":
  # Each entry: banner to print, model mode, train/test split name.
  runs = (
      ("Classifier DNN 20-80:", "classification", "20-80"),
      ("Classifier DNN 80-20:", "classification", "80-20"),
      ("Regressor DNN 20-80:", "regression", "20-80"),
      ("Regressor DNN 80-20:", "regression", "80-20"),
  )
  for banner, run_mode, run_split in runs:
    print(banner)
    print("--------------------------------")
    bace_dnn_model(mode=run_mode, verbosity="high", split=run_split)
+6 −21
Original line number Diff line number Diff line
@@ -6,8 +6,12 @@ from __future__ import division
from __future__ import unicode_literals

import numpy as np

from models import GraphConvTensorGraph

# Seed NumPy's global RNG with a fixed value.
np.random.seed(123)
import tensorflow as tf

# Seed TensorFlow's RNG with the same fixed value.
tf.set_random_seed(123)
import deepchem as dc
from deepchem.molnet import load_chembl
@@ -25,28 +29,9 @@ metric = dc.metrics.Metric(dc.metrics.pearson_r2_score, np.mean)
n_feat = 75
# Batch size of models
batch_size = 128
# NOTE(review): this span looks like a diff hunk rendered without +/- markers.
# The SequentialGraph / MultitaskGraphRegressor construction appears to be the
# removed side and the GraphConvTensorGraph call its replacement; confirm
# against the actual file before relying on it.
graph_model = dc.nn.SequentialGraph(n_feat)
# Two graph-convolution stages, each followed by batch norm and pooling.
graph_model.add(dc.nn.GraphConv(128, n_feat, activation='relu'))
graph_model.add(dc.nn.BatchNormalization(epsilon=1e-5, mode=1))
graph_model.add(dc.nn.GraphPool())
graph_model.add(dc.nn.GraphConv(128, 128, activation='relu'))
graph_model.add(dc.nn.BatchNormalization(epsilon=1e-5, mode=1))
graph_model.add(dc.nn.GraphPool())
# Gather Projection
graph_model.add(dc.nn.Dense(256, 128, activation='relu'))
graph_model.add(dc.nn.BatchNormalization(epsilon=1e-5, mode=1))
graph_model.add(dc.nn.GraphGather(batch_size, activation="tanh"))

model = dc.models.MultitaskGraphRegressor(
    graph_model,
    len(chembl_tasks),
    n_feat,
    batch_size=batch_size,
    learning_rate=1e-3,
    learning_rate_decay_time=1000,
    optimizer_type="adam",
    beta1=.9,
    beta2=.999)
# As written, this rebinds `model`, so the regressor built above is unused.
model = GraphConvTensorGraph(
    len(chembl_tasks), batch_size=batch_size, mode='regression')

# Fit trained model
model.fit(train_dataset, nb_epoch=20)
+6 −22
Original line number Diff line number Diff line
@@ -7,8 +7,12 @@ from __future__ import division
from __future__ import unicode_literals

import numpy as np

from models import GraphConvTensorGraph

# Seed NumPy's global RNG with a fixed value.
np.random.seed(123)
import tensorflow as tf

# Seed TensorFlow's RNG with the same fixed value.
tf.set_random_seed(123)
import deepchem as dc
from deepchem.molnet import load_clintox
@@ -27,28 +31,8 @@ metric = dc.metrics.Metric(
n_feat = 75
# Batch size of models
batch_size = 50
# NOTE(review): this span looks like a diff hunk rendered without +/- markers.
# The SequentialGraph / MultitaskGraphClassifier construction appears to be the
# removed side and the GraphConvTensorGraph call its replacement; confirm
# against the actual file before relying on it.
graph_model = dc.nn.SequentialGraph(n_feat)
# Two graph-convolution stages, each followed by batch norm and pooling.
graph_model.add(dc.nn.GraphConv(64, n_feat, activation='relu'))
graph_model.add(dc.nn.BatchNormalization(epsilon=1e-5, mode=1))
graph_model.add(dc.nn.GraphPool())
graph_model.add(dc.nn.GraphConv(64, 64, activation='relu'))
graph_model.add(dc.nn.BatchNormalization(epsilon=1e-5, mode=1))
graph_model.add(dc.nn.GraphPool())
# Gather Projection
graph_model.add(dc.nn.Dense(128, 64, activation='relu'))
graph_model.add(dc.nn.BatchNormalization(epsilon=1e-5, mode=1))
graph_model.add(dc.nn.GraphGather(batch_size, activation="tanh"))

model = dc.models.MultitaskGraphClassifier(
    graph_model,
    len(clintox_tasks),
    n_feat,
    batch_size=batch_size,
    learning_rate=1e-3,
    learning_rate_decay_time=1000,
    optimizer_type="adam",
    beta1=.9,
    beta2=.999)
# As written, this rebinds `model`, so the classifier built above is unused.
model = GraphConvTensorGraph(
    len(clintox_tasks), batch_size=batch_size, mode='classification')

# Fit trained model
model.fit(train_dataset, nb_epoch=10)

examples/delaney/delaney_DAG.py

deleted 100644 → 0
+0 −69
Original line number Diff line number Diff line
"""
Script that trains DAG models on delaney dataset.
"""
from __future__ import print_function
from __future__ import division
from __future__ import unicode_literals

import numpy as np
np.random.seed(123)
import tensorflow as tf
tf.set_random_seed(123)
import deepchem as dc

# Load Delaney dataset
delaney_tasks, delaney_datasets, transformers = dc.molnet.load_delaney(
    featurizer='GraphConv', split='random')
train_dataset, valid_dataset, test_dataset = delaney_datasets

# Fit models
metric = dc.metrics.Metric(dc.metrics.pearson_r2_score, np.mean)

max_atoms_train = max([mol.get_num_atoms() for mol in train_dataset.X])
max_atoms_valid = max([mol.get_num_atoms() for mol in valid_dataset.X])
max_atoms_test = max([mol.get_num_atoms() for mol in test_dataset.X])
max_atoms = max([max_atoms_train, max_atoms_valid, max_atoms_test])

transformer = dc.trans.DAGTransformer(max_atoms=max_atoms)
train_dataset.reshard(512)
train_dataset = transformer.transform(train_dataset)
valid_dataset.reshard(512)
valid_dataset = transformer.transform(valid_dataset)
test_dataset.reshard(512)
test_dataset = transformer.transform(test_dataset)
# Do setup required for tf/keras models
# Number of features on conv-mols
n_feat = 75
# Batch size of models
batch_size = 64
graph = dc.nn.SequentialDAGGraph(n_atom_feat=n_feat, max_atoms=max_atoms)
graph.add(
    dc.nn.DAGLayer(
        n_graph_feat=30,
        n_atom_feat=n_feat,
        max_atoms=max_atoms,
        batch_size=batch_size))
graph.add(dc.nn.DAGGather(n_graph_feat=30, max_atoms=max_atoms))

model = dc.models.MultitaskGraphRegressor(
    graph,
    len(delaney_tasks),
    n_feat,
    batch_size=batch_size,
    learning_rate=1e-3,
    learning_rate_decay_time=1000,
    optimizer_type="adam",
    beta1=.9,
    beta2=.999)

# Fit trained model
model.fit(train_dataset, nb_epoch=20, log_every_N_batches=5)
print("Evaluating model")
train_scores = model.evaluate(train_dataset, [metric], transformers)
valid_scores = model.evaluate(valid_dataset, [metric], transformers)

print("Train scores")
print(train_scores)

print("Validation scores")
print(valid_scores)
Loading