Commit c6c9993a authored by Bharath Ramsundar's avatar Bharath Ramsundar
Browse files

Updated graph conv models

parent ff2d2ab0
Loading
Loading
Loading
Loading
+46 −49
Original line number Diff line number Diff line
@@ -6,20 +6,13 @@ from __future__ import division
from __future__ import unicode_literals

import numpy as np
np.random.seed(123)
import tensorflow as tf
tf.set_random_seed(123)
import deepchem as dc
from keras import backend as K
from chembl_datasets import load_chembl

# Only for debug!
np.random.seed(123)

g = tf.Graph()
sess = tf.Session(graph=g)
K.set_session(sess)

with g.as_default():
  tf.set_random_seed(123)
# Load ChEMBL dataset
chembl_tasks, datasets, transformers = load_chembl(shard_size=2000,
  featurizer="GraphConv", set="5thresh", split="random")
train_dataset, valid_dataset, test_dataset = datasets
@@ -33,23 +26,27 @@ with g.as_default():
# Batch size of models
batch_size = 128
graph_model = dc.nn.SequentialGraph(n_feat)
  graph_model.add(dc.nn.GraphConv(128, activation='relu'))
graph_model.add(dc.nn.GraphConv(128, n_feat, activation='relu'))
graph_model.add(dc.nn.BatchNormalization(epsilon=1e-5, mode=1))
graph_model.add(dc.nn.GraphPool())
  graph_model.add(dc.nn.GraphConv(128, activation='relu'))
graph_model.add(dc.nn.GraphConv(128, 64, activation='relu'))
graph_model.add(dc.nn.BatchNormalization(epsilon=1e-5, mode=1))
graph_model.add(dc.nn.GraphPool())
# Gather Projection
  graph_model.add(dc.nn.Dense(256, activation='relu'))
graph_model.add(dc.nn.Dense(256, 128, activation='relu'))
graph_model.add(dc.nn.BatchNormalization(epsilon=1e-5, mode=1))
graph_model.add(dc.nn.GraphGather(batch_size, activation="tanh"))
  # Dense post-processing layer

  with tf.Session() as sess:
model = dc.models.MultitaskGraphRegressor(
      sess, graph_model, len(chembl_tasks), batch_size=batch_size,
      learning_rate=1e-3, learning_rate_decay_time=1000,
      optimizer_type="adam", beta1=.9, beta2=.999)
  graph_model,
  len(chembl_tasks),
  n_feat,
  batch_size=batch_size,
  learning_rate=1e-3,
  learning_rate_decay_time=1000,
  optimizer_type="adam",
  beta1=.9,
  beta2=.999)

# Fit trained model
model.fit(train_dataset, nb_epoch=20)
+44 −52
Original line number Diff line number Diff line
@@ -7,22 +7,13 @@ from __future__ import division
from __future__ import unicode_literals

import numpy as np
np.random.seed(123)
import tensorflow as tf
from keras import backend as K

tf.set_random_seed(123)
import deepchem as dc
from clintox_datasets import load_clintox

# Only for debug!
np.random.seed(123)

g = tf.Graph()
sess = tf.Session(graph=g)
K.set_session(sess)

with g.as_default():
# Load clintox dataset
  n_features = 1024
clintox_tasks, clintox_datasets, transformers = load_clintox(
    featurizer='GraphConv', split='random')
train_dataset, valid_dataset, test_dataset = clintox_datasets
@@ -37,26 +28,27 @@ with g.as_default():
# Batch size of models
batch_size = 50
graph_model = dc.nn.SequentialGraph(n_feat)
  graph_model.add(dc.nn.GraphConv(64, activation='relu'))
graph_model.add(dc.nn.GraphConv(64, n_feat, activation='relu'))
graph_model.add(dc.nn.BatchNormalization(epsilon=1e-5, mode=1))
graph_model.add(dc.nn.GraphPool())
  graph_model.add(dc.nn.GraphConv(64, activation='relu'))
graph_model.add(dc.nn.GraphConv(64, 64, activation='relu'))
graph_model.add(dc.nn.BatchNormalization(epsilon=1e-5, mode=1))
graph_model.add(dc.nn.GraphPool())
# Gather Projection
  graph_model.add(dc.nn.Dense(128, activation='relu'))
graph_model.add(dc.nn.Dense(128, 64, activation='relu'))
graph_model.add(dc.nn.BatchNormalization(epsilon=1e-5, mode=1))
graph_model.add(dc.nn.GraphGather(batch_size, activation="tanh"))
  # Dense post-processing layer

  with tf.Session() as sess:
    model = dc.models.MultitaskGraphClassifier(sess, graph_model,
model = dc.models.MultitaskGraphClassifier(
    graph_model,
    len(clintox_tasks),
    n_feat,
    batch_size=batch_size,
    learning_rate=1e-3,
    learning_rate_decay_time=1000,
    optimizer_type="adam",
                                               beta1=.9, beta2=.999)
    beta1=.9,
    beta2=.999)

# Fit trained model
model.fit(train_dataset, nb_epoch=10)
+50 −53
Original line number Diff line number Diff line
@@ -6,21 +6,13 @@ from __future__ import division
from __future__ import unicode_literals

import numpy as np
np.random.seed(123)
import tensorflow as tf
tf.set_random_seed(123)
import deepchem as dc
from keras import backend as K
from delaney_datasets import load_delaney

# Only for debug!
np.random.seed(123)

g = tf.Graph()
sess = tf.Session(graph=g)
K.set_session(sess)

with g.as_default():
  # Load Tox21 dataset
  tf.set_random_seed(123)
# Load Delaney dataset
delaney_tasks, delaney_datasets, transformers = load_delaney(featurizer='GraphConv',split='index')
train_dataset, valid_dataset, test_dataset = delaney_datasets

@@ -33,23 +25,28 @@ with g.as_default():
# Batch size of models
batch_size = 128
graph_model = dc.nn.SequentialGraph(n_feat)
  graph_model.add(dc.nn.GraphConv(128, activation='relu'))
graph_model.add(dc.nn.GraphConv(128, n_feat, activation='relu'))
graph_model.add(dc.nn.BatchNormalization(epsilon=1e-5, mode=1))
graph_model.add(dc.nn.GraphPool())
  graph_model.add(dc.nn.GraphConv(128, activation='relu'))
graph_model.add(dc.nn.GraphConv(128, 128, activation='relu'))
graph_model.add(dc.nn.BatchNormalization(epsilon=1e-5, mode=1))
graph_model.add(dc.nn.GraphPool())
# Gather Projection
  graph_model.add(dc.nn.Dense(256, activation='relu'))
graph_model.add(dc.nn.Dense(256, 128, activation='relu'))
graph_model.add(dc.nn.BatchNormalization(epsilon=1e-5, mode=1))
graph_model.add(dc.nn.GraphGather(batch_size, activation="tanh"))
# Dense post-processing layer

  with tf.Session() as sess:
model = dc.models.MultitaskGraphRegressor(
      sess, graph_model, len(delaney_tasks), batch_size=batch_size,
      learning_rate=1e-3, learning_rate_decay_time=1000,
      optimizer_type="adam", beta1=.9, beta2=.999)
  graph_model,
  len(delaney_tasks),
  n_feat,
  batch_size=batch_size,
  learning_rate=1e-3,
  learning_rate_decay_time=1000,
  optimizer_type="adam",
  beta1=.9,
  beta2=.999)

# Fit trained model
model.fit(train_dataset, nb_epoch=20)

examples/muv/muv_keras.py

deleted 100644 → 0
+0 −43
Original line number Diff line number Diff line
"""
Script that trains Keras multitask models on MUV dataset.
"""
from __future__ import print_function
from __future__ import division
from __future__ import unicode_literals

import os
import numpy as np
import shutil
import deepchem as dc
from muv_datasets import load_muv

# Set some global variables up top
np.random.seed(123)

# Load MUV data
muv_tasks, muv_datasets, transformers = load_muv()
train_dataset, valid_dataset, test_dataset = muv_datasets 
n_features = 1024 


# Build model
metric = dc.metrics.Metric(
    dc.metrics.roc_auc_score, np.mean, mode="classification")

keras_model = dc.models.MultiTaskDNN(
    len(muv_tasks), n_features, "classification",
    dropout=.25, learning_rate=.001, decay=1e-4)
model = dc.models.KerasModel(keras_model, verbosity="high")

# Fit trained model
model.fit(train_dataset)
model.save()

train_scores = model.evaluate(train_dataset, [metric], transformers)
valid_scores = model.evaluate(valid_dataset, [metric], transformers)

print("Train scores")
print(train_scores)

print("Validation scores")
print(valid_scores)
+50 −56
Original line number Diff line number Diff line
@@ -6,21 +6,13 @@ from __future__ import division
from __future__ import unicode_literals

import numpy as np
np.random.seed(123)
import tensorflow as tf
tf.set_random_seed(123)
import deepchem as dc
from keras import backend as K
from sampl_datasets import load_sampl

# Only for debug!
np.random.seed(123)

g = tf.Graph()
sess = tf.Session(graph=g)
K.set_session(sess)

with g.as_default():
# Load Tox21 dataset
  tf.set_random_seed(123)
SAMPL_tasks, SAMPL_datasets, transformers = load_sampl(featurizer='GraphConv')
train_dataset, valid_dataset, test_dataset = SAMPL_datasets

@@ -33,23 +25,28 @@ with g.as_default():
# Batch size of models
batch_size = 128
graph_model = dc.nn.SequentialGraph(n_feat)
  graph_model.add(dc.nn.GraphConv(128, activation='relu'))
graph_model.add(dc.nn.GraphConv(128, n_feat, activation='relu'))
graph_model.add(dc.nn.BatchNormalization(epsilon=1e-5, mode=1))
graph_model.add(dc.nn.GraphPool())
  graph_model.add(dc.nn.GraphConv(128, activation='relu'))
graph_model.add(dc.nn.GraphConv(128, 128, activation='relu'))
graph_model.add(dc.nn.BatchNormalization(epsilon=1e-5, mode=1))
graph_model.add(dc.nn.GraphPool())
# Gather Projection
  graph_model.add(dc.nn.Dense(256, activation='relu'))
graph_model.add(dc.nn.Dense(256, 128, activation='relu'))
graph_model.add(dc.nn.BatchNormalization(epsilon=1e-5, mode=1))
graph_model.add(dc.nn.GraphGather(batch_size, activation="tanh"))
# Dense post-processing layer

  with tf.Session() as sess:
model = dc.models.MultitaskGraphRegressor(
      sess, graph_model, len(SAMPL_tasks), batch_size=batch_size,
      learning_rate=1e-3, learning_rate_decay_time=1000,
      optimizer_type="adam", beta1=.9, beta2=.999)
  graph_model,
  len(SAMPL_tasks),
  n_feat,
  batch_size=batch_size,
  learning_rate=1e-3,
  learning_rate_decay_time=1000,
  optimizer_type="adam",
  beta1=.9,
  beta2=.999)

# Fit trained model
model.fit(train_dataset, nb_epoch=20)
@@ -63,6 +60,3 @@ with g.as_default():

print("Validation scores")
print(valid_scores)
    pred = model.predict(train_dataset, transformers)
    pred2 = model.predict(valid_dataset, transformers)
    pred3 = model.predict(test_dataset, transformers)
Loading