Commit 104dfbec authored by miaecle's avatar miaecle
Browse files

gdb7

parent 1a0076e6
Loading
Loading
Loading
Loading
+66 −0
Original line number Diff line number Diff line
"""
Script that trains graph-conv models on the gdb7 dataset.

A SequentialGraph model (two GraphConv/BatchNormalization/GraphPool stages,
a Dense projection and a GraphGather) is wrapped in a
MultitaskGraphRegressor, trained on the training split, and then scored on
the training and validation splits with mean absolute error and Pearson R^2.
"""
from __future__ import print_function
from __future__ import division
from __future__ import unicode_literals

import numpy as np
import tensorflow as tf
import deepchem as dc
from keras import backend as K
from gdb7_datasets import load_gdb7

# Only for debug! Fixes the NumPy seed so repeated runs are comparable.
np.random.seed(123)

# Create a dedicated graph, open a session bound to it, and register that
# session with the Keras backend.
g = tf.Graph()
sess = tf.Session(graph=g)
K.set_session(sess)

with g.as_default():
  # Load gdb7 dataset (GraphConv featurization, 'indice' split)
  # The TensorFlow seed is set inside the `g.as_default()` context.
  tf.set_random_seed(123)
  gdb7_tasks, gdb7_datasets, transformers = load_gdb7(featurizer='GraphConv', split='indice')
  # NOTE(review): test_dataset is unpacked but not used in this script.
  train_dataset, valid_dataset, test_dataset = gdb7_datasets

  # Metrics reported after training: mean absolute error and Pearson R^2,
  # both created in regression mode.
  metric = [dc.metrics.Metric(dc.metrics.mean_absolute_error, mode="regression"), 
            dc.metrics.Metric(dc.metrics.pearson_r2_score, mode="regression")]

  # Do setup required for tf/keras models
  # Number of features on conv-mols
  n_feat = 75
  # Batch size of models (also passed to GraphGather and the regressor below)
  batch_size = 128
  graph_model = dc.nn.SequentialGraph(n_feat)
  # First convolution stage: graph conv -> batch norm -> graph pool
  graph_model.add(dc.nn.GraphConv(128, activation='relu'))
  graph_model.add(dc.nn.BatchNormalization(epsilon=1e-5, mode=1))
  graph_model.add(dc.nn.GraphPool())
  # Second convolution stage, same layout as the first
  graph_model.add(dc.nn.GraphConv(128, activation='relu'))
  graph_model.add(dc.nn.BatchNormalization(epsilon=1e-5, mode=1))
  graph_model.add(dc.nn.GraphPool())
  # Gather Projection
  graph_model.add(dc.nn.Dense(256, activation='relu'))
  graph_model.add(dc.nn.BatchNormalization(epsilon=1e-5, mode=1))
  graph_model.add(dc.nn.GraphGather(batch_size, activation="tanh"))
  # NOTE(review): a "dense post-processing layer" was announced here in the
  # original comment, but no such layer is added after the gather step.

  # NOTE(review): this rebinds `sess`, so the model receives a new session
  # rather than the one handed to K.set_session above, and that earlier
  # session is never explicitly closed in this script -- confirm intended.
  with tf.Session() as sess:
    model = dc.models.MultitaskGraphRegressor(
      sess, graph_model, len(gdb7_tasks), batch_size=batch_size,
      learning_rate=1e-3, learning_rate_decay_time=1000,
      optimizer_type="adam", beta1=.9, beta2=.999)

    # Train the model on the training split for 20 epochs
    model.fit(train_dataset, nb_epoch=20)

    # Score the trained model on the training and validation splits,
    # passing the transformers returned by the loader.
    print("Evaluating model")
    train_scores = model.evaluate(train_dataset, metric, transformers)
    valid_scores = model.evaluate(valid_dataset, metric, transformers)

    print("Train scores")
    print(train_scores)

    print("Validation scores")
    print(valid_scores)
+6835 −0

File added.

Preview size limit exceeded, changes collapsed.

+42 −0
Original line number Diff line number Diff line
"""
Script that trains multitask models on gdb7 dataset.
"""
from __future__ import print_function
from __future__ import division
from __future__ import unicode_literals

import os
import shutil
import numpy as np
import deepchem as dc
from gdb7_datasets import load_gdb7

# Debug aid: pin NumPy's random seed so repeated runs are comparable.
np.random.seed(123)

# Input width handed to the regressor below.
n_features = 1024

# Fetch gdb7 with ECFP featurization and the 'indice' split, then unpack
# the three splits returned by the loader.
gdb7_tasks, gdb7_datasets, transformers = load_gdb7(
    featurizer='ECFP', split='indice')
train_dataset, valid_dataset, test_dataset = gdb7_datasets

# Metrics reported after training, both built in regression mode.
mae_metric = dc.metrics.Metric(
    dc.metrics.mean_absolute_error, mode="regression")
r2_metric = dc.metrics.Metric(
    dc.metrics.pearson_r2_score, mode="regression")
metric = [mae_metric, r2_metric]

# Model hyperparameters, collected in one place for readability.
n_tasks = len(gdb7_tasks)
model_params = dict(
    layer_sizes=[1000],
    dropouts=[.25],
    learning_rate=0.001,
    batch_size=50,
    verbosity="high",
)
model = dc.models.TensorflowMultiTaskRegressor(
    n_tasks, n_features, **model_params)

# Train on the training split with the default fit settings, then save.
model.fit(train_dataset)
model.save()

# Score the fitted model on the training and validation splits.
print("Evaluating model")
train_scores = model.evaluate(train_dataset, metric, transformers)
valid_scores = model.evaluate(valid_dataset, metric, transformers)

# Report each split's scores under its own heading.
for heading, scores in (("Train scores", train_scores),
                        ("Validation scores", valid_scores)):
    print(heading)
    print(scores)
+6 −2
Original line number Diff line number Diff line
@@ -9,13 +9,17 @@ import os
import deepchem as dc
import numpy as np
from gdb7_datasets import load_gdb7_from_mat
from gdb7_datasets import load_gdb7

np.random.seed(123)
split = 0
num_atoms = 23

gdb7_tasks, datasets, transformers = load_gdb7_from_mat(split)
train_dataset, test_dataset = datasets
#gdb7_tasks, datasets, transformers = load_gdb7_from_mat(split)
#train_dataset, test_dataset = datasets

gdb7_tasks, datasets, transformers = load_gdb7(split='indice')
train_dataset, test_dataset, _ = datasets

fit_transformers = [dc.trans.CoulombFitTransformer(train_dataset)]
regression_metric = [dc.metrics.Metric(dc.metrics.mean_absolute_error, mode="regression"),