Commit 83f11a11 authored by miaecle
Browse files

multitask DTNN

parent 13d859ec
Loading
Loading
Loading
Loading
+2 −5
Original line number Diff line number Diff line
@@ -10,9 +10,6 @@ import numpy as np
import tensorflow as tf
import sklearn.metrics
import tempfile
from deepchem.utils.save import log
from deepchem.models.tf_new_models.graph_topology import merge_dicts
from deepchem.models.tf_new_models.multitask_classifier import get_loss_fn
from deepchem.models.tf_new_models.multitask_regressor import MultitaskGraphRegressor


@@ -31,7 +28,7 @@ class DTNNRegressor(MultitaskGraphRegressor):
               beta2=.999,
               pad_batches=True,
               verbose=True):

    self.n_tasks  = n_tasks
    self.verbose = verbose
    self.n_tasks = n_tasks
    self.final_loss = final_loss
@@ -82,4 +79,4 @@ class DTNNRegressor(MultitaskGraphRegressor):
        dtype='float32', shape=(None, self.n_tasks), name="weight_placholder")

    outputs = self.model.return_outputs()
    return [outputs]
    return outputs
+4 −2
Original line number Diff line number Diff line
@@ -137,8 +137,10 @@ class MultitaskGraphRegressor(Model):
    task_losses = []
    # label_placeholder of shape (batch_size, n_tasks). Split into n_tasks
    # tensors of shape (batch_size,)
    task_labels = tf.split(1, self.n_tasks, self.label_placeholder)
    task_weights = tf.split(1, self.n_tasks, self.weight_placeholder)
    task_labels = tf.split(
        axis=1, num_or_size_splits=self.n_tasks, value=self.label_placeholder)
    task_weights = tf.split(
        axis=1, num_or_size_splits=self.n_tasks, value=self.weight_placeholder)
    for task in range(self.n_tasks):
      task_label_vector = task_labels[task]
      task_weight_vector = task_weights[task]
+23 −13
Original line number Diff line number Diff line
@@ -100,8 +100,8 @@ def graph_conv(atoms, deg_adj_lists, deg_slice, max_deg, min_deg, W_list,
  if min_deg == 0:
    deg = 0

    begin = tf.stack([deg_slice[deg - min_deg, 0], 0])
    size = tf.stack([deg_slice[deg - min_deg, 1], -1])
    begin = tf.pack([deg_slice[deg - min_deg, 0], 0])
    size = tf.pack([deg_slice[deg - min_deg, 1], -1])
    self_atoms = tf.slice(atoms, begin, size)

    # Only use the self layer
@@ -911,11 +911,13 @@ class DTNNStep(Layer):
class DTNNGather(Layer):

  def __init__(self, 
               n_tasks=1,
               n_features=20,
               n_hidden=20,
               n_hidden=50,
               init='glorot_uniform',
               activation='tanh',
               **kwargs):
    self.n_tasks = n_tasks
    self.n_features = n_features
    self.n_hidden = n_hidden
    self.init = initializations.get(init)  # Set weight initialization
@@ -924,13 +926,17 @@ class DTNNGather(Layer):
    super(DTNNGather, self).__init__(**kwargs)

  def build(self):
    """Create per-task readout weights for the gather layer.

    For each of the ``n_tasks`` output heads, builds a two-layer readout:
    ``W_out1``/``b_out1`` map ``n_features -> n_hidden`` and
    ``W_out2``/``b_out2`` map ``n_hidden -> 1`` (a scalar per atom).

    Side effect: sets ``self.trainable_weights`` to the flat list of all
    per-task variables so the optimizer sees every head's parameters.
    """
    # NOTE(review): the pre-multitask single-weight assignments that the
    # diff replaced are dropped here; they only allocated unused variables
    # and overwrote trainable_weights.
    self.W_out1_list = []
    self.W_out2_list = []
    self.b_out1_list = []
    self.b_out2_list = []
    for task in range(self.n_tasks):
      self.W_out1_list.append(self.init([self.n_features, self.n_hidden]))
      self.W_out2_list.append(self.init([self.n_hidden, 1]))
      self.b_out1_list.append(model_ops.zeros(shape=[self.n_hidden,]))
      self.b_out2_list.append(model_ops.zeros(shape=[1,]))

    self.trainable_weights = (
        self.W_out1_list + self.W_out2_list + self.b_out1_list +
        self.b_out2_list)

  def call(self, x):
    """Execute this layer on input tensors.

    Parameters
    ----------
    x: tf.Tensor
      Atom features; tensordot contracts axis 2, so this is rank 3 —
      presumably (n_samples, n_atoms, n_features). TODO(review): confirm
      the leading dimension against callers.

    Returns
    -------
    list of tf.Tensor
      One tensor per task: each atom's features pass through that task's
      two-layer readout and the per-atom scalars are summed over axis 1.
    """
    # NOTE: rebuilds weights on every call, matching the original layer's
    # build-in-call pattern.
    self.build()
    outputs = []
    for task in range(self.n_tasks):
      output = tf.tensordot(
          x, self.W_out1_list[task], [[2], [0]]) + self.b_out1_list[task]
      output = self.activation(output)
      output = tf.tensordot(
          output, self.W_out2_list[task], [[2], [0]]) + self.b_out2_list[task]
      # Drop the trailing singleton dim, then sum atom contributions.
      output = tf.reduce_sum(tf.squeeze(output, axis=2), axis=1)
      outputs.append(output)

    return outputs
+2 −2
Original line number Diff line number Diff line
@@ -41,8 +41,8 @@ model = dc.models.DTNNRegressor(
model.fit(train_dataset, nb_epoch=10)

print("Evaluating model")
train_scores = model.evaluate(train_dataset, [metric], transformers)
valid_scores = model.evaluate(valid_dataset, [metric], transformers)
train_scores = model.evaluate(train_dataset, metric, transformers)
valid_scores = model.evaluate(valid_dataset, metric, transformers)

print("Train scores")
print(train_scores)
+51 −0
Original line number Diff line number Diff line
"""
Script that trains graph-conv models on Tox21 dataset.
"""
from __future__ import print_function
from __future__ import division
from __future__ import unicode_literals

import numpy as np
np.random.seed(123)
import tensorflow as tf
tf.set_random_seed(123)
import deepchem as dc

# Load Tox21 dataset
tasks, datasets, transformers = dc.molnet.load_qm7b_from_mat()
train_dataset, valid_dataset, test_dataset = datasets

# Fit models
metric = dc.metrics.Metric(dc.metrics.pearson_r2_score, mode="regression")


# Batch size of models
batch_size = 50
graph_model = dc.nn.SequentialDTNNGraph(max_n_atoms=23, n_distance=100)
graph_model.add(dc.nn.DTNNEmbedding(n_features=20))
graph_model.add(dc.nn.DTNNStep(n_features=20, n_distance=100))
graph_model.add(dc.nn.DTNNStep(n_features=20, n_distance=100))
graph_model.add(dc.nn.DTNNGather(n_tasks=len(tasks)))

model = dc.models.DTNNRegressor(
    graph_model,
    n_tasks=len(tasks),
    batch_size=batch_size,
    learning_rate=1e-3,
    learning_rate_decay_time=1000,
    optimizer_type="adam",
    beta1=.9,
    beta2=.999)

# Fit trained model
model.fit(train_dataset, nb_epoch=10)

print("Evaluating model")
train_scores = model.evaluate(train_dataset, [metric], transformers)
valid_scores = model.evaluate(valid_dataset, [metric], transformers)

print("Train scores")
print(train_scores)

print("Validation scores")
print(valid_scores)