Commit 6c6b5faa authored by Bharath Ramsundar's avatar Bharath Ramsundar Committed by GitHub
Browse files

Merge pull request #465 from miaecle/DTNN

Deep Tensor Neural Networks for QM series
parents 9eef9e66 812a511d
Loading
Loading
Loading
Loading
+2 −0
Original line number Diff line number Diff line
@@ -9,6 +9,8 @@ from deepchem.models.models import Model
from deepchem.models.sklearn_models import SklearnModel
from deepchem.models.tf_new_models.multitask_classifier import MultitaskGraphClassifier
from deepchem.models.tf_new_models.multitask_regressor import MultitaskGraphRegressor
from deepchem.models.tf_new_models.DTNN_regressor import DTNNGraphRegressor

from deepchem.models.tf_new_models.support_classifier import SupportGraphClassifier
from deepchem.models.multitask import SingletaskToMultitask
from deepchem.models.sequential import Sequential
+10.1 KiB

File added.

No diff preview for this file type.

+45 −0
Original line number Diff line number Diff line
@@ -17,6 +17,7 @@ import sklearn
import shutil
import tensorflow as tf
import deepchem as dc
import scipy.io
from tensorflow.python.framework import test_util
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import RandomForestRegressor
@@ -661,6 +662,50 @@ class TestOverfit(test_util.TensorFlowTestCase):

    assert scores[classification_metric.name] < .2

  def test_DTNN_multitask_regression_overfit(self):
    """Test that a deep tensor neural net (DTNN) overfits tiny data."""
    np.random.seed(123)
    tf.set_random_seed(123)

    # Load the mini DTNN QM example dataset (Coulomb matrices in 'X',
    # regression targets in 'T').
    input_file = os.path.join(self.current_dir, "example_DTNN.mat")
    dataset = scipy.io.loadmat(input_file)
    X = dataset['X']
    y = dataset['T']
    w = np.ones_like(y)
    dataset = dc.data.DiskDataset.from_numpy(X, y, w, ids=None)
    regression_metric = dc.metrics.Metric(
        dc.metrics.pearson_r2_score, task_averager=np.mean)
    n_tasks = y.shape[1]
    # First element of the data shape is the padded max atom count per
    # molecule; the shape is already indexable, no list() wrapper needed.
    max_n_atoms = dataset.get_data_shape()[0]
    batch_size = 10

    # Embedding -> two interaction ("step") layers -> gather, all width 20.
    graph_model = dc.nn.SequentialDTNNGraph(max_n_atoms=max_n_atoms)
    graph_model.add(dc.nn.DTNNEmbedding(n_embedding=20))
    graph_model.add(dc.nn.DTNNStep(n_embedding=20))
    graph_model.add(dc.nn.DTNNStep(n_embedding=20))
    graph_model.add(dc.nn.DTNNGather(n_embedding=20))
    n_feat = 20
    model = dc.models.DTNNGraphRegressor(
        graph_model,
        n_tasks,
        n_feat,
        batch_size=batch_size,
        learning_rate=1e-3,
        learning_rate_decay_time=1000,
        optimizer_type="adam",
        beta1=.9,
        beta2=.999)

    # Fit trained model
    model.fit(dataset, nb_epoch=20)
    model.save()

    # Eval model on train; overfitting the tiny set should give r^2 > .9.
    scores = model.evaluate(dataset, [regression_metric])

    assert scores[regression_metric.name] > .9

  def test_siamese_singletask_classification_overfit(self):
    """Test siamese singletask model overfits tiny data."""
    np.random.seed(123)
+28 −0
Original line number Diff line number Diff line
import tensorflow as tf
from deepchem.models.tf_new_models.multitask_regressor import MultitaskGraphRegressor


class DTNNGraphRegressor(MultitaskGraphRegressor):
  """Multitask regressor on top of a DTNN graph model.

  Overrides build() to attach one linear regression head per task to the
  molecule-level features produced by the underlying DTNN graph model.
  """

  def build(self):
    """Construct label/weight placeholders and per-task linear output heads.

    Returns
    -------
    list of tf.Tensor
      One prediction tensor per task (squeezed to drop the width-1 axis).
    """
    # Create target inputs
    self.label_placeholder = tf.placeholder(
        dtype='float32', shape=(None, self.n_tasks), name="label_placeholder")
    # NOTE: fixed the misspelled placeholder name ("weight_placholder").
    self.weight_placeholder = tf.placeholder(
        dtype='float32', shape=(None, self.n_tasks), name="weight_placeholder")

    # Molecule-level feature vectors produced by the graph model.
    feat = self.model.return_outputs()
    feat_size = self.feat_dim
    outputs = []
    W_list = []
    b_list = []
    for task in range(self.n_tasks):
      # Each task gets its own linear head: output = feat @ W + b.
      W_list.append(
          tf.Variable(
              tf.truncated_normal([feat_size, 1], stddev=0.01),
              name='w',
              dtype=tf.float32))
      b_list.append(tf.Variable(tf.zeros([1]), name='b', dtype=tf.float32))
      outputs.append(
          tf.squeeze(tf.nn.xw_plus_b(feat, W_list[task], b_list[task])))
    return outputs
+50 −1
Original line number Diff line number Diff line
@@ -11,7 +11,7 @@ __license__ = "MIT"

import tensorflow as tf
from deepchem.nn.layers import GraphGather
from deepchem.models.tf_new_models.graph_topology import GraphTopology
from deepchem.models.tf_new_models.graph_topology import GraphTopology, DTNNGraphTopology


class SequentialGraph(object):
@@ -80,6 +80,55 @@ class SequentialGraph(object):
    return self.layers[layer_id]


class SequentialDTNNGraph(SequentialGraph):
  """An analog of Keras Sequential class for Coulomb Matrix data.

  Automatically generates and passes topology placeholders to each layer.
  """

  def __init__(self,
               max_n_atoms,
               n_distance=100,
               distance_min=-1.,
               distance_max=18.):
    """
    Parameters
    ----------
    max_n_atoms: int
      maximum number of atoms in a molecule
    n_distance: int, optional
      granularity of distance matrix
      step size will be (distance_max-distance_min)/n_distance
    distance_min: float, optional
      minimum distance of atom pairs, default = -1 Angstrom
    distance_max: float, optional
      maximum distance of atom pairs, default = 18 Angstrom
    """
    self.graph = tf.Graph()
    with self.graph.as_default():
      self.graph_topology = DTNNGraphTopology(
          max_n_atoms,
          n_distance,
          distance_min=distance_min,
          distance_max=distance_max)
      # The first layer added consumes the atom-number input placeholder.
      self.output = self.graph_topology.get_atom_number_placeholder()
    # Keep track of the layers
    self.layers = []

  def add(self, layer):
    """Adds a new layer to model, wiring in required topology inputs.

    DTNNStep layers additionally receive the topology placeholders;
    DTNNGather additionally receives the atom mask placeholder; any other
    layer just consumes the previous layer's output.
    """
    with self.graph.as_default():
      # Dispatch on the class name so layer classes need not be imported here.
      layer_type = type(layer).__name__
      if layer_type == 'DTNNStep':
        self.output = layer([self.output] +
                            self.graph_topology.get_topology_placeholders())
      elif layer_type == 'DTNNGather':
        self.output = layer(
            [self.output, self.graph_topology.atom_mask_placeholder])
      else:
        self.output = layer(self.output)
      self.layers.append(layer)


class SequentialSupportGraph(object):
  """An analog of Keras Sequential model for test/support models."""

Loading