Unverified Commit e04d9c9a authored by Bharath Ramsundar's avatar Bharath Ramsundar Committed by GitHub
Browse files

Merge pull request #1226 from peastman/graphmodels

Refactor graph models
parents 034dab84 69397d19
Loading
Loading
Loading
Loading
+6 −10
Original line number Diff line number Diff line
@@ -34,7 +34,6 @@ class WeaveLayer(Layer):
               update_pair=True,
               init='glorot_uniform',
               activation='relu',
               dropout=None,
               **kwargs):
    """
    Parameters
@@ -56,9 +55,6 @@ class WeaveLayer(Layer):
      Weight initialization for filters.
    activation: str, optional
      Activation function applied
    dropout: float, optional
      Dropout probability, not supported here

    """
    super(WeaveLayer, self).__init__(**kwargs)
    self.init = init  # Set weight initialization
@@ -286,7 +282,7 @@ class WeaveGather(Layer):
    dist_max = [dist[i].prob(gaussian_memberships[i][0]) for i in range(11)]
    outputs = [dist[i].prob(x) / dist_max[i] for i in range(11)]
    outputs = tf.stack(outputs, axis=2)
    outputs = outputs / tf.reduce_sum(outputs, axis=2, keep_dims=True)
    outputs = outputs / tf.reduce_sum(outputs, axis=2, keepdims=True)
    outputs = tf.reshape(outputs, [-1, self.n_input * 11])
    return outputs

+15 −1
Original line number Diff line number Diff line
@@ -371,7 +371,7 @@ class TensorWrapper(Layer):
  def __init__(self, out_tensor, **kwargs):
    super(TensorWrapper, self).__init__(**kwargs)
    self.out_tensor = out_tensor
    self._shape = out_tensor.get_shape().as_list()
    self._shape = tuple(out_tensor.get_shape().as_list())

  def create_tensor(self, in_layers=None, **kwargs):
    """Take no actions."""
@@ -2529,6 +2529,11 @@ class GraphConv(Layer):
    self.num_deg = 2 * max_deg + (1 - min_deg)
    self.activation_fn = activation_fn
    super(GraphConv, self).__init__(**kwargs)
    try:
      parent_shape = self.in_layers[0].shape
      self._shape = (parent_shape[0], out_channel)
    except:
      pass

  def _create_variables(self, in_channels):
    # Generate the nb_affine weights and biases
@@ -2640,6 +2645,10 @@ class GraphPool(Layer):
    self.min_degree = min_degree
    self.max_degree = max_degree
    super(GraphPool, self).__init__(**kwargs)
    try:
      self._shape = self.in_layers[0].shape
    except:
      pass

  def create_tensor(self, in_layers=None, set_tensors=True, **kwargs):
    inputs = self._get_input_tensors(in_layers)
@@ -2689,6 +2698,11 @@ class GraphGather(Layer):
    self.batch_size = batch_size
    self.activation_fn = activation_fn
    super(GraphGather, self).__init__(**kwargs)
    try:
      parent_shape = self.in_layers[0].shape
      self._shape = (batch_size, 2 * parent_shape[1])
    except:
      pass

  def create_tensor(self, in_layers=None, set_tensors=True, **kwargs):
    inputs = self._get_input_tensors(in_layers)
+370 −470

File changed.

Preview size limit exceeded, changes collapsed.

+251 −22
Original line number Diff line number Diff line
import unittest

import os
import numpy as np
import scipy

import deepchem
import deepchem as dc
from deepchem.data import NumpyDataset
from deepchem.models import GraphConvModel
from deepchem.models import GraphConvModel, DAGModel, WeaveModel, MPNNModel
from deepchem.models import TensorGraph
from deepchem.molnet import load_bace_classification, load_delaney
from deepchem.models.tensorgraph.layers import ReduceSum, L2Loss
from deepchem.models import WeaveModel
from deepchem.feat import ConvMolFeaturizer
from nose.plugins.attrib import attr
from flaky import flaky


class TestGraphModels(unittest.TestCase):
@@ -31,14 +33,14 @@ class TestGraphModels(unittest.TestCase):

    if mode == 'classification':
      y = np.random.randint(0, 2, size=(data_points, len(tasks)))
      metric = deepchem.metrics.Metric(
          deepchem.metrics.roc_auc_score, np.mean, mode="classification")
      metric = dc.metrics.Metric(
          dc.metrics.roc_auc_score, np.mean, mode="classification")
    else:
      y = np.random.normal(size=(data_points, len(tasks)))
      metric = deepchem.metrics.Metric(
          deepchem.metrics.mean_absolute_error, mode="regression")
      metric = dc.metrics.Metric(
          dc.metrics.mean_absolute_error, mode="regression")

    ds = NumpyDataset(train.X[:10], y, w, train.ids[:10])
    ds = NumpyDataset(train.X[:data_points], y, w, train.ids[:data_points])

    return tasks, ds, transformers, metric

@@ -50,12 +52,15 @@ class TestGraphModels(unittest.TestCase):
    model = GraphConvModel(
        len(tasks), batch_size=batch_size, mode='classification')

    model.fit(dataset, nb_epoch=1)
    model.fit(dataset, nb_epoch=10)
    scores = model.evaluate(dataset, [metric], transformers)
    assert scores['mean-roc_auc_score'] >= 0.9

    model.save()
    model = TensorGraph.load_from_dir(model.model_dir)
    scores = model.evaluate(dataset, [metric], transformers)
    scores2 = model.evaluate(dataset, [metric], transformers)
    assert np.allclose(scores['mean-roc_auc_score'],
                       scores2['mean-roc_auc_score'])

  def test_graph_conv_regression_model(self):
    tasks, dataset, transformers, metric = self.get_dataset(
@@ -64,26 +69,38 @@ class TestGraphModels(unittest.TestCase):
    batch_size = 50
    model = GraphConvModel(len(tasks), batch_size=batch_size, mode='regression')

    model.fit(dataset, nb_epoch=1)
    model.fit(dataset, nb_epoch=100)
    scores = model.evaluate(dataset, [metric], transformers)
    assert all(s < 0.1 for s in scores['mean_absolute_error'])

    model.save()
    model = TensorGraph.load_from_dir(model.model_dir)
    scores = model.evaluate(dataset, [metric], transformers)
    scores2 = model.evaluate(dataset, [metric], transformers)
    assert np.allclose(scores['mean_absolute_error'],
                       scores2['mean_absolute_error'])

  def test_graph_conv_error_bars(self):
  def test_graph_conv_regression_uncertainty(self):
    tasks, dataset, transformers, metric = self.get_dataset(
        'regression', 'GraphConv', num_tasks=1)
        'regression', 'GraphConv')

    batch_size = 50
    model = GraphConvModel(len(tasks), batch_size=batch_size, mode='regression')
    model = GraphConvModel(
        len(tasks),
        batch_size=batch_size,
        mode='regression',
        dropout=0.1,
        uncertainty=True)

    model.fit(dataset, nb_epoch=1)
    model.fit(dataset, nb_epoch=100)

    mu, sigma = model.bayesian_predict(
        dataset, transformers, untransform=True, n_passes=24)
    assert mu.shape == (len(dataset), len(tasks))
    assert sigma.shape == (len(dataset), len(tasks))
    # Predict the output and uncertainty.
    pred, std = model.predict_uncertainty(dataset)
    mean_error = np.mean(np.abs(dataset.y - pred))
    mean_value = np.mean(np.abs(dataset.y))
    mean_std = np.mean(std)
    assert mean_error < 0.5 * mean_value
    assert mean_std > 0.5 * mean_error
    assert mean_std < mean_value

  def test_graph_conv_atom_features(self):
    tasks, dataset, transformers, metric = self.get_dataset(
@@ -102,7 +119,7 @@ class TestGraphModels(unittest.TestCase):

    featurizer = ConvMolFeaturizer(atom_properties=[atom_feature_name])
    X = featurizer.featurize(dataset.X)
    dataset = deepchem.data.NumpyDataset(X, np.array(y))
    dataset = dc.data.NumpyDataset(X, np.array(y))
    batch_size = 50
    model = GraphConvModel(
        len(tasks),
@@ -154,3 +171,215 @@ class TestGraphModels(unittest.TestCase):
    module = model2.create_submodel(loss=loss)
    model2.restore()
    model2.fit(dataset, nb_epoch=1, submodel=module)

  @attr("slow")
  def test_weave_model(self):
    tasks, dataset, transformers, metric = self.get_dataset(
        'classification', 'Weave')

    model = WeaveModel(len(tasks), mode='classification')

    model.fit(dataset, nb_epoch=50)
    scores = model.evaluate(dataset, [metric], transformers)
    assert scores['mean-roc_auc_score'] >= 0.9

    model.save()
    model = TensorGraph.load_from_dir(model.model_dir)
    scores2 = model.evaluate(dataset, [metric], transformers)
    assert np.allclose(scores['mean-roc_auc_score'],
                       scores2['mean-roc_auc_score'])

  @flaky
  def test_weave_regression_model(self):
    tasks, dataset, transformers, metric = self.get_dataset(
        'regression', 'Weave')

    model = WeaveModel(len(tasks), mode='regression')

    model.fit(dataset, nb_epoch=80)
    scores = model.evaluate(dataset, [metric], transformers)
    assert all(s < 0.1 for s in scores['mean_absolute_error'])

    model.save()
    model = TensorGraph.load_from_dir(model.model_dir)
    scores2 = model.evaluate(dataset, [metric], transformers)
    assert np.allclose(scores['mean_absolute_error'],
                       scores2['mean_absolute_error'])

  @attr("slow")
  def test_dag_model(self):
    tasks, dataset, transformers, metric = self.get_dataset(
        'classification', 'GraphConv')

    max_atoms = max([mol.get_num_atoms() for mol in dataset.X])
    transformer = dc.trans.DAGTransformer(max_atoms=max_atoms)
    dataset = transformer.transform(dataset)

    model = DAGModel(
        len(tasks), max_atoms=max_atoms, mode='classification', use_queue=False)

    model.fit(dataset, nb_epoch=10)
    scores = model.evaluate(dataset, [metric], transformers)
    assert scores['mean-roc_auc_score'] >= 0.9

    model.save()
    model = TensorGraph.load_from_dir(model.model_dir)
    scores2 = model.evaluate(dataset, [metric], transformers)
    assert np.allclose(scores['mean-roc_auc_score'],
                       scores2['mean-roc_auc_score'])

  @attr("slow")
  def test_dag_regression_model(self):
    tasks, dataset, transformers, metric = self.get_dataset(
        'regression', 'GraphConv')

    max_atoms = max([mol.get_num_atoms() for mol in dataset.X])
    transformer = dc.trans.DAGTransformer(max_atoms=max_atoms)
    dataset = transformer.transform(dataset)

    model = DAGModel(
        len(tasks),
        max_atoms=max_atoms,
        mode='regression',
        learning_rate=0.003,
        use_queue=False)

    model.fit(dataset, nb_epoch=100)
    scores = model.evaluate(dataset, [metric], transformers)
    assert all(s < 0.15 for s in scores['mean_absolute_error'])

    model.save()
    model = TensorGraph.load_from_dir(model.model_dir)
    scores2 = model.evaluate(dataset, [metric], transformers)
    assert np.allclose(scores['mean_absolute_error'],
                       scores2['mean_absolute_error'])

  @attr("slow")
  def test_dag_regression_uncertainty(self):
    tasks, dataset, transformers, metric = self.get_dataset(
        'regression', 'GraphConv')

    max_atoms = max([mol.get_num_atoms() for mol in dataset.X])
    transformer = dc.trans.DAGTransformer(max_atoms=max_atoms)
    dataset = transformer.transform(dataset)

    model = DAGModel(
        len(tasks),
        max_atoms=max_atoms,
        mode='regression',
        learning_rate=0.002,
        use_queue=False,
        dropout=0.1,
        uncertainty=True)

    model.fit(dataset, nb_epoch=100)

    # Predict the output and uncertainty.
    pred, std = model.predict_uncertainty(dataset)
    mean_error = np.mean(np.abs(dataset.y - pred))
    mean_value = np.mean(np.abs(dataset.y))
    mean_std = np.mean(std)
    assert mean_error < 0.5 * mean_value
    assert mean_std > 0.5 * mean_error
    assert mean_std < mean_value

  @attr("slow")
  def test_mpnn_model(self):
    tasks, dataset, transformers, metric = self.get_dataset(
        'classification', 'Weave')

    model = MPNNModel(
        len(tasks),
        mode='classification',
        n_hidden=75,
        n_atom_feat=75,
        n_pair_feat=14,
        T=1,
        M=1)

    model.fit(dataset, nb_epoch=20)
    scores = model.evaluate(dataset, [metric], transformers)
    assert scores['mean-roc_auc_score'] >= 0.9

    model.save()
    model = TensorGraph.load_from_dir(model.model_dir)
    scores2 = model.evaluate(dataset, [metric], transformers)
    assert np.allclose(scores['mean-roc_auc_score'],
                       scores2['mean-roc_auc_score'])

  @attr("slow")
  def test_mpnn_regression_model(self):
    tasks, dataset, transformers, metric = self.get_dataset(
        'regression', 'Weave')

    model = MPNNModel(
        len(tasks),
        mode='regression',
        n_hidden=75,
        n_atom_feat=75,
        n_pair_feat=14,
        T=1,
        M=1)

    model.fit(dataset, nb_epoch=50)
    scores = model.evaluate(dataset, [metric], transformers)
    assert all(s < 0.1 for s in scores['mean_absolute_error'])

    model.save()
    model = TensorGraph.load_from_dir(model.model_dir)
    scores2 = model.evaluate(dataset, [metric], transformers)
    assert np.allclose(scores['mean_absolute_error'],
                       scores2['mean_absolute_error'])

  @attr("slow")
  def test_mpnn_regression_uncertainty(self):
    tasks, dataset, transformers, metric = self.get_dataset(
        'regression', 'Weave')

    model = MPNNModel(
        len(tasks),
        mode='regression',
        n_hidden=75,
        n_atom_feat=75,
        n_pair_feat=14,
        T=1,
        M=1,
        dropout=0.1,
        uncertainty=True)

    model.fit(dataset, nb_epoch=40)

    # Predict the output and uncertainty.
    pred, std = model.predict_uncertainty(dataset)
    mean_error = np.mean(np.abs(dataset.y - pred))
    mean_value = np.mean(np.abs(dataset.y))
    mean_std = np.mean(std)
    assert mean_error < 0.5 * mean_value
    assert mean_std > 0.5 * mean_error
    assert mean_std < mean_value

  @flaky
  def test_dtnn_regression_model(self):
    current_dir = os.path.dirname(os.path.abspath(__file__))
    input_file = os.path.join(current_dir, "example_DTNN.mat")
    dataset = scipy.io.loadmat(input_file)
    X = dataset['X']
    y = dataset['T']
    w = np.ones_like(y)
    dataset = dc.data.NumpyDataset(X, y, w, ids=None)
    n_tasks = y.shape[1]

    model = dc.models.DTNNModel(
        n_tasks,
        n_embedding=20,
        n_distance=100,
        learning_rate=1.0,
        mode="regression")

    # Fit trained model
    model.fit(dataset, nb_epoch=250)

    # Eval model on train
    pred = model.predict(dataset)
    mean_rel_error = np.mean(np.abs(1 - pred / y))
    assert mean_rel_error < 0.1
Loading