Commit b675fbae authored by leswing's avatar leswing
Browse files

SAScore module

parent 305aea94
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -19,6 +19,7 @@ from deepchem.models.tensorgraph.robust_multitask import RobustMultitaskRegresso
from deepchem.models.tensorgraph.progressive_multitask import ProgressiveMultitaskRegressor, ProgressiveMultitaskClassifier
from deepchem.models.tensorgraph.models.graph_models import WeaveModel, DTNNTensorGraph, DAGTensorGraph, GraphConvModel, MPNNTensorGraph
from deepchem.models.tensorgraph.models.symmetry_function_regression import BPSymmetryFunctionRegression, ANIRegression
from deepchem.models.tensorgraph.models.scscore import ScScoreModel

from deepchem.models.tensorgraph.models.seqtoseq import SeqToSeq
from deepchem.models.tensorgraph.models.gan import GAN, WGAN
+2 −2
Original line number Diff line number Diff line
@@ -4325,7 +4325,7 @@ class GraphCNN(Layer):
    return result


class Hingeloss(Layer):
class HingeLoss(Layer):
  """This layer computes the hinge loss on inputs:[labels,logits]
  labels: The values of this tensor is expected to be 1.0 or 0.0. The shape should be the same as logits.
  logits: Holds the log probabilities for labels, a float tensor.
@@ -4333,7 +4333,7 @@ class Hingeloss(Layer):
  """

  def __init__(self, in_layers=None, **kwargs):
    super(Hingeloss, self).__init__(in_layers, **kwargs)
    super(HingeLoss, self).__init__(in_layers, **kwargs)
    try:
      self._shape = self.in_layers[1].shape
    except:
+104 −0
Original line number Diff line number Diff line
import collections

import numpy as np
import six
import tensorflow as tf

from deepchem.data import NumpyDataset
from deepchem.feat.graph_features import ConvMolFeaturizer
from deepchem.feat.mol_graphs import ConvMol
from deepchem.metrics import to_one_hot
from deepchem.models.tensorgraph.graph_layers import WeaveGather, \
  DTNNEmbedding, DTNNStep, DTNNGather, DAGLayer, \
  DAGGather, DTNNExtract, MessagePassing, SetGather
from deepchem.models.tensorgraph.graph_layers import WeaveLayerFactory
from deepchem.models.tensorgraph.layers import Dense, SoftMax, \
  SoftMaxCrossEntropy, GraphConv, BatchNorm, HingeLoss, Sigmoid, \
  GraphPool, GraphGather, WeightedError, Dropout, BatchNormalization, Stack, Flatten, GraphCNN, GraphCNNPool, ReduceSum
from deepchem.models.tensorgraph.layers import L2Loss, Label, Weights, Feature
from deepchem.models.tensorgraph.tensor_graph import TensorGraph
from deepchem.trans import undo_transforms
from deepchem.feat import CircularFingerprint


class ScScoreModel(TensorGraph):
  """Pairwise scoring model that runs two inputs through one shared network.

  Two feature inputs are passed through the same stack of dense layers (the
  second branch reuses the first branch's layers via ``shared``) followed by a
  shared single-unit readout. Each branch's output is
  ``Sigmoid(readout) * 4 + 1``; the training loss is a weighted hinge loss on
  the difference between the two raw readouts.
  """

  def __init__(self,
               n_features,
               layer_sizes=[300, 300, 300],
               dropouts=0.0,
               **kwargs):
    """Store the hyperparameters and build the graph.

    Parameters
    ----------
    n_features: int
      Size of the last axis of each of the two feature inputs.
    layer_sizes: sequence of int
      ``out_channels`` of each hidden Dense layer, in order.
    dropouts: float
      When greater than 0.0, a Dropout layer constructed with this value is
      added after every hidden layer of both branches.
    **kwargs
      Forwarded unchanged to ``TensorGraph.__init__``.
    """
    self.n_features = n_features
    # Copy so the shared default list (or a caller-owned list) is never
    # aliased by, and mutated through, this instance.
    self.layer_sizes = list(layer_sizes)
    self.dropout = dropouts
    super(ScScoreModel, self).__init__(**kwargs)
    self.build_graph()

  def build_graph(self):
    """Create the two weight-sharing branches, the outputs and the loss."""
    self.m1_features = Feature(shape=(None, self.n_features))
    self.m2_features = Feature(shape=(None, self.n_features))
    prev_layer1 = self.m1_features
    prev_layer2 = self.m2_features
    for layer_size in self.layer_sizes:
      prev_layer1 = Dense(
          out_channels=layer_size,
          in_layers=[prev_layer1],
          activation_fn=tf.nn.relu)
      # The second branch reuses the Dense layer that was just created.
      prev_layer2 = prev_layer1.shared([prev_layer2])
      if self.dropout > 0.0:
        prev_layer1 = Dropout(self.dropout, in_layers=[prev_layer1])
        prev_layer2 = Dropout(self.dropout, in_layers=[prev_layer2])

    readout_m1 = Dense(
        out_channels=1, in_layers=[prev_layer1], activation_fn=None)
    readout_m2 = readout_m1.shared([prev_layer2])
    # Rescale the sigmoid of each readout by * 4 + 1 for the reported score.
    self.add_output(Sigmoid(readout_m1) * 4 + 1)
    self.add_output(Sigmoid(readout_m2) * 4 + 1)

    # The loss is computed on the raw readout difference, not on the
    # rescaled outputs.
    difference = readout_m1 - readout_m2
    label = Label(shape=(None, 1))
    loss = HingeLoss(in_layers=[label, difference])
    self.my_task_weights = Weights(shape=(None, 1))
    loss = WeightedError(in_layers=[loss, self.my_task_weights])
    self.set_loss(loss)

  def default_generator(self,
                        dataset,
                        epochs=1,
                        predict=False,
                        deterministic=True,
                        pad_batches=True):
    """Yield one feed dict per batch of ``dataset`` for ``epochs`` passes.

    ``X_b[:, 0]`` is fed to the first feature input and ``X_b[:, 1]`` to the
    second. Labels and weights are fed only when they are present and
    ``predict`` is False.
    """
    for _ in range(epochs):
      for (X_b, y_b, w_b, ids_b) in dataset.iterbatches(
          batch_size=self.batch_size,
          deterministic=deterministic,
          pad_batches=pad_batches):
        feed_dict = dict()
        feed_dict[self.m1_features] = X_b[:, 0]
        feed_dict[self.m2_features] = X_b[:, 1]
        if y_b is not None and not predict:
          feed_dict[self.labels[0]] = y_b
        if w_b is not None and not predict:
          feed_dict[self.my_task_weights] = w_b
        yield feed_dict

  def predict_mols(self, mols):
    """Featurize ``mols`` with circular fingerprints and score each one.

    The fingerprints (size ``n_features``, radius 3, chiral) are duplicated so
    that both model inputs receive the same molecule. Returns column 0 of the
    first model output.
    """
    featurizer = CircularFingerprint(
        size=self.n_features, radius=3, chiral=True)
    features = np.expand_dims(featurizer.featurize(mols), axis=1)
    # Same molecule on both branches; only the first output is read below.
    features = np.concatenate([features, features], axis=1)
    ds = NumpyDataset(features, None, None, None)
    return self.predict(ds)[0][:, 0]

  def create_estimator_inputs(self, feature_columns, weight_column, features,
                              labels, mode):
    """Map this model's input layers to tensors for use in an Estimator.

    The first two entries of ``feature_columns`` are paired, in order, with
    the two feature inputs. Labels, when given, are cast to int32.

    NOTE(review): ``weight_column`` and ``mode`` are accepted but not used
    here, so no tensor is supplied for ``my_task_weights`` -- confirm this is
    intended.
    """
    tensors = {}
    for layer, column in zip([self.m1_features, self.m2_features],
                             feature_columns):
      tensors[layer] = tf.feature_column.input_layer(features, [column])
    if labels is not None:
      tensors[self.labels[0]] = tf.cast(labels, tf.int32)
    return tensors
+40 −0
Original line number Diff line number Diff line
import unittest

import numpy as np

import deepchem
from deepchem.data import NumpyDataset
from deepchem.models import GraphConvModel
from deepchem.models import TensorGraph
from deepchem.molnet.load_function.delaney_datasets import load_delaney
from deepchem.models.tensorgraph.layers import ReduceSum, L2Loss
from deepchem.models import WeaveModel
from deepchem.feat import ConvMolFeaturizer


class TestSaScoreModel(unittest.TestCase):
  """Persistence round-trip check for ScScoreModel."""

  def test_save_load(self):
    """Predictions must be identical before and after save + reload."""
    n_samples, n_features, n_tasks = 10, 3, 1

    # Seeded random feature pairs with all-zero labels.
    np.random.seed(123)
    pair_features = np.random.rand(n_samples, 2, n_features)
    zero_labels = np.zeros((n_samples, n_tasks))
    dataset = deepchem.data.NumpyDataset(pair_features, zero_labels)

    # Fit briefly and record the predictions of the in-memory model.
    model = deepchem.models.ScScoreModel(n_features, dropouts=0)
    model.fit(dataset, nb_epoch=1)
    predictions_before = model.predict(dataset)

    # Persist the model, then reload it from its model directory.
    model.save()
    model = TensorGraph.load_from_dir(model.model_dir)
    predictions_after = model.predict(dataset)

    # Every output must match element for element.
    for expected, actual in zip(predictions_before, predictions_after):
      self.assertTrue(np.all(expected == actual))
+50 −0
Original line number Diff line number Diff line
@@ -277,3 +277,53 @@ class TestEstimators(unittest.TestCase):

    results = estimator.evaluate(input_fn=lambda: input_fn(1))
    assert results['accuracy'] > 0.9

  def test_scscore(self):
    """Build an Estimator from a ScScoreModel, train it and check the loss."""
    n_samples, n_features, n_tasks = 10, 3, 1

    # Seeded random feature pairs with all-zero labels.
    np.random.seed(123)
    pair_features = np.random.rand(n_samples, 2, n_features)
    zero_labels = np.zeros((n_samples, n_tasks))
    dataset = dc.data.NumpyDataset(pair_features, zero_labels)

    def input_fn(epochs):
      # One batch covers the whole dataset; axis 1 is split into two inputs.
      iterator = dataset.make_iterator(batch_size=n_samples, epochs=epochs)
      batch_x, batch_y, batch_w = iterator.get_next()
      first = batch_x[:, 0]
      second = batch_x[:, 1]
      return {'x1': first, 'x2': second, 'weights': batch_w}, batch_y

    # The TensorGraph model to be wrapped in an Estimator.
    model = dc.models.ScScoreModel(n_features, dropouts=0)

    # One numeric feature column per model input.
    x_col1 = tf.feature_column.numeric_column('x1', shape=(n_features,))
    x_col2 = tf.feature_column.numeric_column('x2', shape=(n_features,))

    def accuracy(labels, predictions, weights):
      rounded = tf.round(predictions)
      return tf.metrics.accuracy(labels, rounded, weights)

    estimator = model.make_estimator(
        feature_columns=[x_col1, x_col2], metrics={'accuracy': accuracy})

    # Train for 100 epochs, then evaluate on a single pass over the data.
    estimator.train(input_fn=lambda: input_fn(100))
    results = estimator.evaluate(input_fn=lambda: input_fn(1))
    print(results)
    assert results['loss'] < 1e-4
    # TODO(LESWING) Discuss with peastman.
    #  The output here is a human-readable score of 1-5 per molecule, not a
    #  class probability, so the accuracy check stays disabled:
    # assert results['accuracy'] > 0.9
Loading