Commit c7d7196c authored by miaecle's avatar miaecle
Browse files

ANI Regression

parent 1e545237
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -29,4 +29,4 @@ from deepchem.models.tensorflow_models.progressive_joint import ProgressiveJoint
from deepchem.models.tensorflow_models.IRV import TensorflowMultiTaskIRVClassifier
from deepchem.models.tensorgraph.tensor_graph import TensorGraph
from deepchem.models.tensorgraph.models.graph_models import WeaveTensorGraph, DTNNTensorGraph, DAGTensorGraph, GraphConvTensorGraph
from deepchem.models.tensorgraph.models.symmetry_function_regression import BPSymmetryFunctionRegression
from deepchem.models.tensorgraph.models.symmetry_function_regression import BPSymmetryFunctionRegression, ANIRegression
+120 −1
Original line number Diff line number Diff line
@@ -13,7 +13,8 @@ from deepchem.models.tensorgraph.layers import L2Loss, Label, Weights, Feature
from deepchem.models.tensorgraph.tensor_graph import TensorGraph
from deepchem.models.tensorgraph.graph_layers import DTNNEmbedding
from deepchem.models.tensorgraph.symmetry_functions import DistanceMatrix, \
    DistanceCutoff, RadialSymmetry, AngularSymmetry, BPFeatureMerge, BPGather
    DistanceCutoff, RadialSymmetry, AngularSymmetry, AngularSymmetryMod, \
    BPFeatureMerge, BPGather, AtomicDifferentiatedDense


class BPSymmetryFunctionRegression(TensorGraph):
@@ -111,3 +112,121 @@ class BPSymmetryFunctionRegression(TensorGraph):
        feed_dict[self.atom_numbers] = np.array(X_b[:, :, 0], dtype=int)
        feed_dict[self.atom_coordinates] = np.array(X_b[:, :, 1:], dtype=float)
        yield feed_dict

class ANIRegression(TensorGraph):
  """Regression model built from atomic symmetry functions.

  The graph computes radial and angular symmetry functions from atomic
  coordinates, merges them per atom, applies two AtomicDifferentiatedDense
  layers and gathers one prediction per task with BPGather.
  """

  def __init__(self,
               n_tasks,
               max_atoms,
               n_hidden=40,
               n_embedding=10,
               atom_number_cases=None,
               **kwargs):
    """
    Parameters
    ----------
    n_tasks: int
      Number of tasks
    max_atoms: int
      Maximum number of atoms in the dataset
    n_hidden: int, optional
      Number of hidden units in the readout function
    n_embedding: int, optional
      Stored on the model as `self.n_embedding`; see the review note in
      `build_graph` about how it is currently used.
    atom_number_cases: list of int, optional
      Atomic numbers passed to the symmetry-function layers and to the
      atom-differentiated dense layers. Defaults to [1, 6, 7, 8].
    """
    # A list literal as default would be shared by every instance; resolve
    # the default here and keep a private copy instead.
    if atom_number_cases is None:
      atom_number_cases = [1, 6, 7, 8]
    self.n_tasks = n_tasks
    self.max_atoms = max_atoms
    self.n_hidden = n_hidden
    self.n_embedding = n_embedding
    self.atom_number_cases = list(atom_number_cases)
    super(ANIRegression, self).__init__(**kwargs)
    self.build_graph()

  def build_graph(self):
    """Create the input features, hidden layers, outputs and loss."""
    self.atom_numbers = Feature(shape=(None, self.max_atoms), dtype=tf.int32)
    self.atom_flags = Feature(shape=(None, self.max_atoms, self.max_atoms))
    self.atom_coordinates = Feature(shape=(None, self.max_atoms, 3))

    # NOTE(review): 0.52917721092 is the Bohr radius in Angstrom, so the
    # cutoffs below presumably convert 4.6 A / 3.1 A into Bohr -- confirm
    # against the units produced by the featurizer.
    bohr_radius = 0.52917721092

    distance_matrix = DistanceMatrix(
        self.max_atoms, in_layers=[self.atom_coordinates, self.atom_flags])
    distance_cutoff_radial = DistanceCutoff(
        self.max_atoms,
        cutoff=4.6 / bohr_radius,
        in_layers=[distance_matrix, self.atom_flags])
    distance_cutoff_angular = DistanceCutoff(
        self.max_atoms,
        cutoff=3.1 / bohr_radius,
        in_layers=[distance_matrix, self.atom_flags])
    radial_symmetry = RadialSymmetry(
        self.max_atoms,
        atomic_number_differentiated=True,
        atom_numbers=self.atom_number_cases,
        in_layers=[distance_cutoff_radial, distance_matrix, self.atom_numbers])
    angular_symmetry = AngularSymmetryMod(
        self.max_atoms,
        atomic_number_differentiated=True,
        atom_numbers=self.atom_number_cases,
        in_layers=[
            distance_cutoff_angular, distance_matrix, self.atom_coordinates,
            self.atom_numbers
        ])
    # NOTE(review): the embedding width is fixed to 0 here, so
    # self.n_embedding is not forwarded -- confirm whether that is intended.
    atom_embedding = DTNNEmbedding(
        n_embedding=0, in_layers=[self.atom_numbers])

    feature_merge = BPFeatureMerge(
        self.max_atoms,
        in_layers=[
            atom_embedding, radial_symmetry, angular_symmetry, self.atom_flags
        ])

    # Two stacked atom-differentiated dense layers with tanh activation.
    hidden_1 = AtomicDifferentiatedDense(
        self.max_atoms,
        self.n_hidden,
        self.atom_number_cases,
        activation='tanh',
        in_layers=[feature_merge, self.atom_numbers])

    hidden_2 = AtomicDifferentiatedDense(
        self.max_atoms,
        self.n_hidden,
        self.atom_number_cases,
        activation='tanh',
        in_layers=[hidden_1, self.atom_numbers])

    # One Dense + BPGather head, one Label and one L2Loss per task.
    costs = []
    self.labels_fd = []
    for _ in range(self.n_tasks):
      regression = Dense(
          out_channels=1, activation_fn=None, in_layers=[hidden_2])
      output = BPGather(self.max_atoms, in_layers=[regression, self.atom_flags])
      self.add_output(output)

      label = Label(shape=(None, 1))
      self.labels_fd.append(label)
      costs.append(L2Loss(in_layers=[label, output]))

    all_cost = Concat(in_layers=costs, axis=0)
    self.weights = Weights(shape=(None, self.n_tasks))
    loss = WeightedError(in_layers=[all_cost, self.weights])
    self.set_loss(loss)

  def default_generator(self,
                        dataset,
                        epochs=1,
                        predict=False,
                        pad_batches=True):
    """Yield one feed dict per batch of `dataset`, for `epochs` passes.

    Labels and weights are only fed when they are present in the batch and
    `predict` is False. Batches are always drawn with deterministic=True.
    Column 0 of the last axis of X is fed as atom numbers and the remaining
    columns as atom coordinates.
    """
    for _ in range(epochs):
      for (X_b, y_b, w_b, ids_b) in dataset.iterbatches(
          batch_size=self.batch_size,
          deterministic=True,
          pad_batches=pad_batches):

        feed_dict = dict()
        if y_b is not None and not predict:
          for index, label in enumerate(self.labels_fd):
            feed_dict[label] = y_b[:, index:index + 1]
        if w_b is not None and not predict:
          feed_dict[self.weights] = w_b

        atom_numbers = X_b[:, :, 0]
        # sign() gives 0 where the atom number is 0 (presumably padding
        # slots -- confirm against the featurizer) and 1 for positive ones.
        flags = np.sign(np.array(atom_numbers))
        # Pairwise mask: entry (b, i, j) is flags[b, i] * flags[b, j].
        # Broadcasting replaces stacking max_atoms copies along each axis.
        feed_dict[self.atom_flags] = \
            flags[:, :, np.newaxis] * flags[:, np.newaxis, :]
        feed_dict[self.atom_numbers] = np.array(atom_numbers, dtype=int)
        feed_dict[self.atom_coordinates] = np.array(X_b[:, :, 1:], dtype=float)
        yield feed_dict
 No newline at end of file
+42 −2
Original line number Diff line number Diff line
@@ -116,7 +116,7 @@ class RadialSymmetry(Layer):
    Rs_init, ita_init = np.meshgrid(self.Rs_init, self.ita_init)
    self.Rs = tf.constant(Rs_init.flatten(), dtype=tf.float32)
    self.ita = tf.constant(ita_init.flatten(), dtype=tf.float32)
    self.atom_number_embedding = tf.eye(max(self.atom_number_cases))
    self.atom_number_embedding = tf.eye(max(self.atom_number_cases)+1)

  def create_tensor(self, in_layers=None, set_tensors=True, **kwargs):
    """ Generate Radial Symmetry Function """
@@ -289,7 +289,7 @@ class AngularSymmetryMod(Layer):
    self.zeta = tf.constant(zeta_init.flatten(), dtype=tf.float32)
    self.Rs = tf.constant(Rs_init.flatten(), dtype=tf.float32)
    self.thetas = tf.constant(thetas_init.flatten(), dtype=tf.float32)
    self.atom_number_embedding = tf.eye(max(self.atom_number_cases))
    self.atom_number_embedding = tf.eye(max(self.atom_number_cases)+1)

  def create_tensor(self, in_layers=None, set_tensors=True, **kwargs):
    """ Generate Angular Symmetry Function """
@@ -389,3 +389,43 @@ class BPGather(Layer):

    out_tensor = tf.reduce_sum(out_tensor * flags[:, :, 0:1], axis=1)
    self.out_tensor = out_tensor


class AtomicDifferentiatedDense(Layer):
  """ Separate Dense module for different atoms

  A separate weight matrix and bias vector is kept per atomic number and
  selected for every atom through an embedding lookup on its atom number.
  """

  def __init__(self,
               max_atoms,
               out_channels,
               atom_number_cases=None,
               init='glorot_uniform',
               activation='relu',
               **kwargs):
    """
    Parameters
    ----------
    max_atoms: int
      Stored as `self.max_atoms`.
    out_channels: int
      Number of output channels of the dense transformation.
    atom_number_cases: list of int, optional
      Atomic numbers the layer is built for; its maximum determines the
      size of the per-atom parameter tables. Defaults to [1, 6, 7, 8].
    init: str, optional
      Name of the weight initialization, resolved via `initializations.get`.
    activation: str, optional
      Name of the activation, resolved via `activations.get`.
    """
    self.init = initializations.get(init)  # Set weight initialization
    self.activation = activations.get(activation)  # Get activations
    self.max_atoms = max_atoms
    self.out_channels = out_channels
    # Avoid a shared mutable default: resolve it here and keep a copy.
    if atom_number_cases is None:
      atom_number_cases = [1, 6, 7, 8]
    self.atom_number_cases = list(atom_number_cases)

    super(AtomicDifferentiatedDense, self).__init__(**kwargs)

  def create_tensor(self, in_layers=None, set_tensors=True, **kwargs):
    """ Apply the atom-number-specific dense transformation

    in_layers[0] supplies the per-atom features and in_layers[1] the atom
    numbers used to pick each atom's weights and bias. The result is stored
    in `self.out_tensor`.
    """
    if in_layers is None:
      in_layers = self.in_layers
    in_layers = convert_to_layers(in_layers)

    inputs = in_layers[0].out_tensor
    atom_numbers = in_layers[1].out_tensor
    in_channels = inputs.get_shape().as_list()[-1]
    in_dimension = len(inputs.get_shape())

    # The tables are indexed directly by atomic number, so valid indices run
    # from 0 up to and including max(atom_number_cases); that needs max + 1
    # rows. With only max rows the largest atomic number is out of range.
    n_rows = max(self.atom_number_cases) + 1
    self.W = self.init([n_rows, in_channels, self.out_channels])
    self.b = model_ops.zeros((n_rows, self.out_channels))
    W = tf.nn.embedding_lookup(self.W, atom_numbers)
    b = tf.nn.embedding_lookup(self.b, atom_numbers)

    # Per-atom matrix product: add a trailing axis to the inputs, multiply
    # with the looked-up weights and sum over the input-channel axis.
    outputs = tf.expand_dims(inputs, in_dimension) * W
    outputs = tf.reduce_sum(outputs, in_dimension - 1) + b
    outputs = self.activation(outputs)
    self.out_tensor = outputs

+54 −0
Original line number Diff line number Diff line
"""
Script that trains ANI models on qm7 dataset.
"""
from __future__ import print_function
from __future__ import division
from __future__ import unicode_literals

import numpy as np
np.random.seed(123)
import tensorflow as tf
tf.set_random_seed(123)
import deepchem as dc

# Load the qm7 dataset with the 'BPSymmetryFunction' featurizer
qm7 = dc.molnet.load_qm7_from_mat(featurizer='BPSymmetryFunction')
tasks, datasets, transformers = qm7
train_dataset, valid_dataset, test_dataset = datasets

# Regression metrics reported after training
metric = [
    dc.metrics.Metric(dc.metrics.mean_absolute_error, mode="regression"),
    dc.metrics.Metric(dc.metrics.pearson_r2_score, mode="regression"),
]

# Model and training hyperparameters
max_atoms = 23
hyperparameters = dict(
    n_hidden=40,
    n_embedding=0,
    atom_number_cases=[1, 6, 7, 8],
    batch_size=16,
    learning_rate=0.001,
    use_queue=False,
    mode="regression")

model = dc.models.ANIRegression(len(tasks), max_atoms, **hyperparameters)

# Train the model
model.fit(train_dataset, nb_epoch=20, checkpoint_interval=1000)

# Evaluate both splits first, then report them in the same order
print("Evaluating model")
train_scores, valid_scores = [
    model.evaluate(split, metric, transformers)
    for split in (train_dataset, valid_dataset)
]

for title, scores in (("Train scores", train_scores),
                      ("Validation scores", valid_scores)):
  print(title)
  print(scores)
+1 −3
Original line number Diff line number Diff line
"""
Script that trains DTNN models on qm7 dataset.
"""

from __future__ import print_function
from __future__ import division
from __future__ import unicode_literals