Commit b9cfdbc2 authored by miaecle's avatar miaecle
Browse files

Symmetry Function for QM7

parent cf440e2c
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -18,6 +18,7 @@ from deepchem.feat.fingerprints import CircularFingerprint
from deepchem.feat.basic import RDKitDescriptors
from deepchem.feat.coulomb_matrices import CoulombMatrix
from deepchem.feat.coulomb_matrices import CoulombMatrixEig
from deepchem.feat.coulomb_matrices import BPSymmetryFunction
from deepchem.feat.rdkit_grid_featurizer import RdkitGridFeaturizer
from deepchem.feat.nnscore_utils import hydrogenate_and_compute_partial_charges
from deepchem.feat.binding_pocket_features import BindingPocketFeaturizer
+21 −0
Original line number Diff line number Diff line
@@ -16,6 +16,27 @@ import deepchem as dc
from rdkit import Chem
from deepchem.feat import Featurizer
from deepchem.utils import pad_array
from deepchem.feat.atomic_coordinates import AtomicCoordinates


class BPSymmetryFunction(Featurizer):
  """
  Calculate Symmetry Function for each atom in the molecules
  Methods described in https://journals.aps.org/prl/pdf/10.1103/PhysRevLett.98.146401
  """

  def __init__(self, max_atoms):
    self.max_atoms = max_atoms

  def _featurize(self, mol):
    coordfeat = AtomicCoordinates()
    coordinates = coordfeat._featurize(mol)[0]
    atom_numbers = np.array([atom.GetAtomicNum() for atom in mol.GetAtoms()])
    atom_numbers = np.expand_dims(atom_numbers, axis=1)
    assert atom_numbers.shape[0] == coordinates.shape[0]
    n_atoms = atom_numbers.shape[0]
    features = np.concatenate([atom_numbers, coordinates], axis=1)
    return np.pad(features, ((0, self.max_atoms - n_atoms), (0, 0)), 'constant')


class CoulombMatrix(Featurizer):
+1 −0
Original line number Diff line number Diff line
@@ -29,3 +29,4 @@ from deepchem.models.tensorflow_models.progressive_joint import ProgressiveJoint
from deepchem.models.tensorflow_models.IRV import TensorflowMultiTaskIRVClassifier
from deepchem.models.tensorgraph.tensor_graph import TensorGraph
from deepchem.models.tensorgraph.models.graph_models import WeaveTensorGraph, DTNNTensorGraph, DAGTensorGraph, GraphConvTensorGraph
from deepchem.models.tensorgraph.models.symmetry_function_regression import BPSymmetryFunctionRegression
+107 −0
Original line number Diff line number Diff line
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""
Created on Thu Jul  6 20:31:47 2017

@author: zqwu
"""
import numpy as np
import tensorflow as tf

from deepchem.models.tensorgraph.layers import Dense, Concat, WeightedError
from deepchem.models.tensorgraph.layers import L2Loss, Label, Weights, Feature
from deepchem.models.tensorgraph.tensor_graph import TensorGraph
from deepchem.models.tensorgraph.graph_layers import DTNNEmbedding
from deepchem.models.tensorgraph.symmetry_functions import DistanceMatrix, \
    DistanceCutoff, RadialSymmetry, AngularSymmetry, BPFeatureMerge, BPGather


class BPSymmetryFunctionRegression(TensorGraph):

  def __init__(self, n_tasks, max_atoms, n_hidden=10, **kwargs):
    """
    Parameters
    ----------
    n_tasks: int
      Number of tasks
    max_atoms: int
      Maximum number of atoms in the dataset
    n_hidden: int, optional
      Number of hidden units in the readout function
    """
    self.n_tasks = n_tasks
    self.max_atoms = max_atoms
    self.n_hidden = n_hidden
    super(BPSymmetryFunctionRegression, self).__init__(**kwargs)
    self.build_graph()

  def build_graph(self):
    self.atom_numbers = Feature(shape=(None, self.max_atoms), dtype=tf.int32)
    self.atom_flags = Feature(shape=(None, self.max_atoms, self.max_atoms))
    self.atom_coordinates = Feature(shape=(None, self.max_atoms, 3))

    distance_matrix = DistanceMatrix(
        self.max_atoms, in_layers=[self.atom_coordinates, self.atom_flags])
    distance_cutoff = DistanceCutoff(
        self.max_atoms,
        cutoff=6 / 0.52917721092,
        in_layers=[distance_matrix, self.atom_flags])
    radial_symmetry = RadialSymmetry(
        self.max_atoms, in_layers=[distance_cutoff, distance_matrix])
    angular_symmetry = AngularSymmetry(
        self.max_atoms,
        in_layers=[distance_cutoff, distance_matrix, self.atom_coordinates])
    atom_embedding = DTNNEmbedding(n_embedding=5, in_layers=[self.atom_numbers])

    feature_merge = BPFeatureMerge(
        self.max_atoms,
        in_layers=[
            atom_embedding, radial_symmetry, angular_symmetry, self.atom_flags
        ])

    Hidden = Dense(
        out_channels=self.n_hidden,
        activation_fn=tf.nn.tanh,
        in_layers=[feature_merge])

    costs = []
    self.labels_fd = []
    for task in range(self.n_tasks):
      regression = Dense(out_channels=1, activation_fn=None, in_layers=[Hidden])
      output = BPGather(self.max_atoms, in_layers=[regression, self.atom_flags])
      self.add_output(output)

      label = Label(shape=(None, 1))
      self.labels_fd.append(label)
      cost = L2Loss(in_layers=[label, output])
      costs.append(cost)

    all_cost = Concat(in_layers=costs, axis=0)
    self.weights = Weights(shape=(None, self.n_tasks))
    loss = WeightedError(in_layers=[all_cost, self.weights])
    self.set_loss(loss)

  def default_generator(self,
                        dataset,
                        epochs=1,
                        predict=False,
                        pad_batches=True):
    for epoch in range(epochs):
      for (X_b, y_b, w_b, ids_b) in dataset.iterbatches(
          batch_size=self.batch_size,
          deterministic=True,
          pad_batches=pad_batches):

        feed_dict = dict()
        if y_b is not None and not predict:
          for index, label in enumerate(self.labels_fd):
            feed_dict[label] = y_b[:, index:index + 1]
        if w_b is not None and not predict:
          feed_dict[self.weights] = w_b

        flags = np.sign(np.array(X_b[:, :, 0]))
        feed_dict[self.atom_flags] = np.stack([flags]*self.max_atoms, axis=2)*\
            np.stack([flags]*self.max_atoms, axis=1)
        feed_dict[self.atom_numbers] = np.array(X_b[:, :, 0], dtype=int)
        feed_dict[self.atom_coordinates] = np.array(X_b[:, :, 1:], dtype=float)
        yield feed_dict
+208 −0
Original line number Diff line number Diff line
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""
Created on Thu Jul  6 20:43:23 2017

@author: zqwu
"""
from __future__ import print_function
from __future__ import division
from __future__ import unicode_literals

import numpy as np
import tensorflow as tf
from deepchem.nn import activations
from deepchem.nn import initializations
from deepchem.nn import model_ops

from deepchem.models.tensorgraph.layers import Layer
from deepchem.models.tensorgraph.layers import convert_to_layers


class DistanceMatrix(Layer):
  """ TensorGraph style implementation
    The same as deepchem.nn.WeaveGather
    """

  def __init__(self, max_atoms, **kwargs):
    """
    Parameters
    ----------
    max_atoms: int
      Maximum number of atoms in the dataset
    """
    self.max_atoms = max_atoms
    super(DistanceMatrix, self).__init__(**kwargs)

  def create_tensor(self, in_layers=None, set_tensors=True, **kwargs):
    """ Generate distance matrix for BPSymmetryFunction with trainable cutoff """
    if in_layers is None:
      in_layers = self.in_layers
    in_layers = convert_to_layers(in_layers)

    max_atoms = self.max_atoms
    atom_coordinates = in_layers[0].out_tensor
    atom_flags = in_layers[1].out_tensor
    tensor1 = tf.tile(
        tf.expand_dims(atom_coordinates, axis=2), (1, 1, max_atoms, 1))
    tensor2 = tf.tile(
        tf.expand_dims(atom_coordinates, axis=1), (1, max_atoms, 1, 1))
    # Calculate pairwise distance
    d = tf.sqrt(tf.reduce_sum(tf.square(tensor1 - tensor2), axis=3))
    # Masking for valid atom index
    self.out_tensor = d * tf.to_float(atom_flags)


class DistanceCutoff(Layer):
  """ TensorGraph style implementation
    The same as deepchem.nn.WeaveGather
    """

  def __init__(self, max_atoms, cutoff=6 / 0.52917721092, **kwargs):
    """
    Parameters
    ----------
    cutoff: float, optional
      cutoff threshold for distance, in Bohr(0.53Angstrom)
    """
    self.max_atoms = max_atoms
    self.cutoff = cutoff
    super(DistanceCutoff, self).__init__(**kwargs)

  def build(self):
    self.Rc = tf.Variable(tf.constant(self.cutoff))

  def create_tensor(self, in_layers=None, set_tensors=True, **kwargs):
    """ Generate distance matrix for BPSymmetryFunction with trainable cutoff """
    if in_layers is None:
      in_layers = self.in_layers
    in_layers = convert_to_layers(in_layers)

    self.build()
    d = in_layers[0].out_tensor
    d_flag = in_layers[1].out_tensor
    # Cutoff with threshold Rc
    d_flag = d_flag * tf.nn.relu(tf.sign(self.Rc - d))
    d = 0.5 * (tf.cos(np.pi * d / self.Rc) + 1)
    out_tensor = d * d_flag
    out_tensor = out_tensor * tf.expand_dims((1 - tf.eye(self.max_atoms)), 0)
    self.out_tensor = out_tensor


class RadialSymmetry(Layer):
  """ Radial Symmetry Function """

  def __init__(self, max_atoms, **kwargs):
    self.max_atoms = max_atoms
    super(RadialSymmetry, self).__init__(**kwargs)

  def build(self):
    """ Parameters for the Gaussian """
    self.Rs = tf.Variable(tf.constant(0.))
    #self.ita = tf.Variable(tf.constant(1.))
    self.ita = 1.

  def create_tensor(self, in_layers=None, set_tensors=True, **kwargs):
    """ Generate Radial Symmetry Function """
    if in_layers is None:
      in_layers = self.in_layers
    in_layers = convert_to_layers(in_layers)

    self.build()
    d_cutoff = in_layers[0].out_tensor
    d = in_layers[1].out_tensor
    out_tensor = tf.exp(-self.ita * tf.square(d - self.Rs)) * d_cutoff
    self.out_tensor = tf.reduce_sum(out_tensor, axis=2)


class AngularSymmetry(Layer):
  """ Angular Symmetry Function """

  def __init__(self, max_atoms, **kwargs):
    self.max_atoms = max_atoms
    super(AngularSymmetry, self).__init__(**kwargs)

  def build(self):
    #self.lambd = tf.Variable(tf.constant(1.))
    #self.ita = tf.Variable(tf.constant(1.))
    #self.zeta = tf.Variable(tf.constant(1.))
    self.lambd = 1.
    self.ita = 1.
    self.zeta = 0.8

  def create_tensor(self, in_layers=None, set_tensors=True, **kwargs):
    """ Generate Angular Symmetry Function """
    if in_layers is None:
      in_layers = self.in_layers
    in_layers = convert_to_layers(in_layers)

    self.build()
    max_atoms = self.max_atoms
    d_cutoff = in_layers[0].out_tensor
    d = in_layers[1].out_tensor
    atom_coordinates = in_layers[2].out_tensor
    vector_distances = tf.tile(tf.expand_dims(atom_coordinates, axis=2), (1,1,max_atoms,1)) - \
        tf.tile(tf.expand_dims(atom_coordinates, axis=1), (1,max_atoms,1,1))
    R_ij = tf.tile(tf.expand_dims(d, axis=3), (1, 1, 1, max_atoms))
    R_ik = tf.tile(tf.expand_dims(d, axis=2), (1, 1, max_atoms, 1))
    R_jk = tf.tile(tf.expand_dims(d, axis=1), (1, max_atoms, 1, 1))
    f_R_ij = tf.tile(tf.expand_dims(d_cutoff, axis=3), (1, 1, 1, max_atoms))
    f_R_ik = tf.tile(tf.expand_dims(d_cutoff, axis=2), (1, 1, max_atoms, 1))
    f_R_jk = tf.tile(tf.expand_dims(d_cutoff, axis=1), (1, max_atoms, 1, 1))

    # Define angle theta = R_ij(Vector) dot R_ik(Vector)/R_ij(distance)/R_ik(distance)
    theta = tf.reduce_sum(tf.tile(tf.expand_dims(vector_distances, axis=3), (1,1,1,max_atoms,1)) * \
        tf.tile(tf.expand_dims(vector_distances, axis=2), (1,1,max_atoms,1,1)), axis=4)

    theta = tf.div(theta, R_ij * R_ik)

    out_tensor = tf.pow(1+self.lambd*tf.cos(theta), self.zeta) * \
        tf.exp(-self.ita*(tf.square(R_ij)+tf.square(R_ik)+tf.square(R_jk))) * \
        f_R_ij * f_R_ik * f_R_jk
    self.out_tensor = tf.reduce_sum(out_tensor, axis=[2, 3]) * \
        tf.pow(tf.constant(2.), 1-self.zeta)


class BPFeatureMerge(Layer):

  def __init__(self, max_atoms, **kwargs):
    self.max_atoms = max_atoms
    super(BPFeatureMerge, self).__init__(**kwargs)

  def create_tensor(self, in_layers=None, set_tensors=True, **kwargs):
    """ Merge features together """
    if in_layers is None:
      in_layers = self.in_layers
    in_layers = convert_to_layers(in_layers)

    atom_embedding = in_layers[0].out_tensor
    radial_symmetry = in_layers[1].out_tensor
    angular_symmetry = in_layers[2].out_tensor
    atom_flags = in_layers[3].out_tensor

    out_tensor = tf.concat(
        [
            atom_embedding, tf.expand_dims(radial_symmetry, 2),
            tf.expand_dims(angular_symmetry, 2)
        ],
        axis=2)
    self.out_tensor = out_tensor * atom_flags[:, :, 0:1]


class BPGather(Layer):

  def __init__(self, max_atoms, **kwargs):
    self.max_atoms = max_atoms
    super(BPGather, self).__init__(**kwargs)

  def create_tensor(self, in_layers=None, set_tensors=True, **kwargs):
    """ Merge features together """
    if in_layers is None:
      in_layers = self.in_layers
    in_layers = convert_to_layers(in_layers)

    out_tensor = in_layers[0].out_tensor
    flags = in_layers[1].out_tensor

    out_tensor = tf.reduce_sum(out_tensor * flags[:, :, 0:1], axis=1)
    self.out_tensor = out_tensor
Loading