Commit fbd977ac authored by Bharath Ramsundar, committed via GitHub
Browse files

Merge pull request #667 from miaecle/BP2

Behler and Parrinello Symmetry functions and ANI-1
parents 1f702d08 010a4e12
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -18,6 +18,7 @@ from deepchem.feat.fingerprints import CircularFingerprint
from deepchem.feat.basic import RDKitDescriptors
from deepchem.feat.coulomb_matrices import CoulombMatrix
from deepchem.feat.coulomb_matrices import CoulombMatrixEig
from deepchem.feat.coulomb_matrices import BPSymmetryFunction
from deepchem.feat.rdkit_grid_featurizer import RdkitGridFeaturizer
from deepchem.feat.nnscore_utils import hydrogenate_and_compute_partial_charges
from deepchem.feat.binding_pocket_features import BindingPocketFeaturizer
+21 −0
Original line number Diff line number Diff line
@@ -16,6 +16,27 @@ import deepchem as dc
from rdkit import Chem
from deepchem.feat import Featurizer
from deepchem.utils import pad_array
from deepchem.feat.atomic_coordinates import AtomicCoordinates


class BPSymmetryFunction(Featurizer):
  """
  Featurize each atom of a molecule as [atomic number, x, y, z], zero-padded
  to a fixed number of rows.

  Produces the per-atom inputs used by Behler-Parrinello symmetry functions,
  described in https://journals.aps.org/prl/pdf/10.1103/PhysRevLett.98.146401
  """

  def __init__(self, max_atoms):
    """
    Parameters
    ----------
    max_atoms: int
      Maximum number of atoms per molecule; the output is padded with zero
      rows up to this length.
    """
    self.max_atoms = max_atoms

  def _featurize(self, mol):
    """Return a (max_atoms, 4) array of [atomic number, x, y, z] per atom.

    Raises
    ------
    ValueError
      If the molecule contains more than `max_atoms` atoms, in which case
      padding to the fixed shape is impossible.
    """
    coordfeat = AtomicCoordinates()
    # AtomicCoordinates._featurize returns a list; element 0 is the
    # (n_atoms, 3) coordinate array.
    coordinates = coordfeat._featurize(mol)[0]
    atom_numbers = np.array([atom.GetAtomicNum() for atom in mol.GetAtoms()])
    atom_numbers = np.expand_dims(atom_numbers, axis=1)
    assert atom_numbers.shape[0] == coordinates.shape[0]
    n_atoms = atom_numbers.shape[0]
    # Fail with a clear message instead of np.pad's cryptic negative-pad
    # error when the molecule is too large for the configured max_atoms.
    if n_atoms > self.max_atoms:
      raise ValueError(
          "Molecule has %d atoms, which exceeds max_atoms=%d" %
          (n_atoms, self.max_atoms))
    features = np.concatenate([atom_numbers, coordinates], axis=1)
    return np.pad(features, ((0, self.max_atoms - n_atoms), (0, 0)), 'constant')


class CoulombMatrix(Featurizer):
+2 −1
Original line number Diff line number Diff line
@@ -9,7 +9,7 @@ from deepchem.models.models import Model
from deepchem.models.sklearn_models import SklearnModel
from deepchem.models.xgboost_models import XGBoostModel
from deepchem.models.tf_new_models.multitask_classifier import MultitaskGraphClassifier
from deepchem.models.tf_new_models.multitask_regressor import MultitaskGraphRegressor
from deepchem.models.tf_new_models.multitask_regressor import MultitaskGraphRegressor, DTNNMultitaskGraphRegressor

from deepchem.models.tf_new_models.support_classifier import SupportGraphClassifier
from deepchem.models.multitask import SingletaskToMultitask
@@ -29,3 +29,4 @@ from deepchem.models.tensorflow_models.progressive_joint import ProgressiveJoint
from deepchem.models.tensorflow_models.IRV import TensorflowMultiTaskIRVClassifier
from deepchem.models.tensorgraph.tensor_graph import TensorGraph
from deepchem.models.tensorgraph.models.graph_models import WeaveTensorGraph, DTNNTensorGraph, DAGTensorGraph, GraphConvTensorGraph
from deepchem.models.tensorgraph.models.symmetry_function_regression import BPSymmetryFunctionRegression, ANIRegression
+61 −38
Original line number Diff line number Diff line
@@ -75,15 +75,15 @@ class WeaveLayer(Layer):
    """
    Parameters
    ----------
        n_atom_input_feat: int
    n_atom_input_feat: int, optional
      Number of features for each atom in input.
        n_pair_input_feat: int
    n_pair_input_feat: int, optional
      Number of features for each pair of atoms in input.
        n_atom_output_feat: int
    n_atom_output_feat: int, optional
      Number of features for each atom in output.
        n_pair_output_feat: int
    n_pair_output_feat: int, optional
      Number of features for each pair of atoms in output.
        n_hidden_XX: int
    n_hidden_XX: int, optional
      Number of units(convolution depths) in corresponding hidden layer
    update_pair: bool, optional
      Whether to calculate for pair features,
@@ -96,6 +96,7 @@ class WeaveLayer(Layer):
      Dropout probability, not supported here

    """
    super(WeaveLayer, self).__init__(**kwargs)
    self.init = initializations.get(init)  # Set weight initialization
    self.activation = activations.get(activation)  # Get activations
    self.update_pair = update_pair  # last weave layer does not need to update
@@ -111,7 +112,6 @@ class WeaveLayer(Layer):
    self.n_atom_output_feat = n_atom_output_feat
    self.n_pair_output_feat = n_pair_output_feat
    self.W_AP, self.b_AP, self.W_PP, self.b_PP, self.W_P, self.b_P = None, None, None, None, None, None
    super(WeaveLayer, self).__init__(**kwargs)

  def build(self):
    """ Construct internal trainable weights.
@@ -200,6 +200,7 @@ class WeaveLayer(Layer):
      P = self.activation(P)
    else:
      P = pair_features

    out_tensor = [A, P]
    if set_tensors:
      self.variables = self.trainable_weights
@@ -295,6 +296,7 @@ class WeaveGather(Layer):
    if self.gaussian_expand:
      output_molecules = tf.matmul(output_molecules, self.W) + self.b
      output_molecules = self.activation(output_molecules)

    out_tensor = output_molecules
    if set_tensors:
      self.variables = self.trainable_weights
@@ -336,7 +338,7 @@ class DTNNEmbedding(Layer):

  def __init__(self,
               n_embedding=30,
               periodic_table_length=83,
               periodic_table_length=30,
               init='glorot_uniform',
               **kwargs):
    """
@@ -470,6 +472,7 @@ class DTNNGather(Layer):
               n_embedding=30,
               n_outputs=100,
               layer_sizes=[100],
               output_activation=True,
               init='glorot_uniform',
               activation='tanh',
               **kwargs):
@@ -490,6 +493,7 @@ class DTNNGather(Layer):
    self.n_embedding = n_embedding
    self.n_outputs = n_outputs
    self.layer_sizes = layer_sizes
    self.output_activation = output_activation
    self.init = initializations.get(init)  # Set weight initialization
    self.activation = activations.get(activation)  # Get activations

@@ -524,9 +528,12 @@ class DTNNGather(Layer):
    self.build()
    output = in_layers[0].out_tensor
    atom_membership = in_layers[1].out_tensor
    for i, W in enumerate(self.W_list):
    for i, W in enumerate(self.W_list[:-1]):
      output = tf.matmul(output, W) + self.b_list[i]
      output = self.activation(output)
    output = tf.matmul(output, self.W_list[-1]) + self.b_list[-1]
    if self.output_activation:
      output = self.activation(output)
    output = tf.segment_sum(output, atom_membership)
    out_tensor = output
    if set_tensors:
@@ -535,6 +542,22 @@ class DTNNGather(Layer):
    return out_tensor


class DTNNExtract(Layer):
  """Extract a single task's column from the preceding layer's output.

  Slices column `task_id` out of the previous layer's 2-D output while
  keeping a (n_samples, 1) shape.
  """

  def __init__(self, task_id, **kwargs):
    """
    Parameters
    ----------
    task_id: int
      Index of the task column to extract.
    """
    self.task_id = task_id
    super(DTNNExtract, self).__init__(**kwargs)

  def create_tensor(self, in_layers=None, set_tensors=True, **kwargs):
    if in_layers is None:
      in_layers = self.in_layers
    in_layers = convert_to_layers(in_layers)
    output = in_layers[0].out_tensor
    # Slice with task_id:task_id + 1 (rather than plain indexing) so the
    # result keeps rank 2.
    out_tensor = output[:, self.task_id:self.task_id + 1]
    # Honor set_tensors like the sibling layers do, instead of
    # unconditionally caching the tensor on self.
    if set_tensors:
      self.out_tensor = out_tensor
    return out_tensor


class DAGLayer(Layer):
  """ TensorGraph style implementation
    The same as deepchem.nn.DAGLayer
+68 −24
Original line number Diff line number Diff line
@@ -5,7 +5,7 @@ from collections import Sequence
import tensorflow as tf
import numpy as np

from deepchem.nn import model_ops, initializations
from deepchem.nn import model_ops, initializations, regularizers, activations


class Layer(object):
@@ -1131,6 +1131,50 @@ class BatchNorm(Layer):
    return out_tensor


class BatchNormalization(Layer):
  """Batch normalization: gamma * normalize(x) + beta.

  Normalizes activations along `axis` using statistics computed from the
  current batch.  NOTE(review): `momentum` is stored but never used — no
  running mean/variance is maintained, so inference uses batch statistics
  as well.
  """

  def __init__(self,
               epsilon=1e-5,
               axis=-1,
               momentum=0.99,
               beta_init='zero',
               gamma_init='one',
               **kwargs):
    """
    Parameters
    ----------
    epsilon: float, optional
      Small constant added to the variance to avoid division by zero.
    axis: int, optional
      Axis to normalize over; its size also determines the shape of the
      learned gamma/beta weights.
    momentum: float, optional
      Currently unused (accepted for API compatibility).
    beta_init: str, optional
      Initializer name for the beta (shift) weight.
    gamma_init: str, optional
      Initializer name for the gamma (scale) weight.
    """
    self.beta_init = initializations.get(beta_init)
    self.gamma_init = initializations.get(gamma_init)
    self.epsilon = epsilon
    self.axis = axis
    self.momentum = momentum
    super(BatchNormalization, self).__init__(**kwargs)

  def add_weight(self, shape, initializer, name=None):
    # Resolve the initializer by name and materialize the weight tensor.
    initializer = initializations.get(initializer)
    weight = initializer(shape, name=name)
    return weight

  def build(self, input_shape):
    # gamma/beta are sized by the dimension being normalized.
    shape = (input_shape[self.axis],)
    self.gamma = self.add_weight(
        shape, initializer=self.gamma_init, name='{}_gamma'.format(self.name))
    self.beta = self.add_weight(
        shape, initializer=self.beta_init, name='{}_beta'.format(self.name))

  def create_tensor(self, in_layers=None, set_tensors=True, **kwargs):
    inputs = self._get_input_tensors(in_layers)
    x = inputs[0]
    input_shape = model_ops.int_shape(x)
    self.build(input_shape)
    # Reduce along self.axis so the statistics match the axis gamma/beta
    # were built for; the previous code hardcoded axis=-1, which silently
    # mismatched any non-default `axis` argument.
    m = model_ops.mean(x, axis=self.axis, keepdims=True)
    std = model_ops.sqrt(
        model_ops.var(x, axis=self.axis, keepdims=True) + self.epsilon)
    # NOTE(review): epsilon is applied twice (inside the sqrt and in the
    # denominator); kept as-is for numerical parity with prior behavior.
    x_normed = (x - m) / (std + self.epsilon)
    x_normed = self.gamma * x_normed + self.beta
    out_tensor = x_normed
    if set_tensors:
      self.out_tensor = out_tensor
    return out_tensor


class WeightedError(Layer):

  def create_tensor(self, in_layers=None, set_tensors=True, **kwargs):
Loading