Unverified commit 3e8c37fe, authored by Nathan Frey and committed by GitHub
Browse files

Merge pull request #2412 from ncfrey/atomic-conv-model-fix

Fix AtomicConv slow tests
parents 187919f2 a905db1b
Loading
Loading
Loading
Loading
+3 −113
Original line number Original line Diff line number Diff line
@@ -17,102 +17,6 @@ from deepchem.utils.typing import KerasActivationFn, LossFn, OneOrMany


logger = logging.getLogger(__name__)
logger = logging.getLogger(__name__)


# class AtomicConvScore(Layer):
#   """The scoring function used by the atomic convolution models."""

#   def __init__(self, atom_types, layer_sizes, **kwargs):
#     super(AtomicConvScore, self).__init__(**kwargs)
#     self.atom_types = atom_types
#     self.layer_sizes = layer_sizes

#   def build(self, input_shape):
#     self.type_weights = []
#     self.type_biases = []
#     self.output_weights = []
#     self.output_biases = []
#     n_features = int(input_shape[0][-1])
#     layer_sizes = self.layer_sizes
#     num_layers = len(layer_sizes)
#     weight_init_stddevs = [1 / np.sqrt(x) for x in layer_sizes]
#     bias_init_consts = [0.0] * num_layers
#     for ind, atomtype in enumerate(self.atom_types):
#       prev_layer_size = n_features
#       self.type_weights.append([])
#       self.type_biases.append([])
#       self.output_weights.append([])
#       self.output_biases.append([])
#       for i in range(num_layers):
#         weight, bias = initializeWeightsBiases(
#             prev_layer_size=prev_layer_size,
#             size=layer_sizes[i],
#             weights=tf.random.truncated_normal(
#                 shape=[prev_layer_size, layer_sizes[i]],
#                 stddev=weight_init_stddevs[i]),
#             biases=tf.constant(
#                 value=bias_init_consts[i], shape=[layer_sizes[i]]))
#         self.type_weights[ind].append(weight)
#         self.type_biases[ind].append(bias)
#         prev_layer_size = layer_sizes[i]
#       weight, bias = initializeWeightsBiases(prev_layer_size, 1)
#       self.output_weights[ind].append(weight)
#       self.output_biases[ind].append(bias)

#   def call(self, inputs):
#     frag1_layer, frag2_layer, complex_layer, frag1_z, frag2_z, complex_z = inputs
#     atom_types = self.atom_types
#     num_layers = len(self.layer_sizes)

#     def atomnet(current_input, atomtype):
#       prev_layer = current_input
#       for i in range(num_layers):
#         #layer = tf.nn.bias_add(
#         #    tf.matmul(prev_layer, self.type_weights[atomtype][i]),
#         #    self.type_biases[atomtype][i])
#         #layer = tf.nn.relu(layer)
#         layer = Dense(100)(prev_layer)
#         prev_layer = layer

#       #output_layer = tf.squeeze(
#       #    tf.nn.bias_add(
#       #        tf.matmul(prev_layer, self.output_weights[atomtype][0]),
#       #        self.output_biases[atomtype][0]))
#       print("self.output_weights[atomtype][0].shape")
#       print(self.output_weights[atomtype][0].shape)
#       output_layer = Dense(
#           self.output_weights[atomtype][0].shape[0])(prev_layer)
#       return output_layer

#     frag1_zeros = tf.zeros_like(frag1_z, dtype=tf.float32)
#     frag2_zeros = tf.zeros_like(frag2_z, dtype=tf.float32)
#     complex_zeros = tf.zeros_like(complex_z, dtype=tf.float32)

#     frag1_atomtype_energy = []
#     frag2_atomtype_energy = []
#     complex_atomtype_energy = []

#     for ind, atomtype in enumerate(atom_types):
#       frag1_outputs = tf.map_fn(lambda x: atomnet(x, ind), frag1_layer)
#       frag2_outputs = tf.map_fn(lambda x: atomnet(x, ind), frag2_layer)
#       complex_outputs = tf.map_fn(lambda x: atomnet(x, ind), complex_layer)

#       cond = tf.equal(frag1_z, atomtype)
#       frag1_atomtype_energy.append(tf.where(cond, frag1_outputs, frag1_zeros))
#       cond = tf.equal(frag2_z, atomtype)
#       frag2_atomtype_energy.append(tf.where(cond, frag2_outputs, frag2_zeros))
#       cond = tf.equal(complex_z, atomtype)
#       complex_atomtype_energy.append(
#           tf.where(cond, complex_outputs, complex_zeros))

#     frag1_outputs = tf.add_n(frag1_atomtype_energy)
#     frag2_outputs = tf.add_n(frag2_atomtype_energy)
#     complex_outputs = tf.add_n(complex_atomtype_energy)

#     frag1_energy = tf.reduce_sum(frag1_outputs, 1)
#     frag2_energy = tf.reduce_sum(frag2_outputs, 1)
#     complex_energy = tf.reduce_sum(complex_outputs, 1)
#     binding_energy = complex_energy - (frag1_energy + frag2_energy)
#     return tf.expand_dims(binding_energy, axis=1)



class AtomicConvModel(KerasModel):
class AtomicConvModel(KerasModel):
  """Implements an Atomic Convolution Model.
  """Implements an Atomic Convolution Model.
@@ -203,7 +107,7 @@ class AtomicConvModel(KerasModel):
    learning_rate: float
    learning_rate: float
      Learning rate for the model.
      Learning rate for the model.
    """
    """
    # TODO: Turning off queue for now. Safe to re-activate?

    self.complex_num_atoms = complex_num_atoms
    self.complex_num_atoms = complex_num_atoms
    self.frag1_num_atoms = frag1_num_atoms
    self.frag1_num_atoms = frag1_num_atoms
    self.frag2_num_atoms = frag2_num_atoms
    self.frag2_num_atoms = frag2_num_atoms
@@ -260,16 +164,11 @@ class AtomicConvModel(KerasModel):
        regularizer = tf.keras.regularizers.l2(weight_decay_penalty)
        regularizer = tf.keras.regularizers.l2(weight_decay_penalty)
    else:
    else:
      regularizer = None
      regularizer = None
    # score = AtomicConvScore(self.atom_types, layer_sizes)([

    #    self._frag1_conv, self._frag2_conv, self._complex_conv, frag1_z,
    #    frag2_z, complex_z
    # ])
    # print("score")
    # print(score)
    prev_layer = concat
    prev_layer = concat
    # dropout_switch = Input(shape=tuple())
    prev_size = concat.shape[0]
    prev_size = concat.shape[0]
    next_activation = None
    next_activation = None

    # Add the dense layers
    # Add the dense layers


    for size, weight_stddev, bias_const, dropout, activation_fn in zip(
    for size, weight_stddev, bias_const, dropout, activation_fn in zip(
@@ -278,9 +177,6 @@ class AtomicConvModel(KerasModel):
      layer = prev_layer
      layer = prev_layer
      if next_activation is not None:
      if next_activation is not None:
        layer = Activation(next_activation)(layer)
        layer = Activation(next_activation)(layer)
      print("size")
      print(size)
      # layer = Dense(100)(layer)
      layer = Dense(
      layer = Dense(
          size,
          size,
          kernel_initializer=tf.keras.initializers.TruncatedNormal(
          kernel_initializer=tf.keras.initializers.TruncatedNormal(
@@ -305,11 +201,6 @@ class AtomicConvModel(KerasModel):
        bias_initializer=tf.constant_initializer(
        bias_initializer=tf.constant_initializer(
            value=bias_init_consts[-1]))(prev_layer))
            value=bias_init_consts[-1]))(prev_layer))
    loss: Union[dc.models.losses.Loss, LossFn]
    loss: Union[dc.models.losses.Loss, LossFn]
    # prev_layer = Dense(100)(prev_layer)
    # output = Dense(1)(prev_layer)
    # print("output")
    # print(output)
    # loss = dc.models.losses.L2Loss()


    model = tf.keras.Model(
    model = tf.keras.Model(
        inputs=[
        inputs=[
@@ -317,7 +208,6 @@ class AtomicConvModel(KerasModel):
            frag2_nbrs_z, frag2_z, complex_X, complex_nbrs, complex_nbrs_z,
            frag2_nbrs_z, frag2_z, complex_X, complex_nbrs, complex_nbrs_z,
            complex_z
            complex_z
        ],
        ],
        # outputs=score)
        outputs=output)
        outputs=output)
    super(AtomicConvModel, self).__init__(
    super(AtomicConvModel, self).__init__(
        model, L2Loss(), batch_size=batch_size, **kwargs)
        model, L2Loss(), batch_size=batch_size, **kwargs)
+36 −21
Original line number Original line Diff line number Diff line
@@ -6,14 +6,26 @@ import os


import pytest
import pytest


import deepchem
import numpy as np
import tensorflow as tf
import unittest
import numpy as np
import numpy as np
from deepchem.models import atomic_conv
from deepchem.models import atomic_conv
from deepchem.data import NumpyDataset
from deepchem.data import NumpyDataset
from deepchem.feat import ComplexNeighborListFragmentAtomicCoordinates
from deepchem.feat import AtomicConvFeaturizer


def test_atomic_conv_initialize():
  """Build a minimal AtomicConvModel and check the attributes it exposes."""
  # Smallest configuration used in this module: one task, one sample per
  # batch, a single dense layer of width 1, and two 5-atom fragments that
  # together form a 10-atom complex.
  model_kwargs = dict(
      n_tasks=1,
      batch_size=1,
      layer_sizes=[1],
      frag1_num_atoms=5,
      frag2_num_atoms=5,
      complex_num_atoms=10)
  acm = atomic_conv.AtomicConvModel(**model_kwargs)

  # The complex size passed to the constructor is readable on the model.
  assert acm.complex_num_atoms == 10
  # atom_types is not passed above; the model is expected to expose 15 entries.
  assert len(acm.atom_types) == 15




@pytest.mark.slow
@pytest.mark.slow
@@ -26,6 +38,7 @@ def test_atomic_conv():
  atomic_convnet = atomic_conv.AtomicConvModel(
  atomic_convnet = atomic_conv.AtomicConvModel(
      n_tasks=1,
      n_tasks=1,
      batch_size=batch_size,
      batch_size=batch_size,
      layer_sizes=[10],
      frag1_num_atoms=5,
      frag1_num_atoms=5,
      frag2_num_atoms=5,
      frag2_num_atoms=5,
      complex_num_atoms=10,
      complex_num_atoms=10,
@@ -40,7 +53,6 @@ def test_atomic_conv():
  frag1_z = np.random.randint(10, size=(N_atoms))
  frag1_z = np.random.randint(10, size=(N_atoms))
  frag2_coords = np.random.rand(N_atoms, 3)
  frag2_coords = np.random.rand(N_atoms, 3)
  frag2_nbr_list = {0: [], 1: [], 2: [], 3: [], 4: []}
  frag2_nbr_list = {0: [], 1: [], 2: [], 3: [], 4: []}
  #frag2_z = np.random.rand(N_atoms, 3)
  frag2_z = np.random.randint(10, size=(N_atoms))
  frag2_z = np.random.randint(10, size=(N_atoms))
  system_coords = np.random.rand(2 * N_atoms, 3)
  system_coords = np.random.rand(2 * N_atoms, 3)
  system_nbr_list = {
  system_nbr_list = {
@@ -63,20 +75,13 @@ def test_atomic_conv():
  features = np.asarray(features)
  features = np.asarray(features)
  labels = np.random.rand(batch_size)
  labels = np.random.rand(batch_size)
  train = NumpyDataset(features, labels)
  train = NumpyDataset(features, labels)
  #atomic_convnet.fit(train, nb_epoch=300)
  atomic_convnet.fit(train, nb_epoch=150)
  atomic_convnet.fit(train, nb_epoch=150)
  print("labels")
  print(labels)
  print("atomic_convnet.predict(train)")
  print(atomic_convnet.predict(train))
  assert np.allclose(labels, atomic_convnet.predict(train), atol=0.01)
  assert np.allclose(labels, atomic_convnet.predict(train), atol=0.01)




@pytest.mark.slow
@pytest.mark.slow
def test_atomic_conv_variable():
def test_atomic_conv_variable():
  """A simple test that initializes and fits an AtomicConvModel on variable input size."""
  """A simple test that initializes and fits an AtomicConvModel on variable input size."""
  # For simplicity, let's assume both molecules have same number of
  # atoms.
  frag1_num_atoms = 1000
  frag1_num_atoms = 1000
  frag2_num_atoms = 1200
  frag2_num_atoms = 1200
  complex_num_atoms = frag1_num_atoms + frag2_num_atoms
  complex_num_atoms = frag1_num_atoms + frag2_num_atoms
@@ -84,6 +89,9 @@ def test_atomic_conv_variable():
  atomic_convnet = atomic_conv.AtomicConvModel(
  atomic_convnet = atomic_conv.AtomicConvModel(
      n_tasks=1,
      n_tasks=1,
      batch_size=batch_size,
      batch_size=batch_size,
      layer_sizes=[
          10,
      ],
      frag1_num_atoms=frag1_num_atoms,
      frag1_num_atoms=frag1_num_atoms,
      frag2_num_atoms=frag2_num_atoms,
      frag2_num_atoms=frag2_num_atoms,
      complex_num_atoms=complex_num_atoms)
      complex_num_atoms=complex_num_atoms)
@@ -108,6 +116,9 @@ def test_atomic_conv_variable():
  labels = np.zeros(batch_size)
  labels = np.zeros(batch_size)
  train = NumpyDataset(features, labels)
  train = NumpyDataset(features, labels)
  atomic_convnet.fit(train, nb_epoch=1)
  atomic_convnet.fit(train, nb_epoch=1)
  preds = atomic_convnet.predict(train)
  assert preds.shape == (1, 1, 1)
  assert np.count_nonzero(preds) > 0




@pytest.mark.slow
@pytest.mark.slow
@@ -117,28 +128,29 @@ def test_atomic_conv_with_feat():
  ligand_file = os.path.join(dir_path,
  ligand_file = os.path.join(dir_path,
                             "../../feat/tests/data/3zso_ligand_hyd.pdb")
                             "../../feat/tests/data/3zso_ligand_hyd.pdb")
  protein_file = os.path.join(dir_path,
  protein_file = os.path.join(dir_path,
                              "../../feat/tests/data/3zso_protein.pdb")
                              "../../feat/tests/data/3zso_protein_noH.pdb")
  # Pulled from PDB files. For larger datasets with more PDBs, would use
  # Pulled from PDB files. For larger datasets with more PDBs, would use
  # max num atoms instead of exact.
  # max num atoms instead of exact.
  frag1_num_atoms = 44  # for ligand atoms
  frag1_num_atoms = 44  # for ligand atoms
  frag2_num_atoms = 2336  # for protein atoms
  frag2_num_atoms = 2334  # for protein atoms
  complex_num_atoms = 2380  # in total
  complex_num_atoms = 2378  # in total
  max_num_neighbors = 4
  max_num_neighbors = 4
  # Cutoff in angstroms
  # Cutoff in angstroms
  neighbor_cutoff = 4
  neighbor_cutoff = 4
  complex_featurizer = ComplexNeighborListFragmentAtomicCoordinates(
  complex_featurizer = AtomicConvFeaturizer(frag1_num_atoms, frag2_num_atoms,
      frag1_num_atoms, frag2_num_atoms, complex_num_atoms, max_num_neighbors,
                                            complex_num_atoms,
      neighbor_cutoff)
                                            max_num_neighbors, neighbor_cutoff)
  # arbitrary label
  # arbitrary label
  labels = np.array([0])
  labels = np.array([0])
  features, _ = complex_featurizer.featurize([ligand_file], [protein_file])
  features = complex_featurizer.featurize([(ligand_file, protein_file)])
  dataset = deepchem.data.DiskDataset.from_numpy(features, labels)
  dataset = NumpyDataset(features, labels)


  batch_size = 1
  batch_size = 1
  print("Constructing Atomic Conv model")
  print("Constructing Atomic Conv model")
  atomic_convnet = atomic_conv.AtomicConvModel(
  atomic_convnet = atomic_conv.AtomicConvModel(
      n_tasks=1,
      n_tasks=1,
      batch_size=batch_size,
      batch_size=batch_size,
      layer_sizes=[10],
      frag1_num_atoms=frag1_num_atoms,
      frag1_num_atoms=frag1_num_atoms,
      frag2_num_atoms=frag2_num_atoms,
      frag2_num_atoms=frag2_num_atoms,
      complex_num_atoms=complex_num_atoms)
      complex_num_atoms=complex_num_atoms)
@@ -146,3 +158,6 @@ def test_atomic_conv_with_feat():
  print("About to call fit")
  print("About to call fit")
  # Run a fitting operation
  # Run a fitting operation
  atomic_convnet.fit(dataset)
  atomic_convnet.fit(dataset)
  preds = atomic_convnet.predict(dataset)
  assert preds.shape == (1, 1, 1)
  assert np.count_nonzero(preds) > 0
+3 −3
Original line number Original line Diff line number Diff line
@@ -229,10 +229,10 @@ RdkitGridFeaturizer
  :members:
  :members:
  :inherited-members:
  :inherited-members:


AtomConvFeaturizer
AtomicConvFeaturizer
^^^^^^^^^^^^^^^^^^
^^^^^^^^^^^^^^^^^^^^


.. autoclass:: deepchem.feat.NeighborListComplexAtomicCoordinates
.. autoclass:: deepchem.feat.AtomicConvFeaturizer
  :members:
  :members:
  :inherited-members:
  :inherited-members: