Commit 01661c84 authored by Bharath Ramsundar
Browse files

Basic vina test passing.

parent 90021fac
Loading
Loading
Loading
Loading
+28 −20
Original line number Diff line number Diff line
@@ -678,7 +678,6 @@ class BatchNormLayer(Layer):
    self.out_tensor = tf.layers.batch_normalization(parent_tensor)
    return self.out_tensor


class WeightedError(Layer):

  def _create_tensor(self):
@@ -787,13 +786,14 @@ class NeighborList(Layer):
  are close to each other spatially
  """

  def __init__(self, max_num_atoms, max_num_nbrs, ndim, n_cells, k, nbr_cutoff, **kwargs):
  def __init__(self, N_atoms, M_nbrs, ndim, n_cells, k, nbr_cutoff, start,
               stop, **kwargs):
    """
    Parameters
    ----------
    max_num_atoms: int
    N_atoms: int
      Maximum number of atoms this layer will neighbor-list.
    max_num_nbrs: int
    M_nbrs: int
      Maximum number of spatial neighbors possible for atom.
    ndim: int
      Dimensionality of space atoms live in. (Typically 3D, but sometimes will
@@ -802,16 +802,18 @@ class NeighborList(Layer):
      Number of grid cells in the simulation box.
    k: int
      Number of nearest neighbors to pull in using tf.nn.top_k.
      TODO(rbharath): Are both k and max_num_nbrs needed?
      TODO(rbharath): Are both k and M_nbrs needed?
    nbr_cutoff: float
      Length in Angstroms (?) at which atom boxes are gridded.
    """
    self.N = max_num_atoms
    self.M = max_num_nbrs
    self.N = N_atoms 
    self.M = M_nbrs 
    self.ndim = ndim
    self.n_cells = n_cells
    self.k = k
    self.nbr_cutoff = nbr_cutoff
    self.start = start
    self.stop = stop
    super(NeighborList, self).__init__(**kwargs)

  def _create_tensor(self):
@@ -823,6 +825,11 @@ class NeighborList(Layer):
      # TODO(rbharath): Support batching
      raise ValueError("Parent tensor must be (num_atoms, ndum)")
    coords = parent.out_tensor
    ################################################################ DEBUG
    print("NeighborList._create_tensor")
    print("coords")
    print(coords)
    ################################################################ DEBUG
    nbr_list = self._compute_nbr_list(coords)
    self.out_tensor = nbr_list
    return nbr_list
@@ -843,9 +850,7 @@ class NeighborList(Layer):
    """
    N, M, n_cells, ndim, k = self.N, self.M, self.n_cells, self.ndim, self.k
    nbr_cutoff = self.nbr_cutoff
    start = tf.to_int32(tf.reduce_min(coords))
    stop = tf.to_int32(tf.reduce_max(coords))
    cells = self._get_cells(start, stop)
    cells = self._get_cells()
    # Associate each atom with cell it belongs to. O(N*n_cells)
    # Shape (n_cells, k)
    atoms_in_cells, _ = self._put_atoms_in_cells(coords, cells)
@@ -1033,6 +1038,13 @@ class NeighborList(Layer):
    # Tile cells to form arrays of size (n_cells*n_cells, ndim)
    # Two tilings (a, b, c, a, b, c, ...) vs. (a, a, a, b, b, b, etc.)
    # Tile (a, a, a, b, b, b, etc.)
    ################################################################# DEBUG
    print("_compute_neighbor_cells")
    print("cells")
    print(cells)
    print("n_cells")
    print(n_cells)
    ################################################################# DEBUG
    tiled_centers = tf.reshape(
        tf.tile(cells, (1, n_cells)), (n_cells * n_cells, ndim))
    # Tile (a, b, c, a, b, c, ...)
@@ -1056,7 +1068,7 @@ class NeighborList(Layer):

    return closest_inds

  def _get_cells(self, start, stop):
  def _get_cells(self):
    """Returns the locations of all grid points in box.

    Suppose start is -10 Angstrom, stop is 10 Angstrom, nbr_cutoff is 1.
@@ -1066,13 +1078,9 @@ class NeighborList(Layer):
    Returns
    -------
    cells: tf.Tensor
      (box_size**ndim, ndim) shape.
      (n_cells, ndim) shape.
    """
    return tf.reshape(
        tf.transpose(
            tf.stack(
                tf.meshgrid(
                    * [tf.range(start, stop, self.nbr_cutoff) for _ in range(self.ndim)]))),
        (-1, self.ndim))

  
    start, stop, nbr_cutoff = self.start, self.stop, self.nbr_cutoff
    mesh_args = [tf.range(start, stop, nbr_cutoff) for _ in range(self.ndim)]
    return tf.reshape(tf.transpose(tf.stack(tf.meshgrid(*mesh_args))),
        (self.n_cells, self.ndim))
+122 −3
Original line number Diff line number Diff line
@@ -10,7 +10,9 @@ import deepchem as dc
from deepchem.data import NumpyDataset
from deepchem.data.datasets import Databag
from deepchem.models.tensorgraph.layers import ReduceSum 
from deepchem.models.tensorgraph.layers import Flatten
from deepchem.models.tensorgraph.layers import Feature, Label
from deepchem.models.tensorgraph.layers import Dense
from deepchem.models.tensorgraph.layers import ToFloat
from deepchem.models.tensorgraph.layers import Concat
from deepchem.models.tensorgraph.layers import NeighborList
@@ -85,6 +87,123 @@ class TestDocking(test_util.TensorFlowTestCase):
    tg.set_loss(loss)
    tg.fit_generator(databag.iterbatches(epochs=1))

  def test_vina_repulsion(self):
    """Check that VinaRepulsion maps an (N, M) input to an (N, M) output."""
    n_atoms, m_nbrs = 10, 5
    # Random stand-in for the (N_atoms, M_nbrs) input the layer consumes.
    inputs = tf.convert_to_tensor(np.random.rand(n_atoms, m_nbrs))

    repulsion_op = VinaRepulsion()(inputs)

    with self.test_session():
      result = repulsion_op.eval()
      assert result.shape == (n_atoms, m_nbrs)

  def test_vina_hydrophobic(self):
    """Check that VinaHydrophobic maps an (N, M) input to an (N, M) output."""
    n_atoms, m_nbrs = 10, 5
    # Random stand-in for the (N_atoms, M_nbrs) input the layer consumes.
    inputs = tf.convert_to_tensor(np.random.rand(n_atoms, m_nbrs))

    hydrophobic_op = VinaHydrophobic()(inputs)

    with self.test_session():
      result = hydrophobic_op.eval()
      assert result.shape == (n_atoms, m_nbrs)

  def test_vina_hbond(self):
    """Check that VinaHydrogenBond maps an (N, M) input to an (N, M) output."""
    n_atoms, m_nbrs = 10, 5
    # Random stand-in for the (N_atoms, M_nbrs) input the layer consumes.
    inputs = tf.convert_to_tensor(np.random.rand(n_atoms, m_nbrs))

    hbond_op = VinaHydrogenBond()(inputs)

    with self.test_session():
      result = hbond_op.eval()
      assert result.shape == (n_atoms, m_nbrs)

  def test_vina_gaussian_first(self):
    """Check that VinaGaussianFirst maps an (N, M) input to an (N, M) output."""
    n_atoms, m_nbrs = 10, 5
    # Random stand-in for the (N_atoms, M_nbrs) input the layer consumes.
    inputs = tf.convert_to_tensor(np.random.rand(n_atoms, m_nbrs))

    gauss_op = VinaGaussianFirst()(inputs)

    with self.test_session():
      result = gauss_op.eval()
      assert result.shape == (n_atoms, m_nbrs)

  def test_vina_gaussian_second(self):
    """Check that VinaGaussianSecond maps an (N, M) input to an (N, M) output."""
    n_atoms, m_nbrs = 10, 5
    # Random stand-in for the (N_atoms, M_nbrs) input the layer consumes.
    inputs = tf.convert_to_tensor(np.random.rand(n_atoms, m_nbrs))

    gauss_op = VinaGaussianSecond()(inputs)

    with self.test_session():
      result = gauss_op.eval()
      assert result.shape == (n_atoms, m_nbrs)

  def test_neighbor_list(self):
    """Test that NeighborList works.

    Random coordinates are drawn uniformly from [start, stop) in each of the
    `ndim` dimensions, passed through NeighborList, and the evaluated result
    is checked to have shape (N_atoms, M_nbrs).
    """
    N_atoms = 5
    start = 0
    stop = 12
    nbr_cutoff = 3
    ndim = 3
    M_nbrs = 2
    k = 5
    # The number of cells which we should theoretically have
    n_cells = int(((stop - start) / nbr_cutoff)**ndim)

    with self.test_session() as sess:
      coords = start + np.random.rand(N_atoms, ndim) * (stop - start)
      coords = tf.stack(coords)
      # NeighborList takes the box bounds (start, stop) as required
      # positional arguments; the grid is built from them rather than from
      # the min/max of the coordinates.
      nbr_list = NeighborList(N_atoms, M_nbrs, ndim, n_cells, k, nbr_cutoff,
                              start, stop)(coords)
      nbr_list = nbr_list.eval()
      assert nbr_list.shape == (N_atoms, M_nbrs)

  def test_neighbor_list_vina(self):
    """Test under conditions closer to Vina usage.

    Wires a NeighborList into a TensorGraph
    (Feature -> NeighborList -> ToFloat -> Flatten -> Dense -> ReduceSum),
    uses the reduced output as the loss, and fits for one epoch on random
    coordinates drawn from [start, stop) in each dimension.
    """
    N_atoms = 5
    M_nbrs = 2
    ndim = 3
    k = 5
    start = 0
    stop = 4
    nbr_cutoff = 1
    # The number of cells which we should theoretically have. Cast to int
    # because NeighborList uses n_cells as a tensor dimension, and `/` yields
    # a float under true division.
    n_cells = int(((stop - start) / nbr_cutoff)**ndim)

    X = NumpyDataset(start + np.random.rand(N_atoms, ndim) * (stop - start))

    coords = Feature(shape=(N_atoms, ndim))

    # Now an (N, M) shape
    nbr_list = NeighborList(N_atoms, M_nbrs, ndim, n_cells, k,
                            nbr_cutoff, start, stop, in_layers=[coords])

    # Convert the neighbor list to float before feeding it to the Dense layer.
    nbr_list = ToFloat(in_layers=[nbr_list])
    flattened = Flatten(in_layers=[nbr_list])
    dense = Dense(out_channels=1, in_layers=[flattened])
    output = ReduceSum(in_layers=[dense])

    tg = dc.models.TensorGraph(learning_rate=0.1, use_queue=False)
    tg.set_loss(output)

    databag = Databag({coords: X})
    tg.fit_generator(databag.iterbatches(epochs=1))

  def test_vina(self):
    """Test that vina graph can be constructed in TensorGraph."""
    N_protein = 4
@@ -99,8 +218,8 @@ class TestDocking(test_util.TensorFlowTestCase):
    # The number of cells which we should theoretically have
    n_cells = ((stop - start) / nbr_cutoff)**ndim

    X_prot = NumpyDataset(np.random.rand(N_protein, ndim))
    X_ligand = NumpyDataset(np.random.rand(N_ligand, ndim))
    X_prot = NumpyDataset(start + np.random.rand(N_protein, ndim) * (stop - start))
    X_ligand = NumpyDataset(start + np.random.rand(N_ligand, ndim) * (stop - start))
    y = NumpyDataset(np.random.rand(1,))

    # TODO(rbharath): Mysteriously, the actual atom types aren't
@@ -118,7 +237,7 @@ class TestDocking(test_util.TensorFlowTestCase):

    # Now an (N, M) shape
    nbr_list = NeighborList(N_protein+N_ligand, M_nbrs, ndim, n_cells, k,
                            nbr_cutoff, in_layers=[coords])
                            nbr_cutoff, start, stop, in_layers=[coords])

    # Shape (N, M)
    dists = InteratomicL2Distances(N_protein+N_ligand, M_nbrs, ndim,