Commit d009ccb9 authored by Bharath Ramsundar's avatar Bharath Ramsundar
Browse files

More layers and API updates

parent a903efcb
Loading
Loading
Loading
Loading
+5 −2
Original line number Diff line number Diff line
@@ -1049,8 +1049,11 @@ class Databag(object):
  A utility class to iterate through multiple datasets together.
  """

  def __init__(self):
  def __init__(self, datasets=None):
    if datasets is None:
      self.datasets = dict()
    else:
      self.datasets = datasets

  def add_dataset(self, key, dataset):
    self.datasets[key] = dataset
+109 −6
Original line number Diff line number Diff line
@@ -49,6 +49,25 @@ class Layer(object):
  def __hash__(self):
    return hash(self.__key())

  def __call__(self, *in_layers):
    if len(in_layers) > 0:
      layers = []
      for in_layer in in_layers:
        if isinstance(in_layer, Layer):
          layers.append(layer)
        elif isinstance(in_layer, tf.Tensor):
          layers.append(TensorWrapper(in_layer))
        else:
          raise ValueError("Layer must be invoked on layers or tensors")
      self.in_layers = layers
    self._create_tensor()
  

class TensorWrapper(Layer):
  """Used to wrap a tensorflow tensor."""
  def __init__(self, out_tensor):
    self.out_tensor = out_tensor


class Conv1DLayer(Layer):

@@ -269,7 +288,8 @@ class SoftMax(Layer):

class Concat(Layer):

  def __init__(self, **kwargs):
  def __init__(self, axis=1, **kwargs):
    self.axis = axis
    super(Concat, self).__init__(**kwargs)

  def _create_tensor(self):
@@ -278,10 +298,30 @@ class Concat(Layer):
      return self.out_tensor
    out_tensors = [x.out_tensor for x in self.in_layers]

    self.out_tensor = tf.concat(out_tensors, 1)
    self.out_tensor = tf.concat(out_tensors, axis=self.axis)
    return self.out_tensor


class InteratomicL2Distances(Layer):
  """Compute (squared) L2 Distances between atoms given neighbors."""

  def _create_tensor(self):
    if len(self.in_layers) != 2:
      raise ValueError("InteratomicDistances requires coords,nbr_list")
    coords, nbr_list = (self.in_layers[0].out_tensor,
                        self.in_layers[1].out_tensor)
    N_atoms, ndim = coords.get_shape()
    _, M = nbr_list.get_shape()
    # Shape (N_atoms, M, ndim)
    nbr_coords = tf.gather(coords, nbr_list)
    # Shape (N_atoms, M, ndim)
    tiled_atom_coords = tf.tile(
        tf.reshape(atom_coords, (N_atoms, 1, ndim)), (1, M, 1))
    # Shape (N_atoms, M)
    dists = tf.reduce_sum((tiled_atom_coords - nbr_coords)**2, axis=2)



class SoftMaxCrossEntropy(Layer):

  def __init__(self, **kwargs):
@@ -299,6 +339,10 @@ class SoftMaxCrossEntropy(Layer):

class ReduceMean(Layer):

  def __init__(self, axis=None, **kwargs):
    self.axis=axis
    super(ReduceMean, self).__init__(**kwargs)

  def _create_tensor(self):
    if len(self.in_layers) > 1:
      out_tensors = [x.out_tensor for x in self.in_layers]
@@ -309,16 +353,41 @@ class ReduceMean(Layer):
    self.out_tensor = tf.reduce_mean(self.out_tensor)
    return self.out_tensor

class ToFloat(Layer):
  def _create_tensor(self):
    if len(self.in_layers) > 1:
      raise ValueError("Only one layer supported.")
    self.out_tensor = tf.to_float(self.in_layers[0].out_tensor)
    return self.out_tensor

class ReduceSum(Layer):

  def __init__(self, axis=None, **kwargs):
    self.axis=axis
    super(ReduceSum, self).__init__(**kwargs)

  def _create_tensor(self):
    if len(self.in_layers) > 1:
      out_tensors = [x.out_tensor for x in self.in_layers]
      self.out_tensor = tf.stack(out_tensors)
    else:
      self.out_tensor = self.in_layers[0].out_tensor

    self.out_tensor = tf.reduce_sum(self.out_tensor, axis=self.axis)
    return self.out_tensor


class ReduceSquareDifference(Layer):

  def __init__(self, **kwargs):
  def __init__(self, axis=None, **kwargs):
    self.axis=axis
    super(ReduceSquareDifference, self).__init__(**kwargs)

  def _create_tensor(self):
    a = self.in_layers[0].out_tensor
    b = self.in_layers[1].out_tensor
    self.out_tensor = tf.reduce_mean(tf.squared_difference(a, b))
    self.out_tensor = tf.reduce_mean(tf.squared_difference(a, b),
                                     axis=self.axis)
    return self.out_tensor


@@ -651,10 +720,44 @@ class VinaHydrogenBond(Layer):
                               tf.where(d < 0,
                                        (1.0 / 0.7) * (0 - d),
                                        tf.zeros_like(d)))
    return self.out_tensor

def VinaGaussianFirst(Layer):
class VinaGaussianFirst(Layer):
  """Computes Autodock Vina's first Gaussian interaction term."""
  pass
  
  def _create_tensor(self):
    d = self.in_layers[0].out_tensor
    self.out_tensor = tf.exp(-(d / 0.5)**2)
    return self.out_tensor

class VinaGaussianSecond(Layer):
  """Computes Autodock Vina's second Gaussian interaction term."""

  def _create_tensor(self):
    d = self.in_layers[0].out_tensor
    self.out_tensor = tf.exp(-((d - 3) / 2)**2)
    return self.out_tensor


class WeightedLinearCombo(Layer):
  """Computes a weighted linear combination of input layers.""" 

  def __init__(self, std=.3, **kwargs):
    self.std = std
    super(WeightedLinearCombo, self).__init__(**kwargs)

  def _create_tensor(self):
    weights = []
    out_tensor = None
    for in_layer in self.in_layers:
      w = tf.Variable(tf.random_normal([1,], stddev=self.std))
      if out_tensor is None:
        out_tensor = w * in_layer.out_tensor
      else:
        out_tensor += w * in_layer.out_tensor
    self.out_tensor = out_tensor
    return self.out_tensor

    
class NeighborList(Layer):
  """Computes a neighbor-list on the GPU.
+106 −0
Original line number Diff line number Diff line
import unittest

import numpy as np
import os
from nose.tools import assert_true

import deepchem as dc
from deepchem.data import NumpyDataset
from deepchem.data.datasets import Databag
from deepchem.models.tensorgraph.layers import ReduceSum 
from deepchem.models.tensorgraph.layers import Feature, Label
from deepchem.models.tensorgraph.layers import ToFloat
from deepchem.models.tensorgraph.layers import NeighborList
from deepchem.models.tensorgraph.layers import ReduceSquareDifference
from deepchem.models.tensorgraph.layers import WeightedLinearCombo
from deepchem.models.tensorgraph.layers import InteratomicL2Distances
from deepchem.models.tensorgraph.tensor_graph import TensorGraph


class TestDocking(unittest.TestCase):
  """
  Test that tensorgraph docking-style models work. 
  """

  def test_neighbor_list(self):
    """Test that neighbor lists can be constructed."""
    N_atoms = 10
    start = 0
    stop = 12
    nbr_cutoff = 3
    ndim = 3
    M = 6
    k = 5
    # The number of cells which we should theoretically have
    n_cells = int(((stop - start) / nbr_cutoff)**ndim)

    X = np.random.rand(N_atoms, ndim)
    y = np.random.rand(N_atoms, 1)
    dataset = NumpyDataset(X, y)

    features = Feature(shape=(N_atoms, ndim))
    labels = Label(shape=(N_atoms,))
    nbr_list = NeighborList(N_atoms, M, ndim, n_cells, k, nbr_cutoff,
                            in_layers=[features])
    nbr_list = ToFloat(in_layers=[nbr_list])
    # This isn't a meaningful loss, but just for test
    loss = ReduceSum(in_layers=[nbr_list])
    tg = dc.models.TensorGraph(use_queue=False)
    tg.add_output(nbr_list)
    tg.set_loss(loss)

    tg.build()

  def test_weighted_combo(self):
    """Tests that weighted linear combinations can be built"""
    N = 10
    n_features = 5

    X1 = NumpyDataset(np.random.rand(N, n_features))
    X2 = NumpyDataset(np.random.rand(N, n_features))
    y = NumpyDataset(np.random.rand(N))

    features_1 = Feature(shape=(None, n_features))
    features_2 = Feature(shape=(None, n_features))
    labels = Label(shape=(None,))

    combo = WeightedLinearCombo(in_layers=[features_1, features_2])
    out = ReduceSum(in_layers=[combo], axis=1)
    loss = ReduceSquareDifference(in_layers=[out, labels])

    databag = Databag({features_1: X1, features_2: X2, labels: y})

    tg = dc.models.TensorGraph(learning_rate=0.1, use_queue=False)
    tg.set_loss(loss)
    tg.fit_generator(databag.iterbatches(epochs=1))

  def test_vina(self):
    """Test that vina graph can be constructed in TensorGraph."""

    prot_coords = Features(shape=(N_protein, 3))
    prot_Z = Features(shape=(N_protein,), dtype=tf.int32)
    ligand_coords = Features(shape=(N_ligand, 3))
    ligand_Z = Features(shape=(N_ligand,), dtype=tf.int32)
    labels = Label(shape=(1,))

    coords = Concat(in_layers=[prot_coords, ligand_coords], axis=0)
    Z = Concat(in_layers=[prot_Z, ligand_Z], axis=0)

    # Now an (N, M) shape
    nbr_list = NeighborList(N_protein+N_ligand, M, ndim, n_cells, k,
                            nbr_cutoff, in_layers=[coords])

  def test_interatomic_distances(self):
    """Test that the interatomic distance calculation works."""
    N_atoms = 5
    M = 2
    ndim = 3

    coords = np.random.rand(N_atoms, ndim)
    nbr_list = np.random.randint(0, N_atoms, size=(N_atoms, M))

    coords_tensor = tf.convert_to_tensor(coords)
    nbr_list_tensor = tf.convert_to_tensor(nbr_list)

    dist_tensor = 
+0 −28
Original line number Diff line number Diff line
@@ -208,31 +208,3 @@ class TestTensorGraph(unittest.TestCase):
    tg1 = TensorGraph.load_from_dir(tg.model_dir)
    prediction2 = np.squeeze(tg1.predict_proba_on_batch(X))
    assert_true(np.all(np.isclose(prediction, prediction2, atol=0.01)))

  def test_neighbor_list(self):
    N_atoms = 10
    start = 0
    stop = 12
    nbr_cutoff = 3
    ndim = 3
    M = 6
    k = 5
    # The number of cells which we should theoretically have
    n_cells = int(((stop - start) / nbr_cutoff)**ndim)

    X = np.random.rand(N_atoms, ndim)
    y = np.random.rand(N_atoms, 1)
    dataset = NumpyDataset(X, y)

    features = Feature(shape=(N_atoms, ndim))
    labels = Label(shape=(N_atoms,))
    nbr_list = NeighborList(N_atoms, M, ndim, n_cells, k, nbr_cutoff,
                            in_layers=[features])
    # This isn't a meaningful loss, but just for test
    loss = ReduceMean(in_layers=[nbr_list])
    tg = dc.models.TensorGraph(use_queue=False)
    tg.add_output(nbr_list)
    tg.set_loss(loss)

    tg.fit(dataset, nb_epoch=1)