Commit d471172e authored by peastman's avatar peastman
Browse files

Can use SeqToSeq as a variational autoencoder

parent 8392f55f
Loading
Loading
Loading
Loading
+42 −1
Original line number Diff line number Diff line
@@ -504,9 +504,28 @@ class Transpose(Layer):


class CombineMeanStd(Layer):
  """Generate Gaussian noise."""

  def __init__(self, in_layers=None, **kwargs):
  def __init__(self, in_layers=None, training_only=False, **kwargs):
    """Create a CombineMeanStd layer.

    This layer should have two inputs with the same shape, and its output also has the
    same shape.  Each element of the output is a Gaussian distributed random number
    whose mean is the corresponding element of the first input, and whose standard
    deviation is the corresponding element of the second input.

    Parameters
    ----------
    in_layers: list
      the input layers.  The first one specifies the mean, and the second one specifies
      the standard deviation.
    training_only: bool
      if True, noise is only generated during training.  During prediction, the output
      is simply equal to the first input (that is, the mean of the distribution used
      during training).
    """
    super(CombineMeanStd, self).__init__(in_layers, **kwargs)
    self.training_only = training_only
    try:
      self._shape = self.in_layers[0].shape
    except:
@@ -519,6 +538,8 @@ class CombineMeanStd(Layer):
    mean_parent, std_parent = inputs[0], inputs[1]
    sample_noise = tf.random_normal(
        mean_parent.get_shape(), 0, 1, dtype=tf.float32)
    if self.training_only:
      sample_noise *= kwargs['training']
    out_tensor = mean_parent + (std_parent * sample_noise)
    if set_tensors:
      self.out_tensor = out_tensor
@@ -996,6 +1017,26 @@ class Log(Layer):
    return out_tensor


class Exp(Layer):
  """Compute the exponential of the input.

  The layer takes exactly one input and applies `tf.exp` to it.
  """

  def __init__(self, in_layers=None, **kwargs):
    """Create an Exp layer.

    Parameters
    ----------
    in_layers: list
      the input layers.  Exactly one input is required by the time
      create_tensor() is called.
    """
    super(Exp, self).__init__(in_layers, **kwargs)
    try:
      # The output has the same shape as the single input.
      self._shape = self.in_layers[0].shape
    except Exception:
      # Best effort only: leave the shape unset when no input with a known
      # shape is available yet.  Unlike a bare `except:`, this does not
      # swallow KeyboardInterrupt or SystemExit.
      pass

  def create_tensor(self, in_layers=None, set_tensors=True, **kwargs):
    """Build the TensorFlow op that computes the exponential.

    Parameters
    ----------
    in_layers: list
      optional inputs, forwarded to _get_input_tensors() to resolve the
      input tensors
    set_tensors: bool
      if True, the result is also stored in self.out_tensor

    Returns
    -------
    the tensor produced by tf.exp

    Raises
    ------
    ValueError
      if the number of resolved inputs is not exactly one
    """
    inputs = self._get_input_tensors(in_layers)
    if len(inputs) != 1:
      raise ValueError('Exp must have a single parent')
    out_tensor = tf.exp(inputs[0])
    if set_tensors:
      self.out_tensor = out_tensor
    return out_tensor


class InteratomicL2Distances(Layer):
  """Compute (squared) L2 Distances between atoms given neighbors."""

+48 −12
Original line number Diff line number Diff line
@@ -67,6 +67,7 @@ class SeqToSeq(TensorGraph):
               embedding_dimension=512,
               dropout=0.0,
               reverse_input=True,
               variational=False,
               **kwargs):
    """Construct a SeqToSeq model.

@@ -93,6 +94,10 @@ class SeqToSeq(TensorGraph):
    reverse_input: bool
      if True, reverse the order of input sequences before sending them into
      the encoder.  This can improve performance when working with long sequences.
    variational: bool
      if True, train the model as a variational autoencoder.  This adds random
      noise to the encoder, and also constrains the embedding to follow a unit
      Gaussian distribution.
    """
    super(SeqToSeq, self).__init__(
        use_queue=False, **kwargs)  # TODO can we make it work with the queue?
@@ -107,30 +112,61 @@ class SeqToSeq(TensorGraph):
    self._max_output_length = max_output_length
    self._features = layers.Feature(shape=(None, None, len(input_tokens)))
    self._labels = layers.Label(shape=(None, None, len(output_tokens)))
    self._gather_indices = layers.Feature(shape=(None, 2), dtype=tf.int32)
    self._gather_indices = layers.Feature(
        shape=(self.batch_size, 2), dtype=tf.int32)
    self._reverse_input = reverse_input
    self._variational = variational
    self.embedding = self._create_encoder(encoder_layers, dropout,
                                          embedding_dimension)
    self.output = self._create_decoder(decoder_layers, dropout,
                                       embedding_dimension)
    self.set_loss(self._create_loss())
    self.add_output(self.output)

  def _create_encoder(self, n_layers, dropout, embedding_dimension):
    """Create the encoder layers.

    Stacks `n_layers` GRU layers on top of the input features, each preceded
    by a Dropout layer when `dropout` is greater than zero, and then applies a
    Gather layer driven by the gather indices.  When the model is variational,
    two Dense layers produce the mean and standard deviation of the embedding,
    and a CombineMeanStd layer draws the embedding from that distribution.

    Parameters
    ----------
    n_layers: int
      the number of GRU layers in the encoder
    dropout: float
      the value passed to each Dropout layer.  No Dropout layers are created
      when this is 0.
    embedding_dimension: int
      the width passed to every GRU layer and to the variational Dense layers

    Returns
    -------
    the layer whose output is the embedding
    """
    prev_layer = self._features
    for _ in range(n_layers):
      if dropout > 0.0:
        prev_layer = layers.Dropout(dropout, in_layers=prev_layer)
      prev_layer = layers.GRU(
          embedding_dimension, self.batch_size, in_layers=prev_layer)
    prev_layer = layers.Gather(in_layers=[prev_layer, self._gather_indices])
    if self._variational:
      # Kept as attributes because the loss needs them for the KL term.
      self._embedding_mean = layers.Dense(
          embedding_dimension, in_layers=prev_layer)
      self._embedding_stddev = layers.Dense(
          embedding_dimension, in_layers=prev_layer)
      # Only inject noise while training.  At prediction time the embedding
      # is the mean, so encoding the same sequence twice gives the same result.
      prev_layer = layers.CombineMeanStd(
          [self._embedding_mean, self._embedding_stddev], training_only=True)
    return prev_layer

  def _create_decoder(self, n_layers, dropout, embedding_dimension):
    """Create the decoder layers.

    The embedding is passed through a Repeat layer configured with the
    maximum output length, then through `n_layers` GRU layers (each preceded
    by a Dropout layer when `dropout` is greater than zero), and finally
    through a softmax Dense layer with one unit per output token.

    Parameters
    ----------
    n_layers: int
      the number of GRU layers in the decoder
    dropout: float
      the value passed to each Dropout layer.  No Dropout layers are created
      when this is 0.
    embedding_dimension: int
      the width passed to every GRU layer

    Returns
    -------
    the softmax Dense layer that produces the output token probabilities.
    The caller is responsible for registering it as a model output.
    """
    prev_layer = layers.Repeat(
        self._max_output_length, in_layers=self.embedding)
    for _ in range(n_layers):
      if dropout > 0.0:
        prev_layer = layers.Dropout(dropout, in_layers=prev_layer)
      prev_layer = layers.GRU(
          embedding_dimension, self.batch_size, in_layers=prev_layer)
    return layers.Dense(
        len(self._output_tokens),
        in_layers=prev_layer,
        activation_fn=tf.nn.softmax)

  def _create_loss(self):
    """Create the loss function.

    The base loss is the negative log probability assigned to the labels,
    summed over axis 1 and averaged over the remaining axis.  Positions whose
    label vector sums to zero are masked out.  When the model is variational,
    a KL divergence term is added that pulls the embedding distribution
    toward a unit Gaussian.

    Returns
    -------
    the layer that computes the loss.  The caller is responsible for passing
    it to set_loss().
    """
    prob = layers.ReduceSum(self.output * self._labels, axis=2)
    mask = layers.ReduceSum(self._labels, axis=2)
    # The small constant keeps the log finite when the probability is 0.
    log_prob = layers.Log(prob + 1e-20) * mask
    loss = -layers.ReduceMean(layers.ReduceSum(log_prob, axis=1))
    if self._variational:
      mean_sq = self._embedding_mean * self._embedding_mean
      stddev_sq = self._embedding_stddev * self._embedding_stddev
      # KL(N(mean, stddev) || N(0, 1)) per element, up to the factor of 0.5
      # applied below.  The standard deviation comes from an unconstrained
      # Dense layer and can be exactly 0, so guard the log as for log_prob.
      kl = mean_sq + stddev_sq - layers.Log(stddev_sq + 1e-20) - 1
      loss += 0.5 * layers.ReduceMean(layers.ReduceSum(kl, axis=1))
    return loss

  def fit_sequences(self,
                    generator,
+8 −0
Original line number Diff line number Diff line
@@ -16,6 +16,7 @@ from deepchem.models.tensorgraph.layers import Constant
from deepchem.models.tensorgraph.layers import Conv1D, Squeeze
from deepchem.models.tensorgraph.layers import Conv2D
from deepchem.models.tensorgraph.layers import Dense
from deepchem.models.tensorgraph.layers import Exp
from deepchem.models.tensorgraph.layers import Flatten
from deepchem.models.tensorgraph.layers import GRU
from deepchem.models.tensorgraph.layers import Gather
@@ -267,6 +268,13 @@ class TestLayers(test_util.TensorFlowTestCase):
      result = Log()(value).eval()
      assert np.array_equal(np.log(value), result)

  def test_exp(self):
    """Test that Exp can be invoked and agrees with numpy's exponential."""
    value = np.random.uniform(size=(2, 3)).astype(np.float32)
    with self.test_session() as sess:
      result = Exp()(value).eval()
      # NumPy and TensorFlow use different float32 exp implementations, so the
      # results may differ in the last bits.  Compare with a tolerance rather
      # than requiring bit-for-bit equality.
      assert np.allclose(np.exp(value), result)

  def test_interatomic_distances(self):
    """Test that the interatomic distance calculation works."""
    N_atoms = 5
+11 −1
Original line number Diff line number Diff line
@@ -7,7 +7,7 @@ from deepchem.models.tensorgraph.graph_layers import Combine_AP, Separate_AP, \
  DTNNExtract, DAGLayer, DAGGather, MessagePassing, SetGather
from deepchem.models.tensorgraph.layers import Feature, Conv1D, Dense, Flatten, Reshape, Squeeze, Transpose, \
  CombineMeanStd, Repeat, Gather, GRU, L2Loss, Concat, SoftMax, \
  Constant, Variable, Add, Multiply, Log, InteratomicL2Distances, \
  Constant, Variable, Add, Multiply, Log, Exp, InteratomicL2Distances, \
  SoftMaxCrossEntropy, ReduceMean, ToFloat, ReduceSquareDifference, Conv2D, MaxPool2D, ReduceSum, GraphConv, GraphPool, \
  GraphGather, BatchNorm, WeightedError, \
  Conv3D, MaxPool3D, \
@@ -177,6 +177,16 @@ def test_Log_pickle():
  tg.save()


def test_Exp_pickle():
  """Build and save a TensorGraph whose output and loss are a single Exp layer."""
  graph = TensorGraph()
  exp_layer = Exp(Feature(shape=(graph.batch_size, 1)))
  # The same layer serves as both the model output and the loss.
  graph.add_output(exp_layer)
  graph.set_loss(exp_layer)
  graph.build()
  graph.save()


def testInteratomicL2Distances():
  """
    TODO(LESWING) what is ndim here?
+35 −10
Original line number Diff line number Diff line
@@ -3,6 +3,15 @@ import numpy as np
import unittest


def generate_sequences(sequence_length, num_sequences):
  """Yield random (input, target) pairs for an autoencoding task.

  Each sequence has a random length between 1 and `sequence_length`
  (inclusive) and consists of random integers between 0 and 9.  The input
  and the target of each pair are the same list object.

  Parameters
  ----------
  sequence_length: int
    the maximum length of a generated sequence
  num_sequences: int
    the number of pairs to yield
  """
  produced = 0
  while produced < num_sequences:
    # Draw the length first, then the tokens one at a time.
    length = np.random.randint(1, sequence_length + 1)
    tokens = []
    for _ in range(length):
      tokens.append(np.random.randint(10))
    yield (tokens, tokens)
    produced += 1


class TestSeqToSeq(unittest.TestCase):

  def test_int_sequence(self):
@@ -24,21 +33,13 @@ class TestSeqToSeq(unittest.TestCase):
    # really make it reliable, but I want to keep this test fast, and it should
    # still be able to reproduce a reasonable fraction of input sequences.

    def generate_sequences(num_sequences):
      for i in range(num_sequences):
        seq = [
            np.random.randint(10)
            for x in range(np.random.randint(1, sequence_length + 1))
        ]
        yield (seq, seq)

    s.fit_sequences(generate_sequences(25000))
    s.fit_sequences(generate_sequences(sequence_length, 25000))

    # Test it out.

    count1 = 0
    count4 = 0
    for sequence, target in generate_sequences(50):
    for sequence, target in generate_sequences(sequence_length, 50):
      pred1 = s.predict_from_sequence(sequence, beam_width=1)
      pred4 = s.predict_from_sequence(sequence, beam_width=4)
      if pred1 == sequence:
@@ -53,3 +54,27 @@ class TestSeqToSeq(unittest.TestCase):

    assert count1 >= 12
    assert count4 >= 12

  def test_variational(self):
    """Test using a SeqToSeq model as a variational autoencoder."""

    sequence_length = 10
    tokens = list(range(10))
    # variational=True is what makes this a VAE; without it the test only
    # exercises the plain sequence-to-sequence model.
    s = dc.models.SeqToSeq(
        tokens,
        tokens,
        sequence_length,
        encoder_layers=2,
        decoder_layers=2,
        embedding_dimension=128,
        learning_rate=0.01,
        variational=True)

    # Actually training a VAE takes far too long for a unit test.  Just run a
    # few steps of training to make sure nothing crashes, then check that the
    # results are at least internally consistent.
    #
    # NOTE(review): the consistency check compares two separate encoder
    # passes, so it requires the encoder to be noise-free at prediction time.
    # Confirm the model does not sample noise when predicting.

    s.fit_sequences(generate_sequences(sequence_length, 1000))
    for sequence, target in generate_sequences(sequence_length, 10):
      pred1 = s.predict_from_sequence(sequence, beam_width=1)
      embedding = s.predict_embedding(sequence)
      assert pred1 == s.predict_from_embedding(embedding, beam_width=1)