Can use SeqToSeq as a variational autoencoder (d471172e) · Commits · 钟慕尧 / deepchem

deepchem/models/tensorgraph/layers.py

+42 −1

Original line number	Diff line number	Diff line
		@@ -504,9 +504,28 @@ class Transpose(Layer):


		class CombineMeanStd(Layer):
		"""Generate Gaussian noise."""

		def __init__(self, in_layers=None, **kwargs):
		def __init__(self, in_layers=None, training_only=False, **kwargs):
		"""Create a CombineMeanStd layer.

		This layer should have two inputs with the same shape, and its output also has the
		same shape. Each element of the output is a Gaussian distributed random number
		whose mean is the corresponding element of the first input, and whose standard
		deviation is the corresponding element of the second input.

		Parameters
		----------
		in_layers: list
		the input layers. The first one specifies the mean, and the second one specifies
		the standard deviation.
		training_only: bool
		if True, noise is only generated during training. During prediction, the output
		is simply equal to the first input (that is, the mean of the distribution used
		during training).
		"""
		super(CombineMeanStd, self).__init__(in_layers, **kwargs)
		self.training_only = training_only
		try:
		self._shape = self.in_layers[0].shape
		except:
		@@ -519,6 +538,8 @@ class CombineMeanStd(Layer):
		mean_parent, std_parent = inputs[0], inputs[1]
		sample_noise = tf.random_normal(
		mean_parent.get_shape(), 0, 1, dtype=tf.float32)
		if self.training_only:
		sample_noise *= kwargs['training']
		out_tensor = mean_parent + (std_parent * sample_noise)
		if set_tensors:
		self.out_tensor = out_tensor
		@@ -996,6 +1017,26 @@ class Log(Layer):
		return out_tensor


		class Exp(Layer):
		  """Compute the elementwise exponential of the input.

		  The layer expects exactly one input and applies `tf.exp` to it.
		  """

		  def __init__(self, in_layers=None, **kwargs):
		    """Create an Exp layer.

		    Parameters
		    ----------
		    in_layers: Layer or list
		      the input layer(s). Exactly one input tensor must be available when
		      the output tensor is created.
		    """
		    super(Exp, self).__init__(in_layers, **kwargs)
		    try:
		      # The output is taken to have the same shape as the single input.
		      self._shape = self.in_layers[0].shape
		    except Exception:
		      # Best effort only: the input (or its shape) may not be available at
		      # construction time. Catching Exception rather than using a bare
		      # `except:` lets KeyboardInterrupt and SystemExit propagate.
		      pass

		  def create_tensor(self, in_layers=None, set_tensors=True, **kwargs):
		    """Build the tensor holding exp(input).

		    Parameters
		    ----------
		    in_layers: list
		      optional replacement inputs, forwarded to `_get_input_tensors`.
		    set_tensors: bool
		      if True, the result is also stored in `self.out_tensor`.

		    Returns
		    -------
		    The output of `tf.exp` applied to the single input tensor.

		    Raises
		    ------
		    ValueError
		      if the number of input tensors is not exactly one.
		    """
		    inputs = self._get_input_tensors(in_layers)
		    if len(inputs) != 1:
		      raise ValueError('Exp must have a single parent')
		    out_tensor = tf.exp(inputs[0])
		    if set_tensors:
		      self.out_tensor = out_tensor
		    return out_tensor


		class InteratomicL2Distances(Layer):
		"""Compute (squared) L2 Distances between atoms given neighbors."""

deepchem/models/tensorgraph/models/seqtoseq.py

+48 −12

Original line number	Diff line number	Diff line
		@@ -67,6 +67,7 @@ class SeqToSeq(TensorGraph):
		embedding_dimension=512,
		dropout=0.0,
		reverse_input=True,
		variational=False,
		**kwargs):
		"""Construct a SeqToSeq model.

		@@ -93,6 +94,10 @@ class SeqToSeq(TensorGraph):
		reverse_input: bool
		if True, reverse the order of input sequences before sending them into
		the encoder. This can improve performance when working with long sequences.
		variational: bool
		if True, train the model as a variational autoencoder. This adds random
		noise to the encoder, and also constrains the embedding to follow a unit
		Gaussian distribution.
		"""
		super(SeqToSeq, self).__init__(
		use_queue=False, **kwargs) # TODO can we make it work with the queue?
		@@ -107,30 +112,61 @@ class SeqToSeq(TensorGraph):
		self._max_output_length = max_output_length
		self._features = layers.Feature(shape=(None, None, len(input_tokens)))
		self._labels = layers.Label(shape=(None, None, len(output_tokens)))
		self._gather_indices = layers.Feature(shape=(None, 2), dtype=tf.int32)
		self._gather_indices = layers.Feature(
		shape=(self.batch_size, 2), dtype=tf.int32)
		self._reverse_input = reverse_input
		self._variational = variational
		self.embedding = self._create_encoder(encoder_layers, dropout,
		embedding_dimension)
		self.output = self._create_decoder(decoder_layers, dropout,
		embedding_dimension)
		self.set_loss(self._create_loss())
		self.add_output(self.output)

		def _create_encoder(self, n_layers, dropout, embedding_dimension):
		  """Create the encoder layers.

		  Starting from `self._features`, stacks `n_layers` GRU layers (each one
		  preceded by a Dropout layer when `dropout` is greater than zero) and
		  passes the result through a Gather layer driven by
		  `self._gather_indices`. When `self._variational` is set, two Dense layers
		  produce the embedding mean and standard deviation, which are stored on
		  the model and merged by a CombineMeanStd layer.

		  Parameters
		  ----------
		  n_layers: int
		    the number of GRU layers to stack.
		  dropout: float
		    the dropout rate; no Dropout layers are added when it is 0.
		  embedding_dimension: int
		    the width of every GRU layer and of the Dense mean/stddev layers.

		  Returns
		  -------
		  The layer whose output is the embedding.
		  """
		  prev_layer = self._features
		  for _ in range(n_layers):
		    if dropout > 0.0:
		      prev_layer = layers.Dropout(dropout, in_layers=prev_layer)
		    prev_layer = layers.GRU(
		        embedding_dimension, self.batch_size, in_layers=prev_layer)
		  prev_layer = layers.Gather(in_layers=[prev_layer, self._gather_indices])
		  if self._variational:
		    # The mean and standard deviation are kept on the model because the
		    # loss needs them for its KL term.
		    self._embedding_mean = layers.Dense(
		        embedding_dimension, in_layers=prev_layer)
		    self._embedding_stddev = layers.Dense(
		        embedding_dimension, in_layers=prev_layer)
		    # Noise is only wanted while training. With training_only=True the
		    # layer outputs the mean during prediction, so embeddings computed
		    # for the same input are repeatable.
		    prev_layer = layers.CombineMeanStd(
		        [self._embedding_mean, self._embedding_stddev], training_only=True)
		  return prev_layer

		def _create_decoder(self, n_layers, dropout, embedding_dimension):
		  """Create the decoder layers.

		  Repeats `self.embedding` `self._max_output_length` times, feeds it
		  through `n_layers` GRU layers (each one preceded by a Dropout layer when
		  `dropout` is greater than zero), and finishes with a softmax Dense layer
		  that has one unit per output token.

		  Parameters
		  ----------
		  n_layers: int
		    the number of GRU layers to stack.
		  dropout: float
		    the dropout rate; no Dropout layers are added when it is 0.
		  embedding_dimension: int
		    the width of every GRU layer.

		  Returns
		  -------
		  The softmax Dense layer. Registering it as a model output and building
		  the loss are left to the caller.
		  """
		  prev_layer = layers.Repeat(
		      self._max_output_length, in_layers=self.embedding)
		  for _ in range(n_layers):
		    if dropout > 0.0:
		      prev_layer = layers.Dropout(dropout, in_layers=prev_layer)
		    prev_layer = layers.GRU(
		        embedding_dimension, self.batch_size, in_layers=prev_layer)
		  return layers.Dense(
		      len(self._output_tokens),
		      in_layers=prev_layer,
		      activation_fn=tf.nn.softmax)

		def _create_loss(self):
		  """Create the loss function.

		  The reconstruction term multiplies `self.output` by `self._labels`, sums
		  over axis 2, takes the log, sums over axis 1 and negates the mean. When
		  `self._variational` is set, a KL term computed from
		  `self._embedding_mean` and `self._embedding_stddev` is added, which
		  pushes the embedding toward a unit Gaussian.

		  Returns
		  -------
		  The layer that computes the total loss.
		  """
		  prob = layers.ReduceSum(self.output * self._labels, axis=2)
		  # Summing the labels over axis 2 gives a per-position weight.
		  # NOTE(review): presumably labels are one-hot and padded positions are
		  # all zero, so this masks out padding -- confirm against the batch
		  # construction code.
		  mask = layers.ReduceSum(self._labels, axis=2)
		  # The small constant keeps the log finite when the probability is 0.
		  log_prob = layers.Log(prob + 1e-20) * mask
		  loss = -layers.ReduceMean(layers.ReduceSum(log_prob, axis=1))
		  if self._variational:
		    mean_sq = self._embedding_mean * self._embedding_mean
		    stddev_sq = self._embedding_stddev * self._embedding_stddev
		    # Same guard as above: without it a standard deviation of exactly 0
		    # makes the log -inf and the whole loss non-finite.
		    kl = mean_sq + stddev_sq - layers.Log(stddev_sq + 1e-20) - 1
		    loss += 0.5 * layers.ReduceMean(layers.ReduceSum(kl, axis=1))
		  return loss

		def fit_sequences(self,
		generator,

deepchem/models/tensorgraph/tests/test_layers.py

+8 −0

Original line number	Diff line number	Diff line
		@@ -16,6 +16,7 @@ from deepchem.models.tensorgraph.layers import Constant
		from deepchem.models.tensorgraph.layers import Conv1D, Squeeze
		from deepchem.models.tensorgraph.layers import Conv2D
		from deepchem.models.tensorgraph.layers import Dense
		from deepchem.models.tensorgraph.layers import Exp
		from deepchem.models.tensorgraph.layers import Flatten
		from deepchem.models.tensorgraph.layers import GRU
		from deepchem.models.tensorgraph.layers import Gather
		@@ -267,6 +268,13 @@ class TestLayers(test_util.TensorFlowTestCase):
		result = Log()(value).eval()
		assert np.array_equal(np.log(value), result)

		def test_exp(self):
		  """Test that Exp can be invoked and matches NumPy's exponential."""
		  value = np.random.uniform(size=(2, 3)).astype(np.float32)
		  with self.test_session():
		    result = Exp()(value).eval()
		    # Compare with a tolerance: two independent float32 implementations of
		    # exp are not guaranteed to agree bit-for-bit.
		    assert np.allclose(np.exp(value), result)

		def test_interatomic_distances(self):
		"""Test that the interatomic distance calculation works."""
		N_atoms = 5

deepchem/models/tensorgraph/tests/test_layers_pickle.py

+11 −1

Original line number	Diff line number	Diff line
		@@ -7,7 +7,7 @@ from deepchem.models.tensorgraph.graph_layers import Combine_AP, Separate_AP, \
		DTNNExtract, DAGLayer, DAGGather, MessagePassing, SetGather
		from deepchem.models.tensorgraph.layers import Feature, Conv1D, Dense, Flatten, Reshape, Squeeze, Transpose, \
		CombineMeanStd, Repeat, Gather, GRU, L2Loss, Concat, SoftMax, \
		Constant, Variable, Add, Multiply, Log, InteratomicL2Distances, \
		Constant, Variable, Add, Multiply, Log, Exp, InteratomicL2Distances, \
		SoftMaxCrossEntropy, ReduceMean, ToFloat, ReduceSquareDifference, Conv2D, MaxPool2D, ReduceSum, GraphConv, GraphPool, \
		GraphGather, BatchNorm, WeightedError, \
		Conv3D, MaxPool3D, \
		@@ -177,6 +177,16 @@ def test_Log_pickle():
		tg.save()


		def test_Exp_pickle():
		  """Build and save a TensorGraph whose output and loss are an Exp layer."""
		  graph = TensorGraph()
		  exp_layer = Exp(Feature(shape=(graph.batch_size, 1)))
		  # The same layer is used as both the model output and the loss.
		  graph.add_output(exp_layer)
		  graph.set_loss(exp_layer)
		  graph.build()
		  graph.save()


		def testInteratomicL2Distances():
		"""
		TODO(LESWING) what is ndim here?

deepchem/models/tensorgraph/tests/test_seqtoseq.py

+35 −10

Original line number	Diff line number	Diff line
		@@ -3,6 +3,15 @@ import numpy as np
		import unittest


		def generate_sequences(sequence_length, num_sequences):
		  """Yield random (input, target) pairs for an autoencoding task.

		  Each sequence holds between 1 and `sequence_length` integers drawn from
		  0 to 9 using NumPy's global random state. The input and the target of a
		  pair are the same list object.

		  Parameters
		  ----------
		  sequence_length: int
		    the maximum length of a generated sequence.
		  num_sequences: int
		    the number of pairs to yield.
		  """
		  for _ in range(num_sequences):
		    # Draw the length first, then the tokens one at a time.
		    length = np.random.randint(1, sequence_length + 1)
		    tokens = []
		    for _ in range(length):
		      tokens.append(np.random.randint(10))
		    yield tokens, tokens


		class TestSeqToSeq(unittest.TestCase):

		def test_int_sequence(self):
		@@ -24,21 +33,13 @@ class TestSeqToSeq(unittest.TestCase):
		# really make it reliable, but I want to keep this test fast, and it should
		# still be able to reproduce a reasonable fraction of input sequences.

		def generate_sequences(num_sequences):
		for i in range(num_sequences):
		seq = [
		np.random.randint(10)
		for x in range(np.random.randint(1, sequence_length + 1))
		]
		yield (seq, seq)

		s.fit_sequences(generate_sequences(25000))
		s.fit_sequences(generate_sequences(sequence_length, 25000))

		# Test it out.

		count1 = 0
		count4 = 0
		for sequence, target in generate_sequences(50):
		for sequence, target in generate_sequences(sequence_length, 50):
		pred1 = s.predict_from_sequence(sequence, beam_width=1)
		pred4 = s.predict_from_sequence(sequence, beam_width=4)
		if pred1 == sequence:
		@@ -53,3 +54,27 @@ class TestSeqToSeq(unittest.TestCase):

		assert count1 >= 12
		assert count4 >= 12

		def test_variational(self):
		  """Test using a SeqToSeq model as a variational autoencoder."""

		  sequence_length = 10
		  tokens = list(range(10))
		  # variational=True is what actually switches on the variational path;
		  # without it this test only builds an ordinary SeqToSeq model.
		  s = dc.models.SeqToSeq(
		      tokens,
		      tokens,
		      sequence_length,
		      encoder_layers=2,
		      decoder_layers=2,
		      embedding_dimension=128,
		      learning_rate=0.01,
		      variational=True)

		  # Actually training a VAE takes far too long for a unit test. Just run a
		  # few steps of training to make sure nothing crashes, then check that the
		  # results are at least internally consistent.

		  s.fit_sequences(generate_sequences(sequence_length, 1000))
		  # NOTE(review): the check below assumes the embedding is deterministic
		  # at prediction time. If the model still samples noise while
		  # predicting, this assertion can fail spuriously.
		  for sequence, _ in generate_sequences(sequence_length, 10):
		    pred1 = s.predict_from_sequence(sequence, beam_width=1)
		    embedding = s.predict_embedding(sequence)
		    assert pred1 == s.predict_from_embedding(embedding, beam_width=1)

Admin message