Merge branch 'master' into graphconvsuch-cr (90320cba) · Commits · 钟慕尧 / deepchem

README.md

+2 −2

Original line number	Diff line number	Diff line
		@@ -56,7 +56,7 @@ git clone https://github.com/deepchem/deepchem.git # Clone deepchem source
		cd deepchem
		bash scripts/install_deepchem_conda.sh deepchem
		source activate deepchem
		pip install tensorflow-gpu==1.2.1 # If you want GPU support
		pip install tensorflow-gpu==1.3.0 # If you want GPU support
		python setup.py install # Manual install
		nosetests -v deepchem --nologcapture # Run tests
		```
		@@ -110,7 +110,7 @@ conda install -c deepchem -c rdkit -c conda-forge -c omnia deepchem=1.2.0
		contact your local sysadmin to work out a custom installation. If your
		version of Linux is recent, then the following command will work:
		```
		pip install tensorflow-gpu==1.2.1
		pip install tensorflow-gpu==1.3.0
		```

		9. `deepchem`: Clone the `deepchem` github repo:

deepchem/models/tensorgraph/layers.py

+253 −123

Original line number	Diff line number	Diff line
		import random
		import string
		from collections import Sequence
		from copy import deepcopy

		import tensorflow as tf
		import numpy as np
		@@ -24,6 +25,7 @@ class Layer(object):
		self.in_layers = in_layers
		self.op_type = "gpu"
		self.variable_scope = ''
		self.variable_values = None
		self.rnn_initial_states = []
		self.rnn_final_states = []
		self.rnn_zero_states = []
		@@ -116,6 +118,19 @@ class Layer(object):
		else:
		self.variable_scope = local_scope

		def set_variable_initial_values(self, values):
		  """Store pre-trained values to use when initializing this layer's variables.

		  When this layer is used in a TensorGraph, each variable is automatically
		  initialized to the corresponding entry of `values`. Some layers also have
		  their own mechanisms for specifying variable initializers; values set here
		  override them. This lets a Layer object represent a pre-trained layer,
		  complete with trained values for its variables.

		  Parameters
		  ----------
		  values: list
		    initial values for all of this layer's variables, in the same order
		    returned by TensorGraph.get_layer_variables().
		  """
		  self.variable_values = values

		def set_summary(self, summary_op, summary_description=None, collections=None):
		"""Annotates a tensor with a tf.summary operation
		Collects data from self.out_tensor by default but can be changed by setting
		@@ -157,6 +172,64 @@ class Layer(object):
		elif self.summary_op == 'histogram':
		tf.summary.histogram(self.name, self.tb_input, self.collections)

		def copy(self, replacements=None, variables_graph=None):
		  """Duplicate this Layer and all its inputs.

		  This creates and returns a clone of this layer. It also recursively calls
		  copy() on all of this layer's inputs to clone the entire hierarchy of
		  layers. In the process, you can optionally tell it to replace particular
		  layers with specific existing ones. For example, you can clone a stack of
		  layers, while connecting the topmost ones to different inputs.

		  For example, consider a stack of dense layers that depend on an input:

		  >>> input = Feature(shape=(None, 100))
		  >>> dense1 = Dense(100, in_layers=input)
		  >>> dense2 = Dense(100, in_layers=dense1)
		  >>> dense3 = Dense(100, in_layers=dense2)

		  The following will clone all three dense layers, but not the input layer.
		  Instead, the input to the first dense layer will be a different layer
		  specified in the replacements map.

		  >>> replacements = {input: new_input}
		  >>> dense3_copy = dense3.copy(replacements)

		  Parameters
		  ----------
		  replacements: map
		    specifies existing layers, and the layers to replace them with (instead
		    of cloning them). This argument serves two purposes. First, you can pass
		    in a map of replacements to control which layers get cloned. In
		    addition, as each layer is cloned, it is added to this map. On exit, it
		    therefore contains a complete record of all layers that were copied, and
		    a reference to the copy of each one. A layer reachable through several
		    paths is therefore cloned only once. If omitted, a fresh map is created
		    for this call.
		  variables_graph: TensorGraph
		    an optional TensorGraph from which to take variables. If this is
		    specified, the current value of each variable in each layer is recorded,
		    and the copy has that value specified as its initial value. This allows
		    a piece of a pre-trained model to be copied to another model.

		  Returns
		  -------
		  The clone of this layer, or the entry of `replacements` for this layer if
		  one exists.
		  """
		  # A None default (rather than {}) keeps the record of one call from leaking
		  # into the next now that the map is written to.
		  if replacements is None:
		    replacements = {}
		  if self in replacements:
		    return replacements[self]
		  copied_inputs = [
		      layer.copy(replacements, variables_graph) for layer in self.in_layers
		  ]
		  # Temporarily detach the inputs and tensors so that deepcopy() clones only
		  # this layer, not the layers and tensors hanging off it.
		  saved_inputs = self.in_layers
		  self.in_layers = []
		  saved_tensors = self.none_tensors()
		  try:
		    clone = deepcopy(self)
		  finally:
		    # Put this layer back together even if deepcopy() fails.
		    self.in_layers = saved_inputs
		    self.set_tensors(saved_tensors)
		  clone.in_layers = copied_inputs
		  if variables_graph is not None:
		    variables = variables_graph.get_layer_variables(self)
		    if len(variables) > 0:
		      with variables_graph._get_tf("Graph").as_default():
		        values = variables_graph.session.run(variables)
		        clone.set_variable_initial_values(values)
		  # Record the clone so later references to this layer reuse it.
		  replacements[self] = clone
		  return clone

		def _as_graph_element(self):
		if '_as_graph_element' in dir(self.out_tensor):
		return self.out_tensor._as_graph_element()
		@@ -296,6 +369,7 @@ class Conv1D(Layer):


		class Dense(Layer):

		def __init__(
		self,
		out_channels,
		@@ -426,6 +500,7 @@ class Flatten(Layer):


		class Reshape(Layer):

		def __init__(self, shape, **kwargs):
		super(Reshape, self).__init__(**kwargs)
		self._new_shape = tuple(-1 if x is None else x for x in shape)
		@@ -456,6 +531,7 @@ class Reshape(Layer):


		class Squeeze(Layer):

		def __init__(self, in_layers=None, squeeze_dims=None, **kwargs):
		self.squeeze_dims = squeeze_dims
		super(Squeeze, self).__init__(in_layers, **kwargs)
		@@ -481,6 +557,7 @@ class Squeeze(Layer):


		class Transpose(Layer):

		def __init__(self, perm, **kwargs):
		super(Transpose, self).__init__(**kwargs)
		self.perm = perm
		@@ -544,6 +621,7 @@ class CombineMeanStd(Layer):


		class Repeat(Layer):

		def __init__(self, n_times, **kwargs):
		self.n_times = n_times
		super(Repeat, self).__init__(**kwargs)
		@@ -688,6 +766,7 @@ class GRU(Layer):


		class TimeSeriesDense(Layer):

		def __init__(self, out_channels, **kwargs):
		self.out_channels = out_channels
		super(TimeSeriesDense, self).__init__(**kwargs)
		@@ -707,6 +786,7 @@ class TimeSeriesDense(Layer):


		class Input(Layer):

		def __init__(self, shape, dtype=tf.float32, **kwargs):
		self._shape = tuple(shape)
		self.dtype = dtype
		@@ -736,21 +816,25 @@ class Input(Layer):


		class Feature(Input):
		  """Input subclass that adds no behavior of its own.

		  NOTE(review): presumably marks the inputs that carry a model's features;
		  confirm against the code that consumes Feature layers.
		  """

		  def __init__(self, **kwargs):
		    # Every keyword argument is forwarded unchanged to Input.__init__.
		    super(Feature, self).__init__(**kwargs)


		class Label(Input):
		  """Input subclass that adds no behavior of its own.

		  NOTE(review): presumably marks the inputs that carry a model's labels;
		  confirm against the code that consumes Label layers.
		  """

		  def __init__(self, **kwargs):
		    # Every keyword argument is forwarded unchanged to Input.__init__.
		    super(Label, self).__init__(**kwargs)


		class Weights(Input):
		  """Input subclass that adds no behavior of its own.

		  NOTE(review): presumably marks the inputs that carry per-sample or
		  per-task weights; confirm against the code that consumes Weights layers.
		  """

		  def __init__(self, **kwargs):
		    # Every keyword argument is forwarded unchanged to Input.__init__.
		    super(Weights, self).__init__(**kwargs)


		class L1Loss(Layer):

		def __init__(self, in_layers=None, **kwargs):
		super(L1Loss, self).__init__(in_layers, **kwargs)

		@@ -765,6 +849,7 @@ class L1Loss(Layer):


		class L2Loss(Layer):

		def __init__(self, in_layers=None, **kwargs):
		super(L2Loss, self).__init__(in_layers, **kwargs)
		try:
		@@ -788,6 +873,7 @@ class L2Loss(Layer):


		class SoftMax(Layer):

		def __init__(self, in_layers=None, **kwargs):
		super(SoftMax, self).__init__(in_layers, **kwargs)
		try:
		@@ -807,6 +893,7 @@ class SoftMax(Layer):


		class Concat(Layer):

		def __init__(self, in_layers=None, axis=1, **kwargs):
		self.axis = axis
		super(Concat, self).__init__(in_layers, **kwargs)
		@@ -831,6 +918,7 @@ class Concat(Layer):


		class Stack(Layer):

		def __init__(self, in_layers=None, axis=1, **kwargs):
		self.axis = axis
		super(Stack, self).__init__(in_layers, **kwargs)
		@@ -904,6 +992,36 @@ class Variable(Layer):
		return out_tensor


		class StopGradient(Layer):
		  """Block the flow of gradients.

		  This layer copies its input directly to its output, but reports that all
		  gradients of its output are zero. This means, for example, that optimizers
		  will not try to optimize anything "upstream" of this layer.

		  For example, suppose you have pre-trained a stack of layers to perform a
		  calculation. You want to use the result of that calculation as the input to
		  another layer, but because they are already pre-trained, you do not want the
		  optimizer to modify them. You can wrap the output in a StopGradient layer,
		  then use that as the input to the next layer."""

		  def __init__(self, in_layers=None, **kwargs):
		    """Create the layer, taking its shape from the first input if possible."""
		    super(StopGradient, self).__init__(in_layers, **kwargs)
		    try:
		      self._shape = tuple(self.in_layers[0].shape)
		    except Exception:
		      # Best effort: the shape is simply left unset when there is no input
		      # yet or the input does not expose a usable shape. Catching Exception
		      # (not a bare except) lets KeyboardInterrupt/SystemExit propagate.
		      pass

		  def create_tensor(self, in_layers=None, set_tensors=True, **kwargs):
		    """Build the output tensor as tf.stop_gradient() of the single input.

		    Parameters
		    ----------
		    in_layers: optional
		      passed to self._get_input_tensors() to obtain the input tensors.
		    set_tensors: bool
		      if True, the result is also stored in self.out_tensor.

		    Returns
		    -------
		    The tensor produced by tf.stop_gradient().

		    Raises
		    ------
		    ValueError
		      if the number of input tensors is not exactly one.
		    """
		    inputs = self._get_input_tensors(in_layers)
		    # Reject zero inputs as well, instead of failing later with an
		    # IndexError on inputs[0].
		    if len(inputs) != 1:
		      raise ValueError("Only one layer supported.")
		    out_tensor = tf.stop_gradient(inputs[0])
		    if set_tensors:
		      self.out_tensor = out_tensor
		    return out_tensor


		def _max_dimension(x, y):
		if x is None:
		return y
		@@ -1052,6 +1170,7 @@ class InteratomicL2Distances(Layer):


		class SparseSoftMaxCrossEntropy(Layer):

		def __init__(self, in_layers=None, **kwargs):
		super(SparseSoftMaxCrossEntropy, self).__init__(in_layers, **kwargs)
		try:
		@@ -1073,6 +1192,7 @@ class SparseSoftMaxCrossEntropy(Layer):


		class SoftMaxCrossEntropy(Layer):

		def __init__(self, in_layers=None, **kwargs):
		super(SoftMaxCrossEntropy, self).__init__(in_layers, **kwargs)
		try:
		@@ -1094,6 +1214,7 @@ class SoftMaxCrossEntropy(Layer):


		class ReduceMean(Layer):

		def __init__(self, in_layers=None, axis=None, **kwargs):
		if axis is not None and not isinstance(axis, Sequence):
		axis = [axis]
		@@ -1124,6 +1245,7 @@ class ReduceMean(Layer):


		class ToFloat(Layer):

		def __init__(self, in_layers=None, **kwargs):
		super(ToFloat, self).__init__(in_layers, **kwargs)
		try:
		@@ -1142,6 +1264,7 @@ class ToFloat(Layer):


		class ReduceSum(Layer):

		def __init__(self, in_layers=None, axis=None, **kwargs):
		if axis is not None and not isinstance(axis, Sequence):
		axis = [axis]
		@@ -1172,6 +1295,7 @@ class ReduceSum(Layer):


		class ReduceSquareDifference(Layer):

		def __init__(self, in_layers=None, axis=None, **kwargs):
		if axis is not None and not isinstance(axis, Sequence):
		axis = [axis]
		@@ -1357,6 +1481,7 @@ class Conv3D(Layer):


		class MaxPool2D(Layer):

		def __init__(self,
		ksize=[1, 2, 2, 1],
		strides=[1, 2, 2, 1],
		@@ -1464,11 +1589,9 @@ class InputFifoQueue(Layer):
		def set_tensors(self, tensors):
		  """Assign the four items of `tensors` to this layer's attributes.

		  `tensors` must unpack into exactly four items, which are stored in order
		  as `queue`, `out_tensor`, `out_tensors` and `close_op`.
		  """
		  queue, out_tensor, out_tensors, close_op = tensors
		  self.queue = queue
		  self.out_tensor = out_tensor
		  self.out_tensors = out_tensors
		  self.close_op = close_op

		def close(self):
		  """Close the queue held in `self.queue` by calling its `close()` method."""
		  self.queue.close()


		class GraphConv(Layer):

		def __init__(self,
		out_channel,
		min_deg=0,
		@@ -1583,6 +1706,7 @@ class GraphConv(Layer):


		class GraphPool(Layer):

		def __init__(self, min_degree=0, max_degree=10, **kwargs):
		self.min_degree = min_degree
		self.max_degree = max_degree
		@@ -1631,6 +1755,7 @@ class GraphPool(Layer):


		class GraphGather(Layer):

		def __init__(self, batch_size, activation_fn=None, **kwargs):
		self.batch_size = batch_size
		self.activation_fn = activation_fn
		@@ -2059,6 +2184,7 @@ class IterRefLSTMEmbedding(Layer):


		class BatchNorm(Layer):

		def __init__(self, in_layers=None, **kwargs):
		super(BatchNorm, self).__init__(in_layers, **kwargs)
		try:
		@@ -2077,6 +2203,7 @@ class BatchNorm(Layer):


		class BatchNormalization(Layer):

		def __init__(self,
		epsilon=1e-5,
		axis=-1,
		@@ -2120,6 +2247,7 @@ class BatchNormalization(Layer):


		class WeightedError(Layer):

		def __init__(self, in_layers=None, **kwargs):
		super(WeightedError, self).__init__(in_layers, **kwargs)
		self._shape = tuple()
		@@ -2569,6 +2697,7 @@ class NeighborList(Layer):


		class Dropout(Layer):

		def __init__(self, dropout_prob, **kwargs):
		self.dropout_prob = dropout_prob
		super(Dropout, self).__init__(**kwargs)
		@@ -2623,6 +2752,7 @@ class WeightDecay(Layer):


		class AtomicConvolution(Layer):

		def __init__(self,
		atom_types=None,
		radial_params=list(),

deepchem/models/tensorgraph/models/graph_models.py

+44 −4

Original line number	Diff line number	Diff line
		import math

		import numpy as np
		import six
		import tensorflow as tf
		@@ -747,7 +745,48 @@ class GraphConvTensorGraph(TensorGraph):
		d[self.deg_adjs[i - 1]] = multiConvMol.get_deg_adjacency_lists()[i]
		yield d

		def predict_proba_on_generator(self, generator, transformers=[]):
		def predict_on_generator(self, generator, transformers=[], outputs=None):
		  """Run the model on every batch from `generator` and collect the outputs.

		  Parameters
		  ----------
		  generator: iterable
		    yields one dict per batch, mapping layer objects to the values to feed.
		    Each key is looked up by its `name` in self.layers.
		  transformers: list
		    transformers to undo on the result via undo_transforms(). Only
		    supported when there is a single output. The default list is never
		    modified.
		  outputs: object or sequence, optional
		    what to fetch from the session. Defaults to self.outputs. A single
		    non-sequence value is wrapped in a list.

		  Returns
		  -------
		  With one output, an array of all batches concatenated along axis 0. With
		  several outputs, a list of such arrays, one per output.

		  Raises
		  ------
		  ValueError
		    if transformers are given together with more than one output, or (from
		    np.concatenate) if the generator yields no batches.
		  """
		  # collections.Sequence was removed in Python 3.10; fall back to the old
		  # location only on interpreters that lack collections.abc.
		  try:
		    from collections.abc import Sequence
		  except ImportError:  # Python 2
		    from collections import Sequence

		  if not self.built:
		    self.build()
		  if outputs is None:
		    outputs = self.outputs
		  elif not isinstance(outputs, Sequence):
		    outputs = [outputs]
		  # Fail before running anything rather than after the first batch.
		  if len(outputs) > 1 and len(transformers) > 0:
		    raise ValueError("Does not support transformations "
		                     "for multiple outputs.")
		  with self._get_tf("Graph").as_default():
		    # Gather results for each output
		    results = [[] for _ in outputs]
		    for batch in generator:
		      feed_dict = {
		          self.layers[k.name].out_tensor: v for k, v in batch.items()
		      }
		      # Recording the number of samples in the input batch
		      n_samples = np.max(feed_dict[self.membership.out_tensor]) + 1
		      feed_dict[self._training_placeholder] = 0.0
		      feed_results = self.session.run(outputs, feed_dict=feed_dict)
		      if len(feed_results) == 1:
		        feed_results = [undo_transforms(feed_results[0], transformers)]
		      for ind, result in enumerate(feed_results):
		        # GraphConvTensorGraph constantly outputs batch_size number of
		        # results, only valid samples should be appended to final results
		        results[ind].append(result[:n_samples])

		    final_results = [
		        np.concatenate(result_list, axis=0) for result_list in results
		    ]
		    # If only one output, just return array
		    if len(final_results) == 1:
		      return final_results[0]
		    return final_results

		def predict_proba_on_generator(self, generator, transformers=[],
		outputs=None):
		if not self.built:
		self.build()
		with self._get_tf("Graph").as_default():
		@@ -758,13 +797,14 @@ class GraphConvTensorGraph(TensorGraph):
		self.layers[k.name].out_tensor: v
		for k, v in six.iteritems(feed_dict)
		}
		n_samples = max(feed_dict[self.membership.out_tensor]) + 1
		feed_dict[self._training_placeholder] = 1.0 ##
		result = np.array(self.session.run(out_tensors, feed_dict=feed_dict))
		if len(result.shape) == 3:
		result = np.transpose(result, axes=[1, 0, 2])
		if len(transformers) > 0:
		result = undo_transforms(result, transformers)
		results.append(result)
		results.append(result[:n_samples])
		return np.concatenate(results, axis=0)

		def evaluate(self, dataset, metrics, transformers=[], per_task_metrics=False):

deepchem/models/tensorgraph/tensor_graph.py

+47 −36

Original line number	Diff line number	Diff line
		@@ -180,38 +180,56 @@ class TensorGraph(Model):
		self.session.run(tf.global_variables_initializer())
		if restore:
		self.restore()
		avg_loss, n_batches = 0.0, 0.0
		else:
		# Initialize variables that have pre-trained values.
		for layer in self.layers.values():
		if layer.variable_values is not None:
		variables = self.get_layer_variables(layer)
		for var, val in zip(variables, layer.variable_values):
		self.session.run(var.assign(val))
		avg_loss, n_averaged_batches = 0.0, 0.0
		coord = tf.train.Coordinator()
		n_samples = 0
		n_enqueued = [0]
		final_sample = [None]
		if self.use_queue:
		enqueue_thread = threading.Thread(
		target=_enqueue_batch,
		args=(self, feed_dict_generator, self._get_tf("Graph"),
		self.session, coord))
		self.session, n_enqueued, final_sample))
		enqueue_thread.start()
		fetches = [train_op, self.loss.out_tensor]
		for feed_dict in create_feed_dict():
		try:
		if self.use_queue:
		# Don't let this thread get ahead of the enqueue thread, since if
		# we try to read more batches than the total number that get queued,
		# this thread will hang indefinitely.
		while n_enqueued[0] <= n_samples:
		if n_samples == final_sample[0]:
		break
		time.sleep(0)
		if n_samples == final_sample[0]:
		break
		n_samples += 1
		should_log = (self.tensorboard and
		n_samples % self.tensorboard_log_frequency == 0)
		fetches = [train_op, self.loss.out_tensor]
		if should_log:
		fetches.append(self._get_tf("summary_op"))
		fetched_values = self.session.run(fetches, feed_dict=feed_dict)
		loss = fetched_values[-1]
		if should_log:
		self._log_tensorboard(fetches[2])
		loss = fetched_values[1]
		avg_loss += loss
		n_batches += 1
		n_averaged_batches += 1
		self.global_step += 1
		n_samples += 1
		if self.tensorboard and n_samples % self.tensorboard_log_frequency == 0:
		summary = self.session.run(
		self._get_tf("summary_op"), feed_dict=feed_dict)
		self._log_tensorboard(summary)
		except OutOfRangeError:
		break
		if self.global_step % checkpoint_interval == checkpoint_interval - 1:
		saver.save(self.session, self.save_file, global_step=self.global_step)
		avg_loss = float(avg_loss) / n_batches
		avg_loss = float(avg_loss) / n_averaged_batches
		print('Ending global_step %d: Average loss %g' % (self.global_step,
		avg_loss))
		avg_loss, n_batches = 0.0, 0.0
		if n_batches > 0:
		avg_loss = float(avg_loss) / n_batches
		avg_loss, n_averaged_batches = 0.0, 0.0
		if n_averaged_batches > 0:
		avg_loss = float(avg_loss) / n_averaged_batches
		print('Ending global_step %d: Average loss %g' % (self.global_step,
		avg_loss))
		saver.save(self.session, self.save_file, global_step=self.global_step)
		@@ -688,7 +706,7 @@ class TensorGraph(Model):
		pass


		def _enqueue_batch(tg, generator, graph, sess, coord):
		def _enqueue_batch(tg, generator, graph, sess, n_enqueued, final_sample):
		"""
		Function to load data into
		Parameters
		@@ -697,7 +715,6 @@ def _enqueue_batch(tg, generator, graph, sess, coord):
		dataset
		graph
		sess
		coord

		Returns
		-------
		@@ -711,14 +728,8 @@ def _enqueue_batch(tg, generator, graph, sess, coord):
		for layer in tg.features + tg.labels + tg.task_weights:
		enq[tg.get_pre_q_input(layer).out_tensor] = feed_dict[layer]
		sess.run(tg.input_queue.out_tensor, feed_dict=enq)
		num_samples += 1
		if tg.tensorboard and num_samples % tg.tensorboard_log_frequency == 0:
		enq = {k.out_tensor: v for k, v in six.iteritems(feed_dict)}
		summary = sess.run(tg._get_tf("summary_op"), feed_dict=enq)
		tg._log_tensorboard(summary)
		sess.run(tg.input_queue.close_op)
		coord.num_samples = num_samples
		coord.request_stop()
		n_enqueued[0] += 1
		final_sample[0] = n_enqueued[0]


		class TFWrapper(object):

deepchem/models/tensorgraph/tests/test_layers.py

+11 −0

Original line number	Diff line number	Diff line
		@@ -39,6 +39,7 @@ from deepchem.models.tensorgraph.layers import Reshape
		from deepchem.models.tensorgraph.layers import SluiceLoss
		from deepchem.models.tensorgraph.layers import SoftMax
		from deepchem.models.tensorgraph.layers import SoftMaxCrossEntropy
		from deepchem.models.tensorgraph.layers import StopGradient
		from deepchem.models.tensorgraph.layers import TensorWrapper
		from deepchem.models.tensorgraph.layers import TimeSeriesDense
		from deepchem.models.tensorgraph.layers import ToFloat
		@@ -241,6 +242,16 @@ class TestLayers(test_util.TensorFlowTestCase):
		sess.run(tf.global_variables_initializer())
		assert np.array_equal(value, out_tensor.eval())

		def test_stop_gradient(self):
		  """Test that StopGradient can be invoked."""
		  n_rows, n_cols = 10, 5
		  values = np.random.rand(n_rows, n_cols)
		  with self.test_session() as sess:
		    source = tf.convert_to_tensor(values, dtype=tf.float32)
		    result = StopGradient()(source)
		    # The forward pass must leave the values untouched.
		    assert np.array_equal(source.eval(), result.eval())

		def test_add(self):
		"""Test that Add can be invoked."""
		value1 = np.random.uniform(size=(2, 3)).astype(np.float32)

Admin message