Commit 8df1c80c authored by peastman

Eliminated Shared, implemented shared() for convolutional layers

parent c5c9728e
deepchem/models/tensorgraph/layers.py +130 −117
@@ -27,6 +27,7 @@ class Layer(object):
    self.op_type = "gpu"
    self.variable_scope = ''
    self.variable_values = None
    self.out_tensor = None
    self.rnn_initial_states = []
    self.rnn_final_states = []
    self.rnn_zero_states = []
@@ -51,9 +52,24 @@ class Layer(object):
  def create_tensor(self, in_layers=None, set_tensors=True, **kwargs):
    raise NotImplementedError("Subclasses must implement for themselves")

  def clone(self, in_layers):
    """Create a copy of this layer with different inputs."""
    saved_inputs = self.in_layers
    self.in_layers = []
    saved_tensors = self.none_tensors()
    copy = deepcopy(self)
    self.in_layers = saved_inputs
    self.set_tensors(saved_tensors)
    copy.in_layers = in_layers
    return copy

  def shared(self, in_layers):
    """
    Share weights with different in tensors and a new out tensor
    Create a copy of this layer that shares variables with it.

    This is similar to clone(), but where clone() creates two independent layers,
    this causes the layers to share variables with each other.

    Parameters
    ----------
    in_layers: list of Layer
@@ -63,7 +79,9 @@ class Layer(object):
    -------
    Layer
    """
    return Shared(self, in_layers=in_layers)
    if self.variable_scope == '':
      return self.clone(in_layers)
    raise ValueError('%s does not implement shared()' % self.__class__.__name__)

  def __call__(self, *in_layers):
    return self.create_tensor(in_layers=in_layers, set_tensors=False)
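
For context, a minimal sketch of how the new shared() entry point is meant to be used. The Feature/Dense setup and all names below are illustrative, not taken from this commit:

# A sketch assuming the DeepChem TensorGraph API as of this commit;
# the names (left, right, dense1, dense2) are illustrative.
from deepchem.models.tensorgraph.layers import Feature, Dense

left = Feature(shape=(None, 10))
right = Feature(shape=(None, 10))
dense1 = Dense(out_channels=20, in_layers=[left])
# clone() would produce an independent copy with its own variables;
# shared() produces a copy that reuses dense1's weights and biases.
dense2 = dense1.shared(in_layers=[right])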
@@ -176,11 +194,11 @@ class Layer(object):
  def copy(self, replacements={}, variables_graph=None, shared=False):
    """Duplicate this Layer and all its inputs.

    This creates and returns a clone of this layer.  It also recursively calls
    copy() on all of this layer's inputs to clone the entire hierarchy of layers.
    In the process, you can optionally tell it to replace particular layers with
    specific existing ones.  For example, you can clone a stack of layers, while
    connecting the topmost ones to different inputs.
    This is similar to clone(), but instead of only cloning one layer, it also
    recursively calls copy() on all of this layer's inputs to clone the entire
    hierarchy of layers.  In the process, you can optionally tell it to replace
    particular layers with specific existing ones.  For example, you can clone a
    stack of layers, while connecting the topmost ones to different inputs.

    For example, consider a stack of dense layers that depend on an input:

@@ -211,7 +229,7 @@ class Layer(object):
      has that value specified as its initial value.  This allows a piece of a
      pre-trained model to be copied to another model.
    shared: bool
      if True, create Shared layers instead of directly cloning the input layers.
      if True, create new layers by calling shared() on the input layers.
      This means the newly created layers will share variables with the original
      ones.
    """
@@ -222,15 +240,9 @@ class Layer(object):
        for layer in self.in_layers
    ]
    if shared:
      copy = Shared(self, in_layers=copied_inputs)
      copy = self.shared(copied_inputs)
    else:
      saved_inputs = self.in_layers
      self.in_layers = []
      saved_tensors = self.none_tensors()
      copy = deepcopy(self)
      self.in_layers = saved_inputs
      self.set_tensors(saved_tensors)
      copy.in_layers = copied_inputs
      copy = self.clone(copied_inputs)
    if variables_graph is not None:
      if shared:
        raise ValueError('Cannot specify variables_graph when shared==True')
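
With clone() and shared() in place, copy() reduces to recursing over the inputs and delegating to one of the two. A sketch of the shared-copy path, adapted from the updated test at the bottom of this commit (the setup lines are a plausible reconstruction, not quoted from it):

# Layer sizes and the 10.0 constant below are illustrative.
features = Feature(shape=(None, 10))
dense = Dense(out_channels=10, in_layers=[features])
constant = Constant(10.0)
output = Add(in_layers=[dense, constant])
replacements = {features: features, constant: Constant(20.0)}
# Each copied layer is now a real layer type (Add, Dense, ...) that
# shares variables with its original, instead of a Shared wrapper.
copy = output.copy(replacements, shared=True)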
@@ -302,9 +314,9 @@ class TensorWrapper(Layer):
  """Used to wrap a tensorflow tensor."""

  def __init__(self, out_tensor, **kwargs):
    super(TensorWrapper, self).__init__(**kwargs)
    self.out_tensor = out_tensor
    self._shape = out_tensor.get_shape().as_list()
    super(TensorWrapper, self).__init__(**kwargs)

  def create_tensor(self, in_layers=None, **kwargs):
    """Take no actions."""
@@ -324,46 +336,31 @@ def convert_to_layers(in_layers):
  return layers


class Shared(Layer):
  """A copy of another layer that shares variables with it.
class SharedVariableScope(Layer):
  """A Layer that can share variables with another layer via name scope.

  A Shared layer duplicates all the computations of another layer so those
  computations may be performed on a second set of inputs.  It does this while
  sharing variables with the original layer.
  This abstract class can be used as a parent for any layer that implements
  shared() by means of the variable name scope.  It exists to avoid duplicated
  code.
  """

  def __init__(self, original_layer, **kwargs):
    """Create a Shared layer.
  def __init__(self, **kwargs):
    super(SharedVariableScope, self).__init__(**kwargs)
    self._reuse = False
    self._shared_with = None

    Parameters
    ----------
    original_layer: Layer
      the Layer whose computations this layer should duplicate, and with which
      it should share variables
    """
    super(Shared, self).__init__(**kwargs)
    self.original_layer = original_layer
  def shared(self, in_layers):
    copy = self.clone(in_layers)
    self._reuse = True
    copy._reuse = True
    copy._shared_with = self
    return copy

  def create_tensor(self, in_layers=None, set_tensors=True, **kwargs):
    inputs = self._get_input_tensors(in_layers)
    if len(inputs) != len(self.original_layer.in_layers):
      raise ValueError(
          "Shared must have the same number of inputs as the original layer")
    replacements = {}
    for original, input in zip(self.original_layer.in_layers, inputs):
      replacements[original.out_tensor] = input
    if self.original_layer.variable_scope != '':
      for var in tf.get_collection(
          tf.GraphKeys.TRAINABLE_VARIABLES,
          scope=self.original_layer.variable_scope):
        var_tensor = tf.convert_to_tensor(var)
        replacements[var_tensor] = var_tensor
    out_tensor = tf.contrib.graph_editor.graph_replace(
        self.original_layer.out_tensor, replacements)
    if set_tensors:
      self._record_variable_scope(self.original_layer.variable_scope)
      self.out_tensor = out_tensor
    return out_tensor
  def _get_scope_name(self):
    if self._shared_with is None:
      return self.name
    else:
      return self._shared_with._get_scope_name()

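The mechanism SharedVariableScope leans on is ordinary TensorFlow 1.x variable-scope reuse: a shared copy resolves _get_scope_name() back through _shared_with to the original layer's name, and passing reuse=True binds to the variables already created under that scope. The underlying TensorFlow behavior, in isolation (a generic TF 1.x sketch, not DeepChem code):

import tensorflow as tf

x1 = tf.placeholder(tf.float32, (None, 4))
x2 = tf.placeholder(tf.float32, (None, 4))
# The first call creates variables under the scope 'my_layer'.
y1 = tf.contrib.layers.fully_connected(x1, 8, scope='my_layer')
# reuse=True makes the second call bind to those same variables
# instead of creating new ones.
y2 = tf.contrib.layers.fully_connected(x2, 8, scope='my_layer', reuse=True)
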

class Conv1D(Layer):
@@ -431,7 +428,7 @@ class Conv1D(Layer):
    return out_tensor


class Dense(Layer):
class Dense(SharedVariableScope):

  def __init__(
      self,
@@ -474,8 +471,6 @@ class Dense(Layer):
      self._shape = tuple(parent_shape[:-1]) + (out_channels,)
    except:
      pass
    self._reuse = False
    self._shared_with = None

  def create_tensor(self, in_layers=None, set_tensors=True, **kwargs):
    inputs = self._get_input_tensors(in_layers)
@@ -512,25 +507,6 @@ class Dense(Layer):
      self.out_tensor = out_tensor
    return out_tensor

  def shared(self, in_layers):
    copy = Dense(
        self.out_channels,
        self.activation_fn,
        self.biases_initializer,
        self.weights_initializer,
        time_series=self.time_series,
        in_layers=in_layers)
    self._reuse = True
    copy._reuse = True
    copy._shared_with = self
    return copy

  def _get_scope_name(self):
    if self._shared_with is None:
      return self.name
    else:
      return self._shared_with._get_scope_name()


class Flatten(Layer):
  """Flatten every dimension except the first"""
@@ -1413,7 +1389,7 @@ class ReduceSquareDifference(Layer):
    return out_tensor


class Conv2D(Layer):
class Conv2D(SharedVariableScope):
  """A 2D convolution on the input.

  This layer expects its input to be a four dimensional tensor of shape (batch size, height, width, # channels).
@@ -1475,6 +1451,8 @@ class Conv2D(Layer):
    parent_tensor = inputs[0]
    if len(parent_tensor.get_shape()) == 3:
      parent_tensor = tf.expand_dims(parent_tensor, 3)
    for reuse in (self._reuse, False):
      try:
        out_tensor = tf.contrib.layers.conv2d(
            parent_tensor,
            num_outputs=self.num_outputs,
@@ -1483,15 +1461,22 @@ class Conv2D(Layer):
            padding=self.padding,
            activation_fn=self.activation_fn,
            normalizer_fn=self.normalizer_fn,
        scope=self.scope_name)
    out_tensor = out_tensor
            scope=self._get_scope_name(),
            reuse=reuse)
        break
      except ValueError:
        if reuse:
          # This probably means the variable hasn't been created yet, so try again
          # with reuse set to false.
          continue
        raise
    if set_tensors:
      self._record_variable_scope(self.scope_name)
      self.out_tensor = out_tensor
    return out_tensor

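The loop above, repeated verbatim in Conv3D and the transpose layers below, encodes a two-step strategy: first try to build the op with this layer's _reuse flag, and if TensorFlow raises a ValueError because the shared variables do not exist yet, fall back to creating them. Distilled into a standalone helper (a sketch of the pattern, not code from this commit):

def _create_with_optional_reuse(build_fn, reuse_first):
  # build_fn is any callable accepting a reuse keyword, e.g. a lambda
  # wrapping tf.contrib.layers.conv2d or tf.layers.conv3d.
  for reuse in (reuse_first, False):
    try:
      return build_fn(reuse=reuse)
    except ValueError:
      if reuse:
        # Variables probably have not been created yet; retry with
        # reuse=False so they get created.
        continue
      raise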

class Conv3D(Layer):
class Conv3D(SharedVariableScope):
  """A 3D convolution on the input.

  This layer expects its input to be a five dimensional tensor of shape
@@ -1555,6 +1540,8 @@ class Conv3D(Layer):
    parent_tensor = inputs[0]
    if len(parent_tensor.get_shape()) == 4:
      parent_tensor = tf.expand_dims(parent_tensor, 4)
    for reuse in (self._reuse, False):
      try:
        out_tensor = tf.layers.conv3d(
            parent_tensor,
            filters=self.num_outputs,
@@ -1563,7 +1550,15 @@ class Conv3D(Layer):
            padding=self.padding,
            activation=self.activation_fn,
            activity_regularizer=self.normalizer_fn,
        name=self.scope_name)
            name=self._get_scope_name(),
            reuse=reuse)
        break
      except ValueError:
        if reuse:
          # This probably means the variable hasn't been created yet, so try again
          # with reuse set to false.
          continue
        raise
    out_tensor = out_tensor
    if set_tensors:
      self._record_variable_scope(self.scope_name)
@@ -1571,7 +1566,7 @@ class Conv3D(Layer):
    return out_tensor


class Conv2DTranspose(Layer):
class Conv2DTranspose(SharedVariableScope):
  """A transposed 2D convolution on the input.

  This layer is typically used for upsampling in a deconvolutional network.  It
@@ -1634,6 +1629,8 @@ class Conv2DTranspose(Layer):
    parent_tensor = inputs[0]
    if len(parent_tensor.get_shape()) == 3:
      parent_tensor = tf.expand_dims(parent_tensor, 3)
    for reuse in (self._reuse, False):
      try:
        out_tensor = tf.contrib.layers.conv2d_transpose(
            parent_tensor,
            num_outputs=self.num_outputs,
@@ -1642,15 +1639,22 @@ class Conv2DTranspose(Layer):
            padding=self.padding,
            activation_fn=self.activation_fn,
            normalizer_fn=self.normalizer_fn,
        scope=self.scope_name)
    out_tensor = out_tensor
            scope=self._get_scope_name(),
            reuse=reuse)
        break
      except ValueError:
        if reuse:
          # This probably means the variable hasn't been created yet, so try again
          # with reuse set to false.
          continue
        raise
    if set_tensors:
      self._record_variable_scope(self.scope_name)
      self.out_tensor = out_tensor
    return out_tensor


class Conv3DTranspose(Layer):
class Conv3DTranspose(SharedVariableScope):
  """A transposed 3D convolution on the input.

  This layer is typically used for upsampling in a deconvolutional network.  It
@@ -1714,6 +1718,8 @@ class Conv3DTranspose(Layer):
    parent_tensor = inputs[0]
    if len(parent_tensor.get_shape()) == 4:
      parent_tensor = tf.expand_dims(parent_tensor, 4)
    for reuse in (self._reuse, False):
      try:
        out_tensor = tf.layers.conv3d_transpose(
            parent_tensor,
            filters=self.num_outputs,
@@ -1722,8 +1728,15 @@ class Conv3DTranspose(Layer):
            padding=self.padding,
            activation=self.activation_fn,
            activity_regularizer=self.normalizer_fn,
        name=self.scope_name)
    out_tensor = out_tensor
            name=self._get_scope_name(),
            reuse=reuse)
        break
      except ValueError:
        if reuse:
          # This probably means the variable hasn't been created yet, so try again
          # with reuse set to false.
          continue
        raise
    if set_tensors:
      self._record_variable_scope(self.scope_name)
      self.out_tensor = out_tensor
deepchem/models/tensorgraph/tensor_graph.py +1 −3
@@ -12,7 +12,7 @@ from tensorflow.python.framework.errors_impl import OutOfRangeError
from deepchem.data import NumpyDataset
from deepchem.metrics import to_one_hot, from_one_hot
from deepchem.models.models import Model
from deepchem.models.tensorgraph.layers import InputFifoQueue, Label, Feature, Weights, Constant, Shared
from deepchem.models.tensorgraph.layers import InputFifoQueue, Label, Feature, Weights, Constant
from deepchem.models.tensorgraph.optimizers import Adam
from deepchem.trans import undo_transforms
from deepchem.utils.evaluate import GeneratorEvaluator
@@ -442,8 +442,6 @@ class TensorGraph(Model):
    def add_layers_to_list(layer, sorted_layers):
      if layer in sorted_layers:
        return
      if isinstance(layer, Shared):
        add_layers_to_list(layer.original_layer, sorted_layers)
      for in_layer in layer.in_layers:
        add_layers_to_list(in_layer, sorted_layers)
      sorted_layers.append(layer)
deepchem/models/tensorgraph/tests/test_tensor_graph.py +2 −2
@@ -374,8 +374,8 @@ class TestTensorGraph(unittest.TestCase):
    replacements = {features: features, constant: Constant(20.0)}
    copy = output.copy(replacements, shared=True)
    tg.add_output(copy)
    assert isinstance(copy, Shared)
    assert isinstance(copy.in_layers[0], Shared)
    assert isinstance(copy, Add)
    assert isinstance(copy.in_layers[0], Dense)
    assert isinstance(copy.in_layers[0].in_layers[0], Feature)
    assert copy.in_layers[1] == replacements[constant]
    variables1 = tg.get_layer_variables(dense)