Merge pull request #1393 from peastman/saliency (587f5f09) · Commits · 钟慕尧 / deepchem

deepchem/models/tensorgraph/tensor_graph.py

+55 −0

Original line number	Diff line number	Diff line
		@@ -591,6 +591,61 @@ class TensorGraph(Model):
		else:
		return zip(output, std)

		def compute_saliency(self, X):
		    """Compute the saliency map for an input sample.

		    This computes the Jacobian matrix with the derivative of each output
		    element with respect to each input element. More precisely,

		    - If this model has a single Feature layer and a single output, this
		      returns a matrix of shape (output_size, feature_size) with the
		      derivatives.
		    - If this model has multiple Features or outputs, this returns a list of
		      matrices, where element i*n_features+j contains the derivatives of the
		      i'th output with respect to the j'th Feature layer.

		    If an output has more than one dimension per sample, the rows of the
		    matrix correspond to its elements in flattened order.

		    NOTE(review): only the output is explicitly flattened here; the Feature
		    axes are returned as produced by the gradient after the two singleton
		    axes are squeezed out. Confirm the resulting shape for Features with more
		    than one dimension per sample.

		    Parameters
		    ----------
		    X: ndarray or array-like
		        the input data for a single sample (without a batch dimension)

		    Returns
		    -------
		    the Jacobian matrix, or a list of matrices
		    """

		    def jacobian(y, x):
		        # Adapted from https://github.com/tensorflow/tensorflow/issues/675#issuecomment-319891923.
		        # The next release of Tensorflow will add a proper jacobian() function,
		        # so we can remove this then.
		        # Keep only the first sample of the batch and flatten it to a vector.
		        y = tf.reshape(tf.convert_to_tensor(y)[0], [-1])
		        n = y.shape[0]
		        loop_vars = [
		            tf.constant(0, tf.int32),
		            tf.TensorArray(tf.float32, size=n)
		        ]
		        # Entry j of the array holds the gradient of output element j with
		        # respect to x.
		        _, rows = tf.while_loop(
		            lambda j, _: j < n,
		            lambda j, result: (j + 1, result.write(j, tf.gradients(y[j], x))),
		            loop_vars)
		        return rows.stack()

		    if not self.built:
		        self.build()
		    # NOTE(review): the gradient ops are rebuilt on every call, so repeated
		    # calls keep adding ops to the graph.
		    with self._get_tf("Graph").as_default():
		        grads = [
		            jacobian(output, feature)
		            for output in self.default_outputs
		            for feature in self.features
		        ]
		    # Accept any array-like sample and prepend a batch axis of length 1.
		    X = np.asarray(X)[np.newaxis]
		    result = self.predict_on_batch(X, outputs=grads)
		    # Remove the extra singleton axes 1 and 2 that jacobian() produces
		    # (presumably the one-element list returned by tf.gradients and the
		    # batch axis of length 1).
		    if isinstance(result, list):
		        return [np.squeeze(r, (1, 2)) for r in result]
		    return np.squeeze(result, (1, 2))

		def topsort(self):

		def add_layers_to_list(layer, sorted_layers):

deepchem/models/tensorgraph/tests/test_tensor_graph.py

+28 −0

Original line number	Diff line number	Diff line
		@@ -548,3 +548,31 @@ class TestTensorGraph(unittest.TestCase):
		output1 = tg.predict_on_batch(input)
		output2 = tg(input)
		assert np.allclose(output1, output2.numpy())

		def test_saliency_mapping(self):
		    """Check compute_saliency() against a finite-difference estimate."""
		    n_tasks = 3
		    n_features = 5
		    model = dc.models.MultitaskRegressor(
		        n_tasks,
		        n_features, [20],
		        activation_fns=tf.tanh,
		        weight_init_stddevs=1.0)
		    sample = np.random.random(n_features)
		    saliency = model.compute_saliency(sample)
		    assert saliency.shape[0] == n_tasks
		    assert saliency.shape[1] == n_features

		    def predict_flat(point):
		        # Evaluate the model on a single point presented as a batch of one.
		        batch = point.reshape((1, n_features))
		        return model.predict_on_batch(batch).flatten()

		    # Move a total distance of delta along each task's gradient direction
		    # (half forward, half backward). To first order the task's output should
		    # then differ by |gradient| * delta between the two points.
		    delta = 0.01
		    for task in range(n_tasks):
		        gradient = saliency[task]
		        norm = np.sqrt(np.sum(gradient**2))
		        step = 0.5 * delta / norm
		        forward = predict_flat(sample + gradient * step)
		        backward = predict_flat(sample - gradient * step)
		        expected = backward + norm * delta
		        self.assertAlmostEqual(forward[task], expected[task], places=4)

Admin message