Merge pull request #1218 from peastman/uncertainty (e2477458) · Commits · 钟慕尧 / deepchem

deepchem/models/tensorgraph/fcnet.py

+30 −3

Original line number	Diff line number	Diff line
		@@ -18,7 +18,7 @@ from deepchem.metrics import to_one_hot, from_one_hot
		from deepchem.metrics import to_one_hot

		from deepchem.models.tensorgraph.tensor_graph import TensorGraph, TFWrapper
		from deepchem.models.tensorgraph.layers import Feature, Label, Weights, WeightedError, Dense, Dropout, WeightDecay, Reshape, SoftMax, SoftMaxCrossEntropy, L2Loss, ReduceSum
		from deepchem.models.tensorgraph.layers import Feature, Label, Weights, WeightedError, Dense, Dropout, WeightDecay, Reshape, SoftMax, SoftMaxCrossEntropy, L2Loss, ReduceSum, ReduceMean, Exp

		logger = logging.getLogger(__name__)

		@@ -211,6 +211,7 @@ class MultiTaskRegressor(TensorGraph):
		weight_decay_penalty_type="l2",
		dropouts=0.5,
		activation_fns=tf.nn.relu,
		uncertainty=False,
		**kwargs):
		"""Create a MultiTaskRegressor.

		@@ -244,6 +245,9 @@ class MultiTaskRegressor(TensorGraph):
		the Tensorflow activation function to apply to each layer. The length of this list should equal
		len(layer_sizes). Alternatively this may be a single value instead of a list, in which case the
		same value is used for every layer.
		uncertainty: bool
		if True, include extra outputs and loss terms to enable the uncertainty
		in outputs to be predicted
		"""
		super(MultiTaskRegressor, self).__init__(**kwargs)
		self.n_tasks = n_tasks
		@@ -257,6 +261,10 @@ class MultiTaskRegressor(TensorGraph):
		dropouts = [dropouts] * n_layers
		if not isinstance(activation_fns, collections.Sequence):
		activation_fns = [activation_fns] * n_layers
		if uncertainty:
		if any(d == 0.0 for d in dropouts):
		raise ValueError(
		'Dropout must be included in every layer to predict uncertainty')

		# Add the input features.

		@@ -296,7 +304,26 @@ class MultiTaskRegressor(TensorGraph):
		])
		self.add_output(output)
		labels = Label(shape=(None, n_tasks, 1))
		weights = Weights(shape=(None, n_tasks))
		weights = Weights(shape=(None, n_tasks, 1))
		if uncertainty:
		log_var = Reshape(
		shape=(-1, n_tasks, 1),
		in_layers=[
		Dense(
		in_layers=[prev_layer],
		out_channels=n_tasks,
		weights_initializer=TFWrapper(
		tf.truncated_normal_initializer,
		stddev=weight_init_stddevs[-1]),
		biases_initializer=TFWrapper(
		tf.constant_initializer, value=0.0))
		])
		var = Exp(log_var)
		self.add_variance(var)
		diff = labels - output
		weighted_loss = weights * (diff * diff / var + log_var)
		weighted_loss = ReduceSum(ReduceMean(weighted_loss, axis=[1, 2]))
		else:
		weighted_loss = ReduceSum(L2Loss(in_layers=[labels, output, weights]))
		if weight_decay_penalty != 0.0:
		weighted_loss = WeightDecay(

deepchem/models/tensorgraph/tensor_graph.py

+158 −13

Original line number	Diff line number	Diff line
		@@ -61,6 +61,7 @@ class TensorGraph(Model):
		self.features = list()
		self.labels = list()
		self.outputs = list()
		self.variances = list()
		self.task_weights = list()
		self.submodels = list()
		self.loss = Constant(0)
		@@ -363,8 +364,13 @@ class TensorGraph(Model):
		return results[0]
		return results

		def predict_on_generator(self, generator, transformers=[], outputs=None):
		def _predict(self, generator, transformers, outputs, uncertainty):
		"""
		Predict outputs for data provided by a generator.

		This is the private implementation of prediction. Do not call it directly.
		Instead call one of the public prediction methods.

		Parameters
		----------
		generator: Generator
		@@ -376,6 +382,10 @@ class TensorGraph(Model):
		If outputs is a Layer/Tensor, then will evaluate and return as a
		single ndarray. If outputs is a list of Layers/Tensors, will return a list
		of ndarrays.
		uncertainty: bool
		specifies whether this is being called as part of estimating uncertainty.
		If True, it sets the training flag so that dropout will be enabled, and
		returns the values of the uncertainty outputs.
		Returns:
		y_pred: numpy ndarray of shape (n_samples, n_classes*n_tasks)
		"""
		@@ -385,9 +395,19 @@ class TensorGraph(Model):
		outputs = self.outputs
		elif not isinstance(outputs, collections.Sequence):
		outputs = [outputs]
		if uncertainty:
		if len(self.variances) == 0:
		raise ValueError('This model cannot compute uncertainties')
		if len(self.variances) != len(outputs):
		raise ValueError(
		'The number of variances must exactly match the number of outputs')
		tensors = outputs + self.variances
		else:
		tensors = outputs

		with self._get_tf("Graph").as_default():
		# Gather results for each output
		results = [[] for out in outputs]
		results = [[] for out in tensors]
		n_samples = 0
		n_enqueued = [0]
		final_sample = [None]
		@@ -397,7 +417,7 @@ class TensorGraph(Model):
		args=(self, generator, self._get_tf("Graph"), self.session,
		n_enqueued, final_sample))
		enqueue_thread.start()
		for feed_dict in self._create_feed_dicts(generator, False):
		for feed_dict in self._create_feed_dicts(generator, uncertainty):
		if self.queue_installed:
		# Don't let this thread get ahead of the enqueue thread, since if
		# we try to read more batches than the total number that get queued,
		@@ -409,7 +429,7 @@ class TensorGraph(Model):
		if n_samples == final_sample[0]:
		break
		n_samples += 1
		feed_results = self._run_graph(outputs, feed_dict, False)
		feed_results = self._run_graph(tensors, feed_dict, uncertainty)
		if tfe.in_eager_mode():
		feed_results = [f.numpy() for f in feed_results]
		if len(feed_results) > 1:
		@@ -428,9 +448,29 @@ class TensorGraph(Model):
		# If only one output, just return array
		if len(final_results) == 1:
		return final_results[0]
		elif uncertainty:
		return zip(final_results[:len(outputs)], final_results[len(outputs):])
		else:
		return final_results

		def predict_on_generator(self, generator, transformers=[], outputs=None):
		  """Predict outputs for data provided by a generator.

		  This is the public, deterministic entry point: it forwards to the private
		  _predict() with the uncertainty flag switched off.

		  Parameters
		  ----------
		  generator: Generator
		    Generator that constructs feed dictionaries for TensorGraph.
		  transformers: list
		    List of dc.trans.Transformers.
		  outputs: object
		    If outputs is None, then will assume outputs = self.outputs.
		    If outputs is a Layer/Tensor, then will evaluate and return as a
		    single ndarray. If outputs is a list of Layers/Tensors, will return a
		    list of ndarrays.

		  Returns
		  -------
		  y_pred: numpy ndarray of shape (n_samples, n_classes*n_tasks)
		  """
		  # Plain prediction never asks _predict() for the uncertainty outputs.
		  estimate_uncertainty = False
		  return self._predict(generator, transformers, outputs,
		                       estimate_uncertainty)

		def predict_proba_on_generator(self, generator, transformers=[],
		outputs=None):
		"""
		@@ -457,6 +497,33 @@ class TensorGraph(Model):
		generator = self.default_generator(dataset, predict=True, pad_batches=False)
		return self.predict_on_generator(generator, transformers, outputs)

		def predict_uncertainty_on_batch(self, X, masks=50):
		  """Predict outputs and their uncertainties for a single batch of inputs.

		  The batch is wrapped in a label-free NumpyDataset and handed to
		  predict_uncertainty(), which does the actual work.

		  The uncertainty is computed as described in
		  https://arxiv.org/abs/1703.04977. It involves repeating the prediction
		  many times with different dropout masks. The prediction is computed as
		  the average over all the predictions. The uncertainty includes both the
		  variation among the predicted values (epistemic uncertainty) and the
		  model's own estimates for how well it fits the data (aleatoric
		  uncertainty). Not all models support uncertainty prediction.

		  Parameters
		  ----------
		  X: ndarray
		    the input data, as a Numpy array.
		  masks: int
		    the number of dropout masks to average over

		  Returns
		  -------
		  for each output, a tuple (y_pred, y_std) where y_pred is the predicted
		  value of the output, and each element of y_std estimates the standard
		  deviation of the corresponding element of y_pred
		  """
		  return self.predict_uncertainty(NumpyDataset(X=X, y=None), masks)

		def predict_proba_on_batch(self, X, transformers=[], outputs=None):
		"""Generates predictions for input samples, processing samples in a batch.

		@@ -484,10 +551,9 @@ class TensorGraph(Model):
		transformers: list
		List of dc.trans.Transformers.
		outputs: object
		If outputs is None, then will assume outputs = self.outputs[0] (single
		output). If outputs is a Layer/Tensor, then will evaluate and return as a
		single ndarray. If outputs is a list of Layers/Tensors, will return a list
		of ndarrays.
		If outputs is None, then will assume outputs=self.outputs. If outputs is
		a Layer/Tensor, then will evaluate and return as a single ndarray. If
		outputs is a list of Layers/Tensors, will return a list of ndarrays.

		Returns
		-------
		@@ -496,6 +562,58 @@ class TensorGraph(Model):
		generator = self.default_generator(dataset, predict=True, pad_batches=False)
		return self.predict_on_generator(generator, transformers, outputs)

		def predict_uncertainty(self, dataset, masks=50):
		  """Predict the model's outputs, along with the uncertainty in each one.

		  The uncertainty is computed as described in
		  https://arxiv.org/abs/1703.04977. It involves repeating the prediction
		  many times with different dropout masks. The prediction is computed as
		  the average over all the predictions. The uncertainty includes both the
		  variation among the predicted values (epistemic uncertainty) and the
		  model's own estimates for how well it fits the data (aleatoric
		  uncertainty). Not all models support uncertainty prediction.

		  Parameters
		  ----------
		  dataset: dc.data.Dataset
		    Dataset to make prediction on
		  masks: int
		    the number of dropout masks to average over

		  Returns
		  -------
		  If the model has a single output, a tuple (y_pred, y_std). Otherwise a
		  list with one (y_pred, y_std) tuple per output. y_pred is the predicted
		  value of the output, and each element of y_std estimates the standard
		  deviation of the corresponding element of y_pred.
		  """
		  # Running sums, one entry per model output, accumulated over all masks.
		  sum_pred = []
		  sum_sq_pred = []
		  sum_var = []
		  for _ in range(masks):
		    generator = self.default_generator(
		        dataset, predict=True, pad_batches=False)
		    results = self._predict(generator, [], self.outputs, True)
		    if not sum_pred:
		      # First pass: seed the accumulators with this pass's arrays.
		      for p, v in results:
		        sum_pred.append(p)
		        sum_sq_pred.append(p * p)
		        sum_var.append(v)
		    else:
		      for j, (p, v) in enumerate(results):
		        sum_pred[j] += p
		        sum_sq_pred[j] += p * p
		        sum_var[j] += v
		  output = []
		  std = []
		  for total, total_sq, total_var in zip(sum_pred, sum_sq_pred, sum_var):
		    mean = total / masks
		    # E[p^2] - E[p]^2 can come out marginally negative through
		    # floating-point cancellation; clamp it so sqrt never yields NaN.
		    epistemic = np.maximum(total_sq / masks - mean * mean, 0.0)
		    aleatoric = total_var / masks
		    output.append(mean)
		    std.append(np.sqrt(epistemic + aleatoric))
		  if len(output) == 1:
		    return (output[0], std[0])
		  # Return a real list: a bare zip() is a one-shot iterator on Python 3
		  # and supports neither len() nor indexing.
		  return list(zip(output, std))

		def predict_proba(self, dataset, transformers=[], outputs=None):
		"""
		Parameters
		@@ -505,10 +623,9 @@ class TensorGraph(Model):
		transformers: list
		List of dc.trans.Transformers.
		outputs: object
		If outputs is None, then will assume outputs = self.outputs[0] (single
		output). If outputs is a Layer/Tensor, then will evaluate and return as a
		single ndarray. If outputs is a list of Layers/Tensors, will return a list
		of ndarrays.
		If outputs is None, then will assume outputs=self.outputs. If outputs is
		a Layer/Tensor, then will evaluate and return as a single ndarray. If
		outputs is a list of Layers/Tensors, will return a list of ndarrays.

		Returns
		-------
		@@ -527,7 +644,7 @@ class TensorGraph(Model):
		sorted_layers.append(layer)

		sorted_layers = []
		for l in self.features + self.labels + self.task_weights + self.outputs:
		for l in self.features + self.labels + self.task_weights + self.outputs + self.variances:
		add_layers_to_list(l, sorted_layers)
		add_layers_to_list(self.loss, sorted_layers)
		for submodel in self.submodels:
		@@ -565,6 +682,8 @@ class TensorGraph(Model):
		build_layers(self.loss, tensors)
		for output in self.outputs:
		build_layers(output, tensors)
		for variance in self.variances:
		build_layers(variance, tensors)
		for submodel in self.submodels:
		build_layers(submodel.loss, tensors)

		@@ -666,9 +785,24 @@ class TensorGraph(Model):
		self.loss = layer

		def add_output(self, layer):
		  """Register a layer as a model output.

		  The layer is added to the graph and appended to self.outputs, the list
		  of output layers that can be computed by predict().
		  """
		  self._add_layer(layer)
		  self.outputs.append(layer)

		def add_variance(self, layer):
		  """Register a layer that computes the variance in an output.

		  A model that supports uncertainty must call add_variance() once for
		  every output. Each variance layer has the same shape as the
		  corresponding output, and each of its elements estimates the variance
		  from aleatoric uncertainty in the matching element of that output.

		  Such a model MUST also use dropout on every layer; otherwise the
		  uncertainties it computes will be inaccurate.
		  """
		  self._add_layer(layer)
		  self.variances.append(layer)

		def set_optimizer(self, optimizer):
		"""Set the optimizer to use for fitting."""
		self.optimizer = optimizer
		@@ -971,6 +1105,17 @@ class TensorGraph(Model):
		feed_dict = {}
		for key, value in d.items():
		if isinstance(key, Input):
		# Add or remove dimensions of size 1 to match the shape of the layer.
		value_dims = len(value.shape)
		layer_dims = len(key.shape)
		if value_dims < layer_dims:
		if all(i == 1 for i in key.shape[value_dims:]):
		value = tf.reshape(value,
		list(value.shape) + [1] *
		(layer_dims - value_dims))
		if value_dims > layer_dims:
		if all(i == 1 for i in value.shape[layer_dims:]):
		value = tf.reshape(value, value.shape[:layer_dims])
		feed_dict[key] = tf.cast(value, key.dtype)
		else:
		feed_dict[key] = value

deepchem/models/tests/test_overfit.py

+30 −0

Original line number	Diff line number	Diff line
		@@ -939,3 +939,33 @@ class TestOverfit(test_util.TensorFlowTestCase):
		# Eval model on train
		scores = model.evaluate(dataset, [metric])
		assert scores[metric.name] < .2

		def test_multitask_regressor_uncertainty(self):
		  """Test computing uncertainty for a MultitaskRegressor."""
		  n_tasks = 1
		  n_samples = 30
		  n_features = 1
		  noise = 0.1

		  # Dummy dataset: labels are 10 * X plus Gaussian noise of scale `noise`.
		  X = np.random.rand(n_samples, n_features, 1)
		  label_noise = np.random.normal(scale=noise, size=(n_samples, n_tasks, 1))
		  y = 10 * X + label_noise
		  dataset = dc.data.NumpyDataset(X, y)

		  # Dropout is non-zero because the model is built with uncertainty=True.
		  hyperparameters = dict(
		      layer_sizes=[200],
		      weight_init_stddevs=[.1],
		      batch_size=n_samples,
		      dropouts=0.1,
		      learning_rate=0.003,
		      uncertainty=True)
		  model = dc.models.MultiTaskRegressor(n_tasks, n_features,
		                                       **hyperparameters)

		  # Train on the dummy data.
		  model.fit(dataset, nb_epoch=2500)

		  # Predict the output and uncertainty, then check both are in range.
		  pred, std = model.predict_uncertainty(dataset)
		  mean_abs_error = np.mean(np.abs(y - pred))
		  assert mean_abs_error < 1.0
		  mean_std = np.mean(std)
		  assert noise < mean_std < 1.0

examples/notebooks/Uncertainty.ipynb

0 → 100644

+231 −0

File added.

Preview size limit exceeded, changes collapsed.

examples/notebooks/tests.py

+5 −0

Original line number	Diff line number	Diff line
		@@ -88,3 +88,8 @@ def test_seqtoseq_fingerprint():
		def test_dataset_preparation():
		  """Read dataset_preparation.ipynb and require an empty error list."""
		  _, notebook_errors = _notebook_read("dataset_preparation.ipynb")
		  assert notebook_errors == []


		def test_uncertainty():
		  """Read Uncertainty.ipynb and require an empty error list."""
		  _, notebook_errors = _notebook_read("Uncertainty.ipynb")
		  assert notebook_errors == []

Admin message