Merge pull request #805 from galenxing/master (8e765720) · Commits · 钟慕尧 / deepchem

deepchem/models/tensorgraph/layers.py

+206 −0

Original line number	Diff line number	Diff line
		@@ -2705,3 +2705,209 @@ class AtomicConvolution(Layer):
		R = tf.reduce_sum(tf.multiply(D, D), 3)
		R = tf.sqrt(R)
		return R


		def AlphaShare(in_layers=None, **kwargs):
		    """Build the AlphaShare layers of a Sluice Network.

		    Factory that wraps `in_layers` in a single AlphaShareLayer and then
		    creates one LayerSplitter per input, so that every input layer gets a
		    matching output layer.

		    Parameters
		    ----------
		    in_layers: list of Layers or tensors
		        Tensors in the list must be the same size and the list must include
		        two or more tensors.
		    **kwargs
		        Forwarded unchanged to the AlphaShareLayer constructor.

		    Returns
		    -------
		    output_layers: list of LayerSplitter
		        One layer per entry of in_layers, in the same order. Entry i selects
		        index i along the first axis of the AlphaShareLayer output.

		    Raises
		    ------
		    ValueError
		        If in_layers is None or holds fewer than two entries.

		    References
		    ----------
		    Sluice networks: Learning what to share between loosely related tasks
		    https://arxiv.org/abs/1705.08142
		    """
		    # Fail early with a clear message instead of a TypeError from len(None)
		    # or a late failure when the AlphaShareLayer tensor is created.
		    if in_layers is None or len(in_layers) < 2:
		        raise ValueError("AlphaShare must have more than one input")
		    alpha_share = AlphaShareLayer(in_layers=in_layers, **kwargs)
		    return [
		        LayerSplitter(output_num=index, in_layers=alpha_share)
		        for index in range(len(in_layers))
		    ]


		class AlphaShareLayer(Layer):
		    """Part of a sluice network that mixes subspaces of its inputs.

		    Adds alpha parameters to control sharing between the main and auxiliary
		    tasks. Every input is split column-wise into two halves (subspaces), all
		    subspaces are linearly recombined through a square alpha matrix, and the
		    recombined halves are reassembled into one output per input.

		    Factory method AlphaShare should be used for construction.

		    Parameters
		    ----------
		    in_layers: list of Layers or tensors
		        Tensors in the list must be the same size and the list must include
		        two or more tensors. The number of columns must be even so that both
		        halves have the same width.

		    Returns
		    -------
		    out_tensor: a tensor with shape [len(in_layers), x, y] where x, y were
		        the original layer dimensions. out_tensor should be fed into
		        LayerSplitter.
		    """

		    def __init__(self, **kwargs):
		        super(AlphaShareLayer, self).__init__(**kwargs)

		    def create_tensor(self, in_layers=None, set_tensors=True, **kwargs):
		        """Create the stacked, alpha-mixed output tensor.

		        Raises
		        ------
		        ValueError
		            If fewer than two inputs are given, or if the last dimension of
		            the first input is odd.
		        """
		        inputs = self._get_input_tensors(in_layers)
		        # check that there isn't just one or zero inputs
		        if len(inputs) <= 1:
		            raise ValueError("AlphaShare must have more than one input")
		        self.num_outputs = len(inputs)

		        original_cols = int(inputs[0].get_shape()[-1].value)
		        # Unequal halves cannot be stacked or reshaped back consistently;
		        # report that here rather than as an obscure shape error later.
		        if original_cols % 2 != 0:
		            raise ValueError(
		                "AlphaShare inputs must have an even number of columns")
		        subspace_size = original_cols // 2

		        # create subspaces: two flattened column halves per input
		        subspaces = []
		        for input_tensor in inputs:
		            subspaces.append(
		                tf.reshape(input_tensor[:, :subspace_size], [-1]))
		            subspaces.append(
		                tf.reshape(input_tensor[:, subspace_size:], [-1]))
		        n_alphas = len(subspaces)
		        subspaces = tf.reshape(tf.stack(subspaces), [n_alphas, -1])

		        # create the alpha learnable parameters
		        alphas = tf.Variable(
		            tf.random_normal([n_alphas, n_alphas]), name='alphas')

		        subspaces = tf.matmul(alphas, subspaces)

		        # Rows 2k and 2k+1 are the two halves belonging to input k: reshape
		        # each back to [-1, subspace_size], concatenate them column-wise, and
		        # stack the results so that out_tensor has shape
		        # (len(inputs), ?, original_cols).
		        out_tensor = []
		        for first in range(0, n_alphas, 2):
		            halves = [
		                tf.reshape(subspaces[row], [-1, subspace_size])
		                for row in (first, first + 1)
		            ]
		            out_tensor.append(tf.concat(halves, 1))
		        out_tensor = tf.stack(out_tensor)

		        self.alphas = alphas
		        if set_tensors:
		            self.out_tensor = out_tensor
		        return out_tensor

		    def none_tensors(self):
		        """Reset the stored state to None and return the previous values."""
		        num_outputs, out_tensor, alphas = (self.num_outputs, self.out_tensor,
		                                           self.alphas)
		        self.num_outputs = None
		        self.out_tensor = None
		        self.alphas = None
		        return num_outputs, out_tensor, alphas

		    def set_tensors(self, tensor):
		        """Restore state from the tuple returned by none_tensors."""
		        self.num_outputs, self.out_tensor, self.alphas = tensor


		class LayerSplitter(Layer):
		    """Returns the nth output of a layer.

		    Assumes out_tensor of the input layer has shape [x, :] where x is the
		    total number of intended output tensors.
		    """

		    def __init__(self, output_num, **kwargs):
		        """
		        Parameters
		        ----------
		        output_num: int
		            returns the out_tensor[output_num, :] of a layer
		        """
		        self.output_num = output_num
		        super(LayerSplitter, self).__init__(**kwargs)

		    def create_tensor(self, in_layers=None, set_tensors=True, **kwargs):
		        """Select index `output_num` along the first axis of the input.

		        Only the first input tensor is used. The result is stored on
		        self.out_tensor only when set_tensors is True, matching
		        AlphaShareLayer.create_tensor.
		        """
		        inputs = self._get_input_tensors(in_layers)[0]
		        out_tensor = inputs[self.output_num, :]
		        if set_tensors:
		            self.out_tensor = out_tensor
		        return out_tensor

		    def none_tensors(self):
		        """Reset out_tensor to None and return its previous value."""
		        out_tensor = self.out_tensor
		        self.out_tensor = None
		        return out_tensor

		    def set_tensors(self, tensor):
		        """Restore out_tensor from the value returned by none_tensors."""
		        self.out_tensor = tensor


		class SluiceLoss(Layer):
		    """Calculates the loss in a Sluice Network.

		    Every input into an AlphaShare should be used in SluiceLoss.

		    For each input the columns are split into two halves, the same way it is
		    done in AlphaShareLayer, and the squared Frobenius norm of
		    transpose(first_half) x second_half is computed. The output is the sum
		    of these values over all inputs.
		    """

		    def __init__(self, **kwargs):
		        super(SluiceLoss, self).__init__(**kwargs)

		    def create_tensor(self, in_layers=None, set_tensors=True, **kwargs):
		        """Create the scalar loss tensor.

		        The result is stored on self.out_tensor only when set_tensors is
		        True, matching AlphaShareLayer.create_tensor.
		        """
		        inputs = self._get_input_tensors(in_layers)
		        penalties = []
		        for input_tensor in inputs:
		            # Each input may have its own width, so the split point is
		            # computed per input.
		            subspace_size = input_tensor.get_shape()[-1].value // 2
		            first_half = input_tensor[:, :subspace_size]
		            second_half = input_tensor[:, subspace_size:]
		            product = tf.matmul(tf.transpose(first_half), second_half)
		            # calculate squared Frobenius norm
		            penalties.append(tf.reduce_sum(tf.pow(product, 2)))
		        out_tensor = tf.reduce_sum(penalties)
		        if set_tensors:
		            self.out_tensor = out_tensor
		        return out_tensor


		class BetaShare(Layer):
		    """Part of a sluice network that blends several layer outputs.

		    Adds beta params to control which layer outputs are used for prediction.
		    The inputs are flattened, combined as a weighted sum with one learnable
		    beta per input, and reshaped back to the column count of the first
		    input.

		    Parameters
		    ----------
		    in_layers: list of Layers or tensors
		        Tensors in the list must be the same size and the list is expected
		        to include two or more tensors.

		    Returns
		    -------
		    out_tensor: a single tensor with shape [-1, original_cols], where
		        original_cols is the last dimension of the first input.
		    """

		    def __init__(self, **kwargs):
		        super(BetaShare, self).__init__(**kwargs)

		    def create_tensor(self, in_layers=None, set_tensors=True, **kwargs):
		        """Create the beta-weighted combination of the inputs.

		        Size of input layers must all be the same. The result is stored on
		        self.out_tensor only when set_tensors is True, matching
		        AlphaShareLayer.create_tensor.
		        """
		        inputs = self._get_input_tensors(in_layers)
		        original_cols = int(inputs[0].get_shape()[-1].value)
		        n_betas = len(inputs)

		        # One flattened row per input: shape (n_betas, -1).
		        flattened = [
		            tf.reshape(input_tensor, [-1]) for input_tensor in inputs
		        ]
		        subspaces = tf.reshape(tf.stack(flattened), [n_betas, -1])

		        # (1, n_betas) x (n_betas, -1) gives a single weighted-sum row.
		        betas = tf.Variable(tf.random_normal([1, n_betas]), name='betas')
		        out_tensor = tf.reshape(
		            tf.matmul(betas, subspaces), [-1, original_cols])

		        self.betas = betas
		        if set_tensors:
		            self.out_tensor = out_tensor
		        return out_tensor

		    def none_tensors(self):
		        """Reset the stored state to None and return the previous values."""
		        out_tensor, betas = self.out_tensor, self.betas
		        self.out_tensor = None
		        self.betas = None
		        return out_tensor, betas

		    def set_tensors(self, tensor):
		        """Restore state from the tuple returned by none_tensors."""
		        self.out_tensor, self.betas = tensor

deepchem/models/tensorgraph/tests/test_layers.py

+65 −0

Original line number	Diff line number	Diff line
		@@ -48,6 +48,10 @@ from deepchem.models.tensorgraph.layers import TensorWrapper
		from deepchem.models.tensorgraph.layers import LSTMStep
		from deepchem.models.tensorgraph.layers import AttnLSTMEmbedding
		from deepchem.models.tensorgraph.layers import IterRefLSTMEmbedding
		from deepchem.models.tensorgraph.layers import AlphaShareLayer
		from deepchem.models.tensorgraph.layers import BetaShare
		from deepchem.models.tensorgraph.layers import LayerSplitter
		from deepchem.models.tensorgraph.layers import SluiceLoss

		import deepchem as dc

		@@ -626,3 +630,64 @@ class TestLayers(test_util.TensorFlowTestCase):
		assert result == 1.5
		result = sess.run(tf.gradients(v, v))
		assert result[0] == 1.0

		def test_alpha_share_layer(self):
		    """Check that AlphaShareLayer outputs keep the shape of each input."""
		    n_rows, n_cols = 50, 10
		    first = np.random.rand(n_rows, n_cols)
		    second = np.random.rand(n_rows, n_cols)

		    with self.test_session() as sess:
		        first = tf.convert_to_tensor(first, dtype=tf.float32)
		        second = tf.convert_to_tensor(second, dtype=tf.float32)

		        shared = AlphaShareLayer()(first, second)
		        sess.run(tf.global_variables_initializer())
		        # Index 0 / 1 along the first axis correspond to the two inputs.
		        first_result = shared[0].eval()
		        second_result = shared[1].eval()
		        assert first.shape == first_result.shape
		        assert second.shape == second_result.shape

		def test_beta_share(self):
		    """Test that beta share works correctly.

		    The BetaShare output is evaluated and the shape of the resulting array
		    is compared against both inputs.
		    """
		    batch_size = 50
		    length = 10
		    test_1 = np.random.rand(batch_size, length)
		    test_2 = np.random.rand(batch_size, length)

		    with self.test_session() as sess:
		        test_1 = tf.convert_to_tensor(test_1, dtype=tf.float32)
		        test_2 = tf.convert_to_tensor(test_2, dtype=tf.float32)

		        out_tensor = BetaShare()(test_1, test_2)
		        sess.run(tf.global_variables_initializer())
		        # Assert on the evaluated array rather than the static graph
		        # shape, so the test actually exercises the computation.
		        result = out_tensor.eval()
		        assert test_1.shape == result.shape
		        assert test_2.shape == result.shape

		def test_layer_splitter(self):
		    """Check that LayerSplitter returns each slice of a stacked tensor."""
		    first = np.arange(10).reshape(2, 5)
		    second = np.arange(10, 20).reshape(2, 5)

		    with self.test_session() as sess:
		        first = tf.convert_to_tensor(first, dtype=tf.float32)
		        second = tf.convert_to_tensor(second, dtype=tf.float32)
		        stacked = tf.stack([first, second])
		        split_first = LayerSplitter(0)(stacked)
		        split_second = LayerSplitter(1)(stacked)
		        sess.run(tf.global_variables_initializer())
		        # Splitting the stack must give back the tensors that were stacked.
		        sess.run(tf.assert_equal(first, split_first.eval()))
		        sess.run(tf.assert_equal(second, split_second.eval()))

		def test_sluice_loss(self):
		    """Check SluiceLoss on all-ones inputs.

		    The 3x4 input contributes 36 (a 2x2 product filled with 3s, squared and
		    summed) and the 2x2 input contributes 4, for a total of 40.
		    """
		    wide = np.ones((3, 4))
		    narrow = np.ones((2, 2))
		    with self.test_session() as sess:
		        wide = tf.convert_to_tensor(wide, dtype=tf.float32)
		        narrow = tf.convert_to_tensor(narrow, dtype=tf.float32)
		        loss = SluiceLoss()(wide, narrow)
		        sess.run(tf.global_variables_initializer())
		        assert loss.eval() == 40.0

examples/tox21/tox21_tensorgraph_graphconv_sluice.py

0 → 100644

+192 −0

Original line number	Diff line number	Diff line
		"""
		Script that trains a graph-conv sluice network on the Tox21 dataset.
		"""
		from __future__ import print_function
		from __future__ import division
		from __future__ import unicode_literals

		import numpy as np
		import six
		import sys

		from deepchem.models.tensorgraph import TensorGraph
		from deepchem.metrics import to_one_hot

		from deepchem.feat.mol_graphs import ConvMol
		from deepchem.models.tensorgraph.layers import Input, GraphConv, BatchNorm, GraphPool, Dense, GraphGather, \
		SoftMax, SoftMaxCrossEntropy, Concat, WeightedError, Label, Constant, Weights, Feature, AlphaShare, SluiceLoss, Add

		# Seed NumPy's global random generator with a fixed value.
		np.random.seed(123)
		import tensorflow as tf

		# Seed TensorFlow's graph-level random generator with the same fixed value.
		# NOTE(review): both seeds are set before deepchem is imported, presumably
		# so anything deepchem does at import time sees seeded RNGs - confirm.
		tf.set_random_seed(123)
		import deepchem as dc
		from tox21_datasets import load_tox21


		def sluice_model(batch_size, tasks):
		    """Build a two-branch graph-conv sluice network as a TensorGraph.

		    The network has two parallel branches ("a" and "b") that exchange
		    information through three AlphaShare stages. The first half of `tasks`
		    is predicted from branch a and the second half from branch b. The total
		    loss is the weighted per-task softmax cross entropy plus a SluiceLoss
		    over the layers that were fed into the AlphaShare stages.

		    The model directory is read from the module-level `model_dir`, which
		    must be defined before this function is called.

		    Parameters
		    ----------
		    batch_size: int
		        Batch size passed to TensorGraph and GraphGather.
		    tasks: list
		        Task names; one two-class output and one Label are created per task.

		    Returns
		    -------
		    model: TensorGraph
		        The constructed model with outputs and loss set.
		    feed_dict_generator: callable
		        feed_dict_generator(dataset, batch_size, epochs=1) yields one feed
		        dict per batch.
		    labels: list of Label
		        One label layer per task, in task order.
		    task_weights: Weights
		        The per-task weights layer.
		    """
		    model = TensorGraph(
		        model_dir=model_dir,
		        batch_size=batch_size,
		        use_queue=False,
		        tensorboard=True)
		    atom_features = Feature(shape=(None, 75))
		    degree_slice = Feature(shape=(None, 2), dtype=tf.int32)
		    membership = Feature(shape=(None,), dtype=tf.int32)

		    sluice_loss = []
		    deg_adjs = [
		        Feature(shape=(None, i + 1), dtype=tf.int32)
		        for i in range(0, 10 + 1)
		    ]
		    # Graph-structure inputs shared by every graph layer below.
		    topology = [degree_slice, membership] + deg_adjs

		    gc1 = GraphConv(
		        64, activation_fn=tf.nn.relu, in_layers=[atom_features] + topology)

		    # The first convolution is shared: it is fed to AlphaShare twice to
		    # seed the two branches.
		    as1 = AlphaShare(in_layers=[gc1, gc1])
		    sluice_loss.append(gc1)

		    batch_norm1a = BatchNorm(in_layers=[as1[0]])
		    batch_norm1b = BatchNorm(in_layers=[as1[1]])

		    gp1a = GraphPool(in_layers=[batch_norm1a] + topology)
		    gp1b = GraphPool(in_layers=[batch_norm1b] + topology)

		    gc2a = GraphConv(
		        64, activation_fn=tf.nn.relu, in_layers=[gp1a] + topology)
		    gc2b = GraphConv(
		        64, activation_fn=tf.nn.relu, in_layers=[gp1b] + topology)

		    as2 = AlphaShare(in_layers=[gc2a, gc2b])
		    sluice_loss.append(gc2a)
		    sluice_loss.append(gc2b)

		    batch_norm2a = BatchNorm(in_layers=[as2[0]])
		    batch_norm2b = BatchNorm(in_layers=[as2[1]])

		    gp2a = GraphPool(in_layers=[batch_norm2a] + topology)
		    gp2b = GraphPool(in_layers=[batch_norm2b] + topology)

		    densea = Dense(out_channels=128, activation_fn=None, in_layers=[gp2a])
		    denseb = Dense(out_channels=128, activation_fn=None, in_layers=[gp2b])

		    batch_norm3a = BatchNorm(in_layers=[densea])
		    batch_norm3b = BatchNorm(in_layers=[denseb])

		    as3 = AlphaShare(in_layers=[batch_norm3a, batch_norm3b])
		    sluice_loss.append(batch_norm3a)
		    sluice_loss.append(batch_norm3b)

		    gg1a = GraphGather(
		        batch_size=batch_size,
		        activation_fn=tf.nn.tanh,
		        in_layers=[as3[0]] + topology)
		    gg1b = GraphGather(
		        batch_size=batch_size,
		        activation_fn=tf.nn.tanh,
		        in_layers=[as3[1]] + topology)

		    costs = []
		    labels = []
		    half = len(tasks) / 2
		    for position, task in enumerate(tasks):
		        # Tasks before the midpoint read branch a, the rest read branch b.
		        if position < half:
		            classification = Dense(
		                out_channels=2, activation_fn=None, in_layers=[gg1a])
		            print("first half:")
		            print(task)
		        else:
		            classification = Dense(
		                out_channels=2, activation_fn=None, in_layers=[gg1b])
		            print('second half')
		            print(task)

		        softmax = SoftMax(in_layers=[classification])
		        model.add_output(softmax)

		        label = Label(shape=(None, 2))
		        labels.append(label)
		        cost = SoftMaxCrossEntropy(in_layers=[label, classification])
		        costs.append(cost)

		    entropy = Concat(in_layers=costs)
		    task_weights = Weights(shape=(None, len(tasks)))
		    task_loss = WeightedError(in_layers=[entropy, task_weights])

		    s_cost = SluiceLoss(in_layers=sluice_loss)

		    total_loss = Add(in_layers=[task_loss, s_cost])
		    model.set_loss(total_loss)

		    def feed_dict_generator(dataset, batch_size, epochs=1):
		        """Yield one feed dict per padded batch, for `epochs` passes."""
		        for _ in range(epochs):
		            for X_b, y_b, w_b, ids_b in dataset.iterbatches(
		                    batch_size, pad_batches=True):
		                d = {}
		                for index, label in enumerate(labels):
		                    d[label] = to_one_hot(y_b[:, index])
		                d[task_weights] = w_b
		                multiConvMol = ConvMol.agglomerate_mols(X_b)
		                d[atom_features] = multiConvMol.get_atom_features()
		                d[degree_slice] = multiConvMol.deg_slice
		                d[membership] = multiConvMol.membership
		                # Fetch the adjacency lists once per batch instead of on
		                # every loop iteration.
		                adjacency_lists = multiConvMol.get_deg_adjacency_lists()
		                # NOTE(review): list entry i feeds deg_adjs[i - 1] and
		                # entry 0 is skipped - confirm against ConvMol.
		                for i in range(1, len(adjacency_lists)):
		                    d[deg_adjs[i - 1]] = adjacency_lists[i]
		                yield d

		    return model, feed_dict_generator, labels, task_weights


		# Read by sluice_model() as the TensorGraph model directory.
		model_dir = "tmp/graphconv"

		# Load the Tox21 splits using the 'GraphConv' featurizer.
		tox21_tasks, tox21_datasets, transformers = load_tox21(featurizer='GraphConv')
		train_dataset, valid_dataset, test_dataset = tox21_datasets
		print(train_dataset.data_dir)
		print(valid_dataset.data_dir)

		# ROC-AUC classification metric, combined with np.mean.
		metric = dc.metrics.Metric(
		    dc.metrics.roc_auc_score, np.mean, mode="classification")

		# Training settings.
		batch_size = 100
		num_epochs = 10

		model, generator, labels, task_weights = sluice_model(batch_size, tox21_tasks)

		# Fit the model.
		model.fit_generator(
		    generator(train_dataset, batch_size, epochs=num_epochs),
		    checkpoint_interval=1000)


		def _score(dataset):
		    """Evaluate the fitted model on one dataset with per-task metrics."""
		    return model.evaluate_generator(
		        generator(dataset, batch_size), [metric],
		        transformers,
		        labels,
		        weights=[task_weights],
		        per_task_metrics=True)


		print("Evaluating model")
		train_scores = _score(train_dataset)
		valid_scores = _score(valid_dataset)

		print("Train scores")
		print(train_scores)

		print("Validation scores")
		print(valid_scores)

Admin message