make_estimator for text_cnn; corrections to DTNNEmbedding; Added tests (05789988) · Commits · 钟慕尧 / deepchem

deepchem/models/tensorgraph/graph_layers.py

+3 −0

Original line number	Diff line number	Diff line
		@@ -338,10 +338,13 @@ class DTNNEmbedding(Layer):

		self.build()
		atom_number = in_layers[0].out_tensor
		atom_number = tf.cast(atom_number, dtype=tf.int32)
		atom_features = tf.nn.embedding_lookup(self.embedding_list, atom_number)
		out_tensor = atom_features
		if set_tensors:
		self.variables = self.trainable_weights
		self.out_tensor = atom_features
		return out_tensor

		def none_tensors(self):
		embedding_list = self.embedding_list

deepchem/models/tensorgraph/models/text_cnn.py

+19 −5

Original line number	Diff line number	Diff line
		@@ -207,9 +207,6 @@ class TextCNNModel(TensorGraph):
		self.labels_fd.append(label)
		cost = L2Loss(in_layers=[label, regression])
		costs.append(cost)
		if self.mode == "classification":
		all_cost = Stack(in_layers=costs, axis=1)
		elif self.mode == "regression":
		all_cost = Stack(in_layers=costs, axis=1)
		self.weights = Weights(shape=(None, self.n_tasks))
		loss = WeightedError(in_layers=[all_cost, self.weights])
		@@ -245,6 +242,23 @@ class TextCNNModel(TensorGraph):
		feed_dict[self.smiles_seqs] = np.stack(smiles_seqs, axis=0)
		yield feed_dict

		def create_estimator_inputs(self, feature_columns, weight_column, features,
		                            labels, mode):
		    """Map this model's input layers to tensors built from Estimator inputs.

		    Each layer in ``self.features`` is paired, in order, with the feature
		    column at the same position in ``feature_columns`` and converted to a
		    tensor with ``tf.feature_column.input_layer``.

		    Parameters
		    ----------
		    feature_columns: iterable
		        Feature columns, zipped against ``self.features`` (the shorter of
		        the two decides how many pairs are produced).
		    weight_column: feature column or None
		        When not None, its tensor is stored under ``self.task_weights[0]``.
		    features:
		        Passed through unchanged to ``tf.feature_column.input_layer``.
		    labels: tensor or None
		        When not None, stored under ``self.labels[0]``. In
		        ``"classification"`` mode the labels are first cast to int32 and
		        one-hot encoded with depth 2; in any other mode they are stored
		        as given.
		    mode:
		        Not used by this method.

		    Returns
		    -------
		    dict
		        Mapping from layer object to the tensor that should feed it.
		    """
		    tensors = {
		        layer: tf.feature_column.input_layer(features, [column])
		        for layer, column in zip(self.features, feature_columns)
		    }

		    if weight_column is not None:
		        weight_tensor = tf.feature_column.input_layer(features,
		                                                      [weight_column])
		        tensors[self.task_weights[0]] = weight_tensor

		    # Nothing more to add when no labels were supplied.
		    if labels is None:
		        return tensors

		    if self.mode == "classification":
		        # Integer class labels become depth-2 one-hot vectors.
		        label_tensor = tf.one_hot(tf.cast(labels, tf.int32), 2)
		    else:
		        label_tensor = labels
		    tensors[self.labels[0]] = label_tensor
		    return tensors

		def smiles_to_seq(self, smiles):
		""" Tokenize characters in smiles to integers
		"""
		@@ -268,7 +282,7 @@ class TextCNNModel(TensorGraph):
		for i in range(self.seq_length - len(seq)):
		# Padding with '_'
		seq.append(self.char_dict['_'])
		return np.array(seq)
		return np.array(seq, dtype=np.int32)

		def predict_on_generator(self, generator, transformers=[], outputs=None):
		out = super(TextCNNModel, self).predict_on_generator(

deepchem/models/tensorgraph/tests/test_estimators.py

+92 −0

Original line number	Diff line number	Diff line
		@@ -4,6 +4,7 @@ import tensorflow as tf
		import deepchem as dc
		import deepchem.models.tensorgraph.layers as layers
		from deepchem.data import NumpyDataset
		from deepchem.models.tensorgraph.models.text_cnn import default_dict


		class TestEstimators(unittest.TestCase):
		@@ -279,6 +280,97 @@ class TestEstimators(unittest.TestCase):
		results = estimator.evaluate(input_fn=lambda: input_fn(1))
		assert results['accuracy'] > 0.9

		def test_textcnn_classification(self):
		    """Check that a classification TextCNN can be trained via an Estimator.

		    Builds a single-task TextCNNModel, wraps it with ``make_estimator``,
		    trains on five SMILES strings whose labels are all zero (unit weights),
		    and asserts that the evaluated loss is below 1e-2 and the accuracy is
		    above 0.9.
		    """
		    n_tasks = 1
		    n_samples = 5
		    seq_length = 20

		    # The TensorGraph model that the Estimator will be created from.
		    model = dc.models.TextCNNModel(
		        n_tasks=n_tasks,
		        char_dict=default_dict,
		        seq_length=seq_length,
		        kernel_sizes=[5, 5],
		        num_filters=[20, 20])

		    np.random.seed(123)
		    smile_ids = ["CCCCC", "CCC(=O)O", "CCC", "CC(=O)O", "O=C=O"]
		    # Featurize every SMILES string with the model's own tokenizer.
		    X = [model.smiles_to_seq(smile) for smile in smile_ids]
		    y = np.zeros((n_samples, n_tasks))
		    w = np.ones((n_samples, n_tasks))
		    dataset = NumpyDataset(X, y, w, smile_ids)

		    def accuracy(labels, predictions, weights):
		        # Reduce labels and predictions to class indices before comparing.
		        label_ids = tf.argmax(labels, axis=2)
		        predicted_ids = tf.expand_dims(
		            tf.argmax(predictions, axis=1), axis=1)
		        return tf.metrics.accuracy(label_ids, predicted_ids, weights)

		    def input_fn(epochs):
		        iterator = dataset.make_iterator(
		            batch_size=n_samples, epochs=epochs)
		        batch_x, batch_y, batch_w = iterator.get_next()
		        return {'x': batch_x, 'weights': batch_w}, batch_y

		    def train_input_fn():
		        return input_fn(100)

		    def eval_input_fn():
		        return input_fn(1)

		    # Describe the inputs and wrap the model in an Estimator.
		    x_col = tf.feature_column.numeric_column(
		        'x', shape=(seq_length,), dtype=tf.int32)
		    weight_col = tf.feature_column.numeric_column(
		        'weights', shape=(n_tasks,))
		    estimator = model.make_estimator(
		        feature_columns=[x_col],
		        weight_column=weight_col,
		        metrics={'accuracy': accuracy})

		    # Fit, then evaluate on a single pass over the same data.
		    estimator.train(input_fn=train_input_fn)
		    results = estimator.evaluate(input_fn=eval_input_fn)
		    assert results['loss'] < 1e-2
		    assert results['accuracy'] > 0.9

		def test_textcnn_regression(self):
		    """Test creating an Estimator from TextCNN for regression.

		    Builds a single-task TextCNNModel in ``"regression"`` mode, wraps it
		    with ``make_estimator``, trains on a handful of SMILES strings whose
		    targets are all zero (unit weights), and asserts that the evaluated
		    loss is below 1e-2 and the mean absolute error is below 0.1.
		    """
		    n_tasks = 1

		    # Create a TensorGraph model.
		    seq_length = 20
		    model = dc.models.TextCNNModel(
		        n_tasks=n_tasks,
		        char_dict=default_dict,
		        seq_length=seq_length,
		        kernel_sizes=[5, 5],
		        num_filters=[20, 20],
		        mode="regression")

		    np.random.seed(123)
		    smile_ids = ["CCCCC", "CCC(=O)O", "CCC", "CC(=O)O", "O=C=O"]
		    # Derive the sample count from the SMILES list so that X, y and w
		    # always have the same number of rows (a hard-coded count previously
		    # disagreed with the number of SMILES strings).
		    n_samples = len(smile_ids)
		    X = [model.smiles_to_seq(smile) for smile in smile_ids]
		    y = np.zeros((n_samples, n_tasks), dtype=np.float32)
		    w = np.ones((n_samples, n_tasks))
		    dataset = NumpyDataset(X, y, w, smile_ids)

		    def input_fn(epochs):
		        x, y, weights = dataset.make_iterator(
		            batch_size=n_samples, epochs=epochs).get_next()
		        return {'x': x, 'weights': weights}, y

		    # Create an estimator from it.
		    x_col = tf.feature_column.numeric_column('x', shape=(seq_length,))
		    weight_col = tf.feature_column.numeric_column(
		        'weights', shape=(n_tasks,))
		    metrics = {'error': tf.metrics.mean_absolute_error}
		    estimator = model.make_estimator(
		        feature_columns=[x_col], weight_column=weight_col, metrics=metrics)

		    # Train the model.
		    estimator.train(input_fn=lambda: input_fn(100))

		    # Evaluate results on a single pass over the same data.
		    results = estimator.evaluate(input_fn=lambda: input_fn(1))
		    assert results['loss'] < 1e-2
		    assert results['error'] < 0.1

		def test_scscore(self):
		"""Test creating an Estimator from a ScScoreModel."""
		n_samples = 10

Admin message