Unverified Commit 2ebaec77 authored by Bharath Ramsundar's avatar Bharath Ramsundar Committed by GitHub
Browse files

Merge pull request #1478 from VIGS25/textcnn-estimator

#1142: make_estimator TextCNN - Minor additions
parents c288fd5a 31b835fe
Loading
Loading
Loading
Loading
+0 −1
Original line number Diff line number Diff line
@@ -338,7 +338,6 @@ class DTNNEmbedding(Layer):
    self.build()

    atom_number = in_layers[0].out_tensor
    atom_number = tf.cast(atom_number, dtype=tf.int32)
    atom_features = tf.nn.embedding_lookup(self.embedding_list, atom_number)
    out_tensor = atom_features
    if set_tensors:
+26 −3
Original line number Diff line number Diff line
@@ -6,6 +6,7 @@ Created on Thu Sep 28 15:17:50 2017
import numpy as np
import tensorflow as tf
import copy
import six

from deepchem.metrics import to_one_hot, from_one_hot
from deepchem.models.tensorgraph.layers import Dense, Concat, SoftMax, \
@@ -205,6 +206,24 @@ class TextCNNModel(TensorGraph):
    weighted_loss = WeightedError(in_layers=[loss, weights])
    self.set_loss(weighted_loss)

  @staticmethod
  def convert_bytes_to_char(s):
    s = ''.join(chr(b) for b in s)
    return s

  def smiles_to_seq_batch(self, ids_b):
    """Convert a batch of SMILES strings into one stacked np.array of sequences.

    A tf.py_func wrapper is written around this when creating the input_fn
    for make_estimator.

    Parameters
    ----------
    ids_b: sequence of str or bytes
      Batch of SMILES identifiers. tf.py_func delivers strings as bytes on
      Python 3, so bytes entries are decoded to str first.

    Returns
    -------
    np.ndarray
      One row per input SMILES, produced by self.smiles_to_seq and
      vertically stacked.
    """
    # On Python 2.7 bytes and str are analogous, so decoding is only
    # needed on Python 3 (assumes a non-empty batch — TODO confirm callers
    # never pass an empty one).
    if isinstance(ids_b[0], bytes) and not six.PY2:
      ids_b = [
          TextCNNModel.convert_bytes_to_char(smiles) for smiles in ids_b
      ]
    seqs = [self.smiles_to_seq(smiles) for smiles in ids_b]
    return np.vstack(seqs)

  def default_generator(self,
                        dataset,
                        epochs=1,
@@ -230,8 +249,7 @@ class TextCNNModel(TensorGraph):
          feed_dict[self.task_weights[0]] = w_b

        # Transform SMILES sequence to integers
        smiles_seqs = [self.smiles_to_seq(smiles) for smiles in ids_b]
        feed_dict[self.smiles_seqs] = np.vstack(smiles_seqs)
        feed_dict[self.smiles_seqs] = self.smiles_to_seq_batch(ids_b)
        yield feed_dict

  def create_estimator_inputs(self, feature_columns, weight_column, features,
@@ -239,7 +257,12 @@ class TextCNNModel(TensorGraph):
    """Creates tensors for inputs."""
    tensors = dict()
    for layer, column in zip(self.features, feature_columns):
      tensors[layer] = tf.feature_column.input_layer(features, [column])
      feature_col = tf.feature_column.input_layer(features, [column])
      if column.dtype != feature_col.dtype:
        feature_col = tf.cast(feature_col, column.dtype)
      if len(column.shape) < 1:
        feature_col = tf.reshape(feature_col, shape=[tf.shape(feature_col)[0]])
      tensors[layer] = feature_col
    if weight_column is not None:
      tensors[self.task_weights[0]] = tf.feature_column.input_layer(
          features, [weight_column])
+8 −5
Original line number Diff line number Diff line
@@ -299,7 +299,7 @@ class TestEstimators(unittest.TestCase):

    np.random.seed(123)
    smile_ids = ["CCCCC", "CCC(=O)O", "CCC", "CC(=O)O", "O=C=O"]
    X = [model.smiles_to_seq(smile) for smile in smile_ids]
    X = smile_ids
    y = np.zeros((n_samples, n_tasks))
    w = np.ones((n_samples, n_tasks))
    dataset = NumpyDataset(X, y, w, smile_ids)
@@ -310,7 +310,8 @@ class TestEstimators(unittest.TestCase):
    def input_fn(epochs):
      x, y, weights = dataset.make_iterator(
          batch_size=n_samples, epochs=epochs).get_next()
      return {'x': x, 'weights': weights}, y
      smiles_seq = tf.py_func(model.smiles_to_seq_batch, inp=[x], Tout=tf.int32)
      return {'x': smiles_seq, 'weights': weights}, y

    # Create an estimator from it.
    x_col = tf.feature_column.numeric_column(
@@ -345,7 +346,7 @@ class TestEstimators(unittest.TestCase):

    np.random.seed(123)
    smile_ids = ["CCCCC", "CCC(=O)O", "CCC", "CC(=O)O", "O=C=O"]
    X = [model.smiles_to_seq(smile) for smile in smile_ids]
    X = smile_ids
    y = np.zeros((n_samples, n_tasks, 1), dtype=np.float32)
    w = np.ones((n_samples, n_tasks))
    dataset = NumpyDataset(X, y, w, smile_ids)
@@ -353,10 +354,12 @@ class TestEstimators(unittest.TestCase):
    def input_fn(epochs):
      x, y, weights = dataset.make_iterator(
          batch_size=n_samples, epochs=epochs).get_next()
      return {'x': x, 'weights': weights}, y
      smiles_seq = tf.py_func(model.smiles_to_seq_batch, inp=[x], Tout=tf.int32)
      return {'x': smiles_seq, 'weights': weights}, y

    # Create an estimator from it.
    x_col = tf.feature_column.numeric_column('x', shape=(seq_length,))
    x_col = tf.feature_column.numeric_column(
        'x', shape=(seq_length,), dtype=tf.int32)
    weight_col = tf.feature_column.numeric_column('weights', shape=(n_tasks,))
    metrics = {'error': tf.metrics.mean_absolute_error}
    estimator = model.make_estimator(