style change (c6f381a5) · Commits · 钟慕尧 / deepchem

deepchem/models/tests/test_overfit.py

+4 −4

Original line number	Diff line number	Diff line
		@@ -677,7 +677,8 @@ class TestOverfit(test_util.TensorFlowTestCase):
		w = np.ones_like(y)
		dataset = dc.data.DiskDataset.from_numpy(X, y, w, ids=None)
		regression_metric = dc.metrics.Metric(
		dc.metrics.mean_absolute_error, mode="regression", task_averager=np.mean)
		dc.metrics.r2_score,
		task_averager=np.mean)
		n_tasks = y.shape[1]
		n_feat = list(dataset.get_data_shape())
		batch_size = 10
		@@ -703,10 +704,9 @@ class TestOverfit(test_util.TensorFlowTestCase):
		model.save()

		# Eval model on train
		scores = model.evaluate(dataset, [classification_metric])

		assert scores[classification_metric.name] < .2
		scores = model.evaluate(dataset, [regression_metric])

		assert scores[regression_metric.name] < .2

		def test_siamese_singletask_classification_overfit(self):
		"""Test siamese singletask model overfits tiny data."""

deepchem/models/tf_new_models/graph_models.py

+12 −3

Original line number	Diff line number	Diff line
		@@ -79,13 +79,18 @@ class SequentialGraph(object):
		def get_layer(self, layer_id):
		return self.layers[layer_id]


		class SequentialDTNNGraph(SequentialGraph):
		"""An analog of Keras Sequential class for Coulomb Matrix data.

		automatically generates and passes topology placeholders to each layer.
		"""

		def __init__(self, max_n_atoms, n_distance=100, distance_min=-1., distance_max=18.):
		def __init__(self,
		max_n_atoms,
		n_distance=100,
		distance_min=-1.,
		distance_max=18.):
		"""
		Parameters
		----------
		@@ -101,7 +106,11 @@ class SequentialDTNNGraph(SequentialGraph):
		"""
		self.graph = tf.Graph()
		with self.graph.as_default():
		self.graph_topology = DTNNGraphTopology(max_n_atoms, n_distance, distance_min=distance_min, distance_max=distance_max)
		self.graph_topology = DTNNGraphTopology(
		max_n_atoms,
		n_distance,
		distance_min=distance_min,
		distance_max=distance_max)
		self.output = self.graph_topology.get_atom_number_placeholder()
		# Keep track of the layers
		self.layers = []
		@@ -116,13 +125,13 @@ class SequentialDTNNGraph(SequentialGraph):
		self.output = layer(self.output)
		self.layers.append(layer)


		def return_inputs(self):
		return self.graph_topology.get_atom_number_placeholders()

		def get_layer(self, layer_id):
		return self.layers[layer_id]


		class SequentialSupportGraph(object):
		"""An analog of Keras Sequential model for test/support models."""

deepchem/models/tf_new_models/graph_topology.py

+25 −16

Original line number	Diff line number	Diff line
		@@ -141,10 +141,16 @@ class GraphTopology(object):
		}
		return merge_dicts([atoms_dict, deg_adj_dict])


		class DTNNGraphTopology(GraphTopology):
		"""Manages placeholders associated with batch of graphs and their topology"""

		def __init__(self, max_n_atoms, n_distance=100, distance_min=-1., distance_max=18., name='DTNN_topology'):
		def __init__(self,
		max_n_atoms,
		n_distance=100,
		distance_min=-1.,
		distance_max=18.,
		name='DTNN_topology'):
		"""
		Parameters
		----------
		@@ -180,8 +186,9 @@ class DTNNGraphTopology(GraphTopology):
		name=self.name + '_distance_matrix_mask')

		# Define the list of tensors to be used as topology
		self.topology = [self.distance_matrix_placeholder,
		self.distance_matrix_mask_placeholder]
		self.topology = [
		self.distance_matrix_placeholder, self.distance_matrix_mask_placeholder
		]
		self.inputs = [self.atom_number_placeholder]
		self.inputs += self.topology

		@@ -209,21 +216,24 @@ class DTNNGraphTopology(GraphTopology):
		"""
		# Extract atom numbers
		atom_number = np.asarray(map(np.diag, batch))
		atom_number = np.asarray(np.round(np.power(2*atom_number, 1/2.4)), dtype=int)
		atom_number = np.asarray(
		np.round(np.power(2 * atom_number, 1 / 2.4)), dtype=int)
		ZiZj = []
		for molecule in atom_number:
		ZiZj.append(np.outer(molecule, molecule))
		ZiZj = np.asarray(ZiZj)
		distance_matrix = np.expand_dims(batch[:], axis=3)
		distance_matrix = np.concatenate([distance_matrix]*self.n_distance, axis=3)
		distance_matrix = np.concatenate(
		[distance_matrix] * self.n_distance, axis=3)
		distance_matrix_mask = batch[:]
		for im, molecule in enumerate(batch):
		for ir, row in enumerate(molecule):
		for ie, element in enumerate(row):
		if element > 0 and ir != ie:
		# expand a float value distance to a distance vector
		distance_matrix[im, ir, ie, :] = self.gauss_expand(ZiZj[im, ir, ie]/element,
		self.n_distance, self.distance_min, self.distance_max)
		distance_matrix[im, ir, ie, :] = self.gauss_expand(
		ZiZj[im, ir, ie] / element, self.n_distance, self.distance_min,
		self.distance_max)
		distance_matrix_mask[im, ir, ie] = 1
		else:
		distance_matrix[im, ir, ie, :] = 0
		@@ -242,4 +252,3 @@ class DTNNGraphTopology(GraphTopology):
		steps = np.array([distance_min + i * step_size for i in range(n_distance)])
		distance_vector = np.exp(-np.square(distance - steps) / (2 * step_size**2))
		return distance_vector

deepchem/molnet/run_benchmark_models.py

+11 −6

Original line number	Diff line number	Diff line
		@@ -303,7 +303,8 @@ def benchmark_regression(
		test_scores = {}

		assert model in [
		'tf_regression', 'tf_regression_ft', 'rf_regression', 'graphconvreg', 'DTNN'
		'tf_regression', 'tf_regression_ft', 'rf_regression', 'graphconvreg',
		'DTNN'
		]
		if hyper_parameters is None:
		hyper_parameters = hps[model]
		@@ -412,12 +413,16 @@ def benchmark_regression(
		n_hidden = hyper_parameters['n_hidden']

		tf.set_random_seed(seed)
		graph_model = deepchem.nn.SequentialDTNNGraph(max_n_atoms=n_features[0],
		n_distance=n_distance)
		graph_model = deepchem.nn.SequentialDTNNGraph(
		max_n_atoms=n_features[0], n_distance=n_distance)
		graph_model.add(deepchem.nn.DTNNEmbedding(n_embedding=n_embedding))
		graph_model.add(deepchem.nn.DTNNStep(n_embedding=n_embedding, n_distance=n_distance))
		graph_model.add(deepchem.nn.DTNNStep(n_embedding=n_embedding, n_distance=n_distance))
		graph_model.add(deepchem.nn.DTNNGather(n_tasks=len(tasks), n_embedding=n_embedding, n_hidden=n_hidden))
		graph_model.add(
		deepchem.nn.DTNNStep(n_embedding=n_embedding, n_distance=n_distance))
		graph_model.add(
		deepchem.nn.DTNNStep(n_embedding=n_embedding, n_distance=n_distance))
		graph_model.add(
		deepchem.nn.DTNNGather(
		n_tasks=len(tasks), n_embedding=n_embedding, n_hidden=n_hidden))

		model = deepchem.models.DTNNRegressor(
		graph_model,

deepchem/nn/layers.py

+45 −24

Original line number	Diff line number	Diff line
		@@ -820,9 +820,11 @@ class LSTMStep(Layer):
		return h, [h, c]
		####################################################### DEBUG


		class DTNNEmbedding(Layer):
		"""Generate embeddings for all atoms in the batch
		"""

		def __init__(self,
		n_embedding=20,
		periodic_table_length=83,
		@@ -836,7 +838,8 @@ class DTNNEmbedding(Layer):

		def build(self):

		self.embedding_list = self.init([self.periodic_table_length, self.n_embedding])
		self.embedding_list = self.init(
		[self.periodic_table_length, self.n_embedding])
		self.trainable_weights = [self.embedding_list]

		def call(self, x):
		@@ -856,10 +859,12 @@ class DTNNEmbedding(Layer):
		atom_features = tf.nn.embedding_lookup(self.embedding_list, x)
		return atom_features


		class DTNNStep(Layer):
		"""A convolution step that merge in distance and atom info of
		all other atoms into current atom.
		"""

		def __init__(self,
		n_embedding=20,
		n_distance=100,
		@@ -879,11 +884,16 @@ class DTNNStep(Layer):
		self.W_cf = self.init([self.n_embedding, self.n_hidden])
		self.W_df = self.init([self.n_distance, self.n_hidden])
		self.W_fc = self.init([self.n_hidden, self.n_embedding])
		self.b_cf = model_ops.zeros(shape=[self.n_hidden,])
		self.b_df = model_ops.zeros(shape=[self.n_hidden,])

		self.trainable_weights = [self.W_cf, self.W_df, self.W_fc,
		self.b_cf, self.b_df]
		self.b_cf = model_ops.zeros(shape=[
		self.n_hidden,
		])
		self.b_df = model_ops.zeros(shape=[
		self.n_hidden,
		])

		self.trainable_weights = [
		self.W_cf, self.W_df, self.W_fc, self.b_cf, self.b_df
		]

		def call(self, x):
		"""Execute this layer on input tensors.
		@@ -904,8 +914,11 @@ class DTNNStep(Layer):
		atom_features = x[0]
		distance_matrix = x[1]
		distance_matrix_mask = x[2]
		outputs = tf.multiply((tf.tensordot(distance_matrix, self.W_df, [[3], [0]]) + self.b_df),
		tf.expand_dims(tf.tensordot(atom_features, self.W_cf, [[2], [0]]) + self.b_cf, axis=1))
		outputs = tf.multiply(
		(tf.tensordot(distance_matrix, self.W_df, [[3], [0]]) + self.b_df),
		tf.expand_dims(
		tf.tensordot(atom_features, self.W_cf, [[2], [0]]) + self.b_cf,
		axis=1))
		# for atom i in a molecule m, this step multiplies together distance info of atom pair(i,j)
		# and embeddings of atom j(both gone through a hidden layer)
		outputs = tf.tensordot(outputs, self.W_fc, [[3], [0]])
		@@ -917,9 +930,11 @@ class DTNNStep(Layer):

		return outputs


		class DTNNGather(Layer):
		"""Map the atomic features into molecular properties and sum
		"""

		def __init__(self,
		n_tasks=1,
		n_embedding=20,
		@@ -943,8 +958,12 @@ class DTNNGather(Layer):
		for i in range(self.n_tasks):
		self.W_out1_list.append(self.init([self.n_embedding, self.n_hidden]))
		self.W_out2_list.append(self.init([self.n_hidden, 1]))
		self.b_out1_list.append(model_ops.zeros(shape=[self.n_hidden,]))
		self.b_out2_list.append(model_ops.zeros(shape=[1,]))
		self.b_out1_list.append(model_ops.zeros(shape=[
		self.n_hidden,
		]))
		self.b_out2_list.append(model_ops.zeros(shape=[
		1,
		]))

		self.trainable_weights = self.W_out1_list + self.W_out2_list + self.b_out1_list + self.b_out2_list

		@@ -964,9 +983,11 @@ class DTNNGather(Layer):
		self.build()
		outputs = []
		for i in range(self.n_tasks):
		output = tf.tensordot(x, self.W_out1_list[i], [[2], [0]]) + self.b_out1_list[i]
		output = tf.tensordot(x, self.W_out1_list[i],
		[[2], [0]]) + self.b_out1_list[i]
		output = self.activation(output)
		output = tf.tensordot(output, self.W_out2_list[i], [[2], [0]]) + self.b_out2_list[i]
		output = tf.tensordot(output, self.W_out2_list[i],
		[[2], [0]]) + self.b_out2_list[i]
		# each task has one independent hidden layer
		output = tf.reduce_sum(tf.squeeze(output, axis=2), axis=1)
		outputs.append(output)

Admin message