fix bug and yapf (1a3c14b0) · Commits · 钟慕尧 / deepchem

README.md

+15 −8

Original line number	Diff line number	Diff line
		@@ -496,26 +496,29 @@ Scaffold splitting
		\| \|Graphconv regression\|Scaffold \|0.695 \|0.391 \|
		\| \|Weave regression \|Scaffold \|0.401 \|0.373 \|
		\|qm7 \|NN regression \|Index \|0.997 \|0.992 \|
		\| \|DTNN \|Index \|0.998 \|0.996 \|
		\| \|DTNN \|Index \|0.997 \|0.995 \|
		\| \|NN regression \|Random \|0.998 \|0.997 \|
		\| \|DTNN \|Random \|0.998 \|0.998 \|
		\| \|DTNN \|Random \|0.999 \|0.998 \|
		\| \|NN regression \|Stratified \|0.998 \|0.997 \|
		\| \|DTNN \|Stratified \|0.998 \|0.998 \|
		\|qm7b \|MT-NN regression \|Index \|0.903 \|0.789 \|
		\| \|DTNN \|Index \|0.872 \|0.821 \|
		\| \|DTNN \|Index \|0.919 \|0.863 \|
		\| \|MT-NN regression \|Random \|0.893 \|0.839 \|
		\| \|DTNN \|Random \|0.865 \|0.849 \|
		\| \|DTNN \|Random \|0.924 \|0.898 \|
		\| \|MT-NN regression \|Stratified \|0.891 \|0.859 \|
		\| \|DTNN \|Stratified \|0.853 \|0.839 \|
		\| \|DTNN \|Stratified \|0.913 \|0.894 \|
		\|qm8 \|MT-NN regression \|Index \|0.783 \|0.656 \|
		\| \|DTNN \|Index \|0.737 \|0.639 \|
		\| \|DTNN \|Index \|0.857 \|0.691 \|
		\| \|MT-NN regression \|Random \|0.747 \|0.660 \|
		\| \|DTNN \|Random \|0.731 \|0.711 \|
		\| \|DTNN \|Random \|0.842 \|0.756 \|
		\| \|MT-NN regression \|Stratified \|0.756 \|0.681 \|
		\| \|DTNN \|Stratified \|0.714 \|0.683 \|
		\| \|DTNN \|Stratified \|0.844 \|0.758 \|
		\|qm9 \|MT-NN regression \|Index \|0.733 \|0.766 \|
		\| \|DTNN \|Index \|0.918 \|0.831 \|
		\| \|MT-NN regression \|Random \|0.852 \|0.833 \|
		\| \|DTNN \|Random \|0.942 \|0.948 \|
		\| \|MT-NN regression \|Stratified \|0.764 \|0.792 \|
		\| \|DTNN \|Stratified \|0.941 \|0.867 \|
		\|sampl \|Random forest \|Index \|0.968 \|0.736 \|
		\| \|XGBoost \|Index \|0.884 \|0.784 \|
		\| \|NN regression \|Index \|0.917 \|0.764 \|
		@@ -675,9 +678,13 @@ Time needed for benchmark test(~20h in total)
		\| \|Graphconv regression\|20 \|100 \|
		\| \|Weave regression \|20 \|120 \|
		\|qm7 \|MT-NN regression \|10 \|400 \|
		\| \|DTNN \|10 \|600 \|
		\|qm7b \|MT-NN regression \|10 \|600 \|
		\| \|DTNN \|10 \|600 \|
		\|qm8 \|MT-NN regression \|60 \|1000 \|
		\| \|DTNN \|10 \|2000 \|
		\|qm9 \|MT-NN regression \|220 \|10000 \|
		\| \|DTNN \|10 \|14000 \|
		\|sampl \|NN regression \|10 \|30 \|
		\| \|XGBoost \|10 \|20 \|
		\| \|Random forest \|10 \|20 \|

deepchem/models/tensorgraph/graph_layers.py

+27 −17

Original line number	Diff line number	Diff line
		@@ -18,7 +18,9 @@ from deepchem.nn import model_ops

		from deepchem.models.tensorgraph.layers import Layer


		class Combine_AP(Layer):

		def __init__(self, **kwargs):
		super(Combine_AP, self).__init__(**kwargs)

		@@ -27,13 +29,16 @@ class Combine_AP(Layer):
		P = self.in_layers[1].out_tensor
		self.out_tensor = [A, P]


		class Separate_AP(Layer):

		def __init__(self, **kwargs):
		super(Separate_AP, self).__init__(**kwargs)

		def _create_tensor(self):
		self.out_tensor = self.in_layers[0].out_tensor[0]


		class WeaveLayer(Layer):
		""" TensorGraph style implementation
		The same as deepchem.nn.WeaveLayer
		@@ -173,6 +178,7 @@ class WeaveLayer(Layer):
		P = pair_features
		self.out_tensor = [A, P]


		class WeaveGather(Layer):
		""" TensorGraph style implementation
		The same as deepchem.nn.WeaveGather
		@@ -233,7 +239,6 @@ class WeaveGather(Layer):
		output_molecules = self.activation(output_molecules)
		self.out_tensor = output_molecules


		def gaussian_histogram(self, x):
		gaussian_memberships = [(-1.645, 0.283), (-1.080, 0.170), (-0.739, 0.134),
		(-0.468, 0.118), (-0.228, 0.114), (0., 0.114),
		@@ -352,6 +357,7 @@ class DTNNGather(Layer):
		""" TensorGraph style implementation
		The same as deepchem.nn.DTNNGather
		"""

		def __init__(self,
		n_embedding=30,
		n_outputs=100,
		@@ -398,10 +404,12 @@ class DTNNGather(Layer):
		output = tf.segment_sum(output, atom_membership)
		self.out_tensor = output


		class DAGLayer(Layer):
		""" TensorGraph style implementation
		The same as deepchem.nn.DAGLayer
		"""

		def __init__(self,
		n_graph_feat=30,
		n_atom_feat=75,
		@@ -481,27 +489,28 @@ class DAGLayer(Layer):

		n_atoms = self.in_layers[4].out_tensor
		# initialize graph features for each graph
		graph_features_initial = tf.zeros((self.max_atoms*self.batch_size, self.max_atoms+1, self.n_graph_feat))
		graph_features_initial = tf.zeros((self.max_atoms * self.batch_size,
		self.max_atoms + 1, self.n_graph_feat))
		# initialize graph features for each graph
		# another row of zeros is generated for padded dummy atoms
		graph_features = tf.Variable(
		graph_features_initial,
		trainable=False)
		graph_features = tf.Variable(graph_features_initial, trainable=False)

		for count in range(self.max_atoms):
		# `count`-th step
		# extracting atom features of target atoms: (batch_size*max_atoms) * n_atom_features
		mask = calculation_masks[:, count]
		current_round = tf.boolean_mask(calculation_orders[:, count], mask)
		batch_atom_features = tf.gather(atom_features,
		current_round)
		batch_atom_features = tf.gather(atom_features, current_round)

		# generating index for graph features used in the inputs
		index = tf.stack(
		[
		tf.reshape(
		tf.stack([tf.boolean_mask(tf.range(n_atoms), mask)] * (self.max_atoms - 1), axis=1),
		[-1]), tf.reshape(tf.boolean_mask(parents[:, count, 1:], mask), [-1])
		tf.stack(
		[tf.boolean_mask(tf.range(n_atoms), mask)] *
		(self.max_atoms - 1),
		axis=1), [-1]),
		tf.reshape(tf.boolean_mask(parents[:, count, 1:], mask), [-1])
		],
		axis=1)
		# extracting graph features for parents of the target atoms, then flatten
		@@ -539,6 +548,7 @@ class DAGGather(Layer):
		""" TensorGraph style implementation
		The same as deepchem.nn.DAGGather
		"""

		def __init__(self,
		n_graph_feat=30,
		n_outputs=30,

deepchem/models/tensorgraph/models/graph_models.py

+90 −65

Original line number	Diff line number	Diff line
		@@ -11,6 +11,7 @@ from deepchem.models.tensorgraph.graph_layers import WeaveLayer, WeaveGather, \
		from deepchem.metrics import to_one_hot, from_one_hot
		from deepchem.trans import undo_transforms


		class WeaveTensorGraph(TensorGraph):

		def __init__(self,
		@@ -49,7 +50,8 @@ class WeaveTensorGraph(TensorGraph):
		update_pair=False,
		in_layers=[weave_layer1, self.pair_split, self.atom_to_pair])
		separated = Separate_AP(in_layers=[weave_layer2])
		dense1 = Dense(out_channels=self.n_graph_feat,
		dense1 = Dense(
		out_channels=self.n_graph_feat,
		activation_fn=tf.nn.relu,
		in_layers=[separated])
		batch_norm1 = BatchNormLayer(in_layers=[dense1])
		@@ -63,7 +65,8 @@ class WeaveTensorGraph(TensorGraph):
		self.labels_fd = []
		for task in range(self.n_tasks):
		if self.mode == "classification":
		classification = Dense(out_channels=2, activation_fn=None, in_layers=[weave_gather])
		classification = Dense(
		out_channels=2, activation_fn=None, in_layers=[weave_gather])
		softmax = SoftMax(in_layers=[classification])
		self.add_output(softmax)

		@@ -72,7 +75,8 @@ class WeaveTensorGraph(TensorGraph):
		cost = SoftMaxCrossEntropy(in_layers=[label, classification])
		costs.append(cost)
		if self.mode == "regression":
		regression = Dense(out_channels=1, activation_fn=None, in_layers=[weave_gather])
		regression = Dense(
		out_channels=1, activation_fn=None, in_layers=[weave_gather])
		self.add_output(regression)

		label = Label(shape=(None, 1))
		@@ -119,7 +123,8 @@ class WeaveTensorGraph(TensorGraph):
		# index of pair features
		C0, C1 = np.meshgrid(np.arange(n_atoms), np.arange(n_atoms))
		atom_to_pair.append(
		np.transpose(np.array([C1.flatten() + start, C0.flatten() + start])))
		np.transpose(
		np.array([C1.flatten() + start, C0.flatten() + start])))
		# number of pairs for each atom
		pair_split.extend(C1.flatten() + start)
		start = start + n_atoms
		@@ -138,7 +143,6 @@ class WeaveTensorGraph(TensorGraph):
		feed_dict[self.atom_to_pair] = np.concatenate(atom_to_pair, axis=0)
		yield feed_dict


		def predict(self, dataset, transformers=[], batch_size=None):
		generator = self.default_generator(dataset, predict=True, pad_batches=False)
		return self.predict_on_generator(generator, transformers)
		@@ -175,6 +179,7 @@ class WeaveTensorGraph(TensorGraph):
		results.append(result)
		return np.concatenate(results, axis=0)


		class DTNNTensorGraph(TensorGraph):

		def __init__(self,
		@@ -206,15 +211,22 @@ class DTNNTensorGraph(TensorGraph):
		self.distance_membership_i = Feature(shape=(None,), dtype=tf.int32)
		self.distance_membership_j = Feature(shape=(None,), dtype=tf.int32)

		dtnn_embedding = DTNNEmbedding(n_embedding=self.n_embedding, in_layers=[self.atom_number])
		dtnn_embedding = DTNNEmbedding(
		n_embedding=self.n_embedding, in_layers=[self.atom_number])
		dtnn_layer1 = DTNNStep(
		n_embedding=self.n_embedding,
		n_distance=self.n_distance,
		in_layers=[dtnn_embedding, self.distance, self.distance_membership_i, self.distance_membership_j])
		in_layers=[
		dtnn_embedding, self.distance, self.distance_membership_i,
		self.distance_membership_j
		])
		dtnn_layer2 = DTNNStep(
		n_embedding=self.n_embedding,
		n_distance=self.n_distance,
		in_layers=[dtnn_layer1, self.distance, self.distance_membership_i, self.distance_membership_j])
		in_layers=[
		dtnn_layer1, self.distance, self.distance_membership_i,
		self.distance_membership_j
		])
		dtnn_gather = DTNNGather(
		n_embedding=self.n_embedding,
		n_outputs=self.n_hidden,
		@@ -223,7 +235,8 @@ class DTNNTensorGraph(TensorGraph):
		costs = []
		self.labels_fd = []
		for task in range(self.n_tasks):
		regression = Dense(out_channels=1, activation_fn=None, in_layers=[dtnn_gather])
		regression = Dense(
		out_channels=1, activation_fn=None, in_layers=[dtnn_gather])
		self.add_output(regression)

		label = Label(shape=(None, 1))
		@@ -281,13 +294,14 @@ class DTNNTensorGraph(TensorGraph):
		distance = np.concatenate(distance, 0)
		feed_dict[self.distance] = np.exp(-np.square(distance - self.steps) /
		(2 * self.step_size**2))
		feed_dict[self.distance_membership_i] = np.concatenate(distance_membership_i)
		feed_dict[self.distance_membership_j] = np.concatenate(distance_membership_j)
		feed_dict[self.distance_membership_i] = np.concatenate(
		distance_membership_i)
		feed_dict[self.distance_membership_j] = np.concatenate(
		distance_membership_j)
		feed_dict[self.atom_membership] = np.concatenate(atom_membership)

		yield feed_dict


		def predict(self, dataset, transformers=[], batch_size=None):
		generator = self.default_generator(dataset, predict=True, pad_batches=False)
		return self.predict_on_generator(generator, transformers)
		@@ -324,6 +338,7 @@ class DTNNTensorGraph(TensorGraph):
		results.append(result)
		return np.concatenate(results, axis=0)


		class DAGTensorGraph(TensorGraph):

		def __init__(self,
		@@ -343,9 +358,12 @@ class DAGTensorGraph(TensorGraph):

		def build_graph(self):
		self.atom_features = Feature(shape=(None, self.n_atom_feat))
		self.parents = Feature(shape=(None, self.max_atoms, self.max_atoms), dtype=tf.int32)
		self.calculation_orders = Feature(shape=(None, self.max_atoms), dtype=tf.int32)
		self.calculation_masks = Feature(shape=(None, self.max_atoms), dtype=tf.bool)
		self.parents = Feature(
		shape=(None, self.max_atoms, self.max_atoms), dtype=tf.int32)
		self.calculation_orders = Feature(
		shape=(None, self.max_atoms), dtype=tf.int32)
		self.calculation_masks = Feature(
		shape=(None, self.max_atoms), dtype=tf.bool)
		self.membership = Feature(shape=(None,), dtype=tf.int32)
		self.n_atoms = Feature(shape=(), dtype=tf.int32)
		dag_layer1 = DAGLayer(
		@@ -353,7 +371,10 @@ class DAGTensorGraph(TensorGraph):
		n_atom_feat=self.n_atom_feat,
		max_atoms=self.max_atoms,
		batch_size=self.batch_size,
		in_layers=[self.atom_features, self.parents, self.calculation_orders, self.calculation_masks, self.n_atoms])
		in_layers=[
		self.atom_features, self.parents, self.calculation_orders,
		self.calculation_masks, self.n_atoms
		])
		dag_gather = DAGGather(
		n_graph_feat=self.n_graph_feat,
		n_outputs=self.n_outputs,
		@@ -364,7 +385,8 @@ class DAGTensorGraph(TensorGraph):
		self.labels_fd = []
		for task in range(self.n_tasks):
		if self.mode == "classification":
		classification = Dense(out_channels=2, activation_fn=None, in_layers=[dag_gather])
		classification = Dense(
		out_channels=2, activation_fn=None, in_layers=[dag_gather])
		softmax = SoftMax(in_layers=[classification])
		self.add_output(softmax)

		@@ -373,7 +395,8 @@ class DAGTensorGraph(TensorGraph):
		cost = SoftMaxCrossEntropy(in_layers=[label, classification])
		costs.append(cost)
		if self.mode == "regression":
		regression = Dense(out_channels=1, activation_fn=None, in_layers=[dag_gather])
		regression = Dense(
		out_channels=1, activation_fn=None, in_layers=[dag_gather])
		self.add_output(regression)

		label = Label(shape=(None, 1))
		@@ -430,8 +453,10 @@ class DAGTensorGraph(TensorGraph):

		feed_dict[self.atom_features] = np.concatenate(atoms_all, axis=0)
		feed_dict[self.parents] = np.stack(parents_all, axis=0)
		feed_dict[self.calculation_orders] = np.concatenate(calculation_orders, axis=0)
		feed_dict[self.calculation_masks] = np.concatenate(calculation_masks, axis=0)
		feed_dict[self.calculation_orders] = np.concatenate(
		calculation_orders, axis=0)
		feed_dict[self.calculation_masks] = np.concatenate(
		calculation_masks, axis=0)
		feed_dict[self.membership] = np.array(membership)
		feed_dict[self.n_atoms] = n_atoms
		yield feed_dict

deepchem/models/tests/test_overfit.py

+3 −5

Original line number	Diff line number	Diff line
		@@ -677,10 +677,9 @@ class TestOverfit(test_util.TensorFlowTestCase):
		regression_metric = dc.metrics.Metric(
		dc.metrics.pearson_r2_score, task_averager=np.mean)
		n_tasks = y.shape[1]
		max_n_atoms = list(dataset.get_data_shape())[0]
		batch_size = 10

		graph_model = dc.nn.SequentialDTNNGraph(max_n_atoms=max_n_atoms)
		graph_model = dc.nn.SequentialDTNNGraph()
		graph_model.add(dc.nn.DTNNEmbedding(n_embedding=20))
		graph_model.add(dc.nn.DTNNStep(n_embedding=20))
		graph_model.add(dc.nn.DTNNStep(n_embedding=20))
		@@ -728,9 +727,8 @@ class TestOverfit(test_util.TensorFlowTestCase):
		transformer = dc.trans.DAGTransformer(max_atoms=50)
		dataset = transformer.transform(dataset)

		graph = dc.nn.SequentialDAGGraph(
		n_feat, batch_size=batch_size, max_atoms=50)
		graph.add(dc.nn.DAGLayer(30, n_feat, max_atoms=50))
		graph = dc.nn.SequentialDAGGraph(n_atom_feat=n_feat, max_atoms=50)
		graph.add(dc.nn.DAGLayer(30, n_feat, max_atoms=50, batch_size=batch_size))
		graph.add(dc.nn.DAGGather(max_atoms=50))

		model = dc.models.MultitaskGraphRegressor(

deepchem/models/tf_new_models/graph_models.py

+2 −1

Original line number	Diff line number	Diff line
		@@ -149,7 +149,8 @@ class SequentialDAGGraph(SequentialGraph):
		self.output = layer([self.output] +
		self.graph_topology.get_topology_placeholders())
		elif type(layer).__name__ in ['DAGGather']:
		self.output = layer([self.output, self.graph_topology.membership_placeholder])
		self.output = layer(
		[self.output, self.graph_topology.membership_placeholder])
		else:
		self.output = layer(self.output)
		self.layers.append(layer)

Original line number	Diff line number	Diff line
		@@ -496,26 +496,29 @@ Scaffold splitting
		\| \|Graphconv regression\|Scaffold \|0.695 \|0.391 \|
		\| \|Weave regression \|Scaffold \|0.401 \|0.373 \|
		\|qm7 \|NN regression \|Index \|0.997 \|0.992 \|
		\| \|DTNN \|Index \|0.998 \|0.996 \|
		\| \|DTNN \|Index \|0.997 \|0.995 \|
		\| \|NN regression \|Random \|0.998 \|0.997 \|
		\| \|DTNN \|Random \|0.998 \|0.998 \|
		\| \|DTNN \|Random \|0.999 \|0.998 \|
		\| \|NN regression \|Stratified \|0.998 \|0.997 \|
		\| \|DTNN \|Stratified \|0.998 \|0.998 \|
		\|qm7b \|MT-NN regression \|Index \|0.903 \|0.789 \|
		\| \|DTNN \|Index \|0.872 \|0.821 \|
		\| \|DTNN \|Index \|0.919 \|0.863 \|
		\| \|MT-NN regression \|Random \|0.893 \|0.839 \|
		\| \|DTNN \|Random \|0.865 \|0.849 \|
		\| \|DTNN \|Random \|0.924 \|0.898 \|
		\| \|MT-NN regression \|Stratified \|0.891 \|0.859 \|
		\| \|DTNN \|Stratified \|0.853 \|0.839 \|
		\| \|DTNN \|Stratified \|0.913 \|0.894 \|
		\|qm8 \|MT-NN regression \|Index \|0.783 \|0.656 \|
		\| \|DTNN \|Index \|0.737 \|0.639 \|
		\| \|DTNN \|Index \|0.857 \|0.691 \|
		\| \|MT-NN regression \|Random \|0.747 \|0.660 \|
		\| \|DTNN \|Random \|0.731 \|0.711 \|
		\| \|DTNN \|Random \|0.842 \|0.756 \|
		\| \|MT-NN regression \|Stratified \|0.756 \|0.681 \|
		\| \|DTNN \|Stratified \|0.714 \|0.683 \|
		\| \|DTNN \|Stratified \|0.844 \|0.758 \|
		\|qm9 \|MT-NN regression \|Index \|0.733 \|0.766 \|
		\| \|DTNN \|Index \|0.918 \|0.831 \|
		\| \|MT-NN regression \|Random \|0.852 \|0.833 \|
		\| \|DTNN \|Random \|0.942 \|0.948 \|
		\| \|MT-NN regression \|Stratified \|0.764 \|0.792 \|
		\| \|DTNN \|Stratified \|0.941 \|0.867 \|
		\|sampl \|Random forest \|Index \|0.968 \|0.736 \|
		\| \|XGBoost \|Index \|0.884 \|0.784 \|
		\| \|NN regression \|Index \|0.917 \|0.764 \|
		@@ -675,9 +678,13 @@ Time needed for benchmark test(~20h in total)
		\| \|Graphconv regression\|20 \|100 \|
		\| \|Weave regression \|20 \|120 \|
		\|qm7 \|MT-NN regression \|10 \|400 \|
		\| \|DTNN \|10 \|600 \|
		\|qm7b \|MT-NN regression \|10 \|600 \|
		\| \|DTNN \|10 \|600 \|
		\|qm8 \|MT-NN regression \|60 \|1000 \|
		\| \|DTNN \|10 \|2000 \|
		\|qm9 \|MT-NN regression \|220 \|10000 \|
		\| \|DTNN \|10 \|14000 \|
		\|sampl \|NN regression \|10 \|30 \|
		\| \|XGBoost \|10 \|20 \|
		\| \|Random forest \|10 \|20 \|

Admin message