temp save (adc38975) · Commits · 钟慕尧 / deepchem

deepchem/models/tf_new_models/graph_models.py

+8 −5

Original line number	Diff line number	Diff line
		@@ -166,15 +166,14 @@ class SequentialWeaveGraph(SequentialGraph):
		"""SequentialGraph for Weave models
		"""

		def __init__(self, batch_size, n_atom_feat=75, n_pair_feat=14, max_atoms=100):
		def __init__(self, max_atoms=50, n_atom_feat=75, n_pair_feat=14):
		self.graph = tf.Graph()
		self.batch_size = batch_size
		self.max_atoms = max_atoms
		self.n_atom_feat = n_atom_feat
		self.n_pair_feat = n_pair_feat
		with self.graph.as_default():
		self.graph_topology = WeaveGraphTopology(self.batch_size, self.n_atom_feat,
		self.n_pair_feat, self.max_atoms)
		self.graph_topology = WeaveGraphTopology(self.max_atoms, self.n_atom_feat,
		self.n_pair_feat)
		self.output = self.graph_topology.get_atom_features_placeholder()
		self.output_P = self.graph_topology.get_pair_features_placeholder()
		self.layers = []
		@@ -186,14 +185,18 @@ class SequentialWeaveGraph(SequentialGraph):
		self.output, self.output_P = layer([
		self.output, self.output_P
		] + self.graph_topology.get_topology_placeholders())
		elif type(layer).__name__ in ['WeaveConcat']:
		self.output = layer(
		[self.output, self.graph_topology.atom_mask_placeholder])
		elif type(layer).__name__ in ['WeaveGather']:
		self.output = layer(
		[self.output, self.graph_topology.atom_split_placeholder])
		[self.output, self.graph_topology.membership_placeholder])
		else:
		self.output = layer(self.output)
		self.layers.append(layer)



		class SequentialSupportGraph(object):
		"""An analog of Keras Sequential model for test/support models."""

deepchem/models/tf_new_models/graph_topology.py

+39 −53

Original line number	Diff line number	Diff line
		@@ -397,8 +397,8 @@ class DAGGraphTopology(GraphTopology):
		class WeaveGraphTopology(GraphTopology):
		"""Manages placeholders associated with batch of graphs and their topology"""

		def __init__(self, batch_size, n_atom_feat, n_pair_feat,
		max_atoms=100, name='Weave_topology'):
		def __init__(self, max_atoms, n_atom_feat, n_pair_feat,
		name='Weave_topology'):
		"""
		Parameters
		----------
		@@ -412,36 +412,30 @@ class WeaveGraphTopology(GraphTopology):

		#self.n_atoms = n_atoms
		self.name = name
		self.batch_size = batch_size
		self.max_atoms = max_atoms
		self.n_atom_feat = n_atom_feat
		self.n_pair_feat = n_pair_feat
		self.max_atoms = max_atoms * batch_size

		self.atom_features_placeholder = tf.placeholder(
		dtype='float32',
		shape=(None, self.n_atom_feat),
		shape=(None, self.max_atoms, self.n_atom_feat),
		name=self.name + '_atom_features')
		self.atom_mask_placeholder = tf.placeholder(
		dtype='float32',
		shape=(None, self.max_atoms),
		name=self.name + '_atom_mask')
		self.pair_features_placeholder = tf.placeholder(
		dtype='float32',
		shape=(None, self.n_pair_feat),
		shape=(None, self.max_atoms, self.max_atoms, self.n_pair_feat),
		name=self.name + '_pair_features')
		self.pair_split_placeholder = tf.placeholder(
		dtype='int32', shape=(self.max_atoms,),
		name=self.name + '_pair_split')
		self.pair_membership_placeholder = tf.placeholder(
		dtype='bool', shape=(self.max_atoms,),
		name=self.name + '_pair_membership')
		self.atom_split_placeholder = tf.placeholder(
		dtype='int32', shape=(self.batch_size,),
		name=self.name + '_atom_split')
		self.atom_to_pair_placeholder = tf.placeholder(
		dtype='int32', shape=(None,2),
		name=self.name + '_atom_to_pair')


		self.pair_mask_placeholder = tf.placeholder(
		dtype='float32',
		shape=(None, self.max_atoms, self.max_atoms),
		name=self.name + '_pair_mask')
		self.membership_placeholder = tf.placeholder(
		dtype='int32', shape=(None,), name=self.name + '_membership')
		# Define the list of tensors to be used as topology
		self.topology = [self.pair_split_placeholder, self.pair_membership_placeholder,
		self.atom_split_placeholder, self.atom_to_pair_placeholder]
		self.topology = [self.atom_mask_placeholder, self.pair_mask_placeholder]
		self.inputs = [self.atom_features_placeholder]
		self.inputs += self.topology

		@@ -467,42 +461,34 @@ class WeaveGraphTopology(GraphTopology):
		# Extract atom numbers
		atom_feat = []
		pair_feat = []
		atom_split = []
		atom_to_pair = []
		pair_split = []
		atom_mask = []
		pair_mask = []
		membership = []
		max_atoms = self.max_atoms
		start = 0
		for im, mol in enumerate(batch):
		n_atoms = mol.get_num_atoms()
		# number of atoms in each molecule
		atom_split.append(n_atoms)
		# index of pair features
		C0, C1 = np.meshgrid(np.arange(n_atoms), np.arange(n_atoms))
		atom_to_pair.append(np.transpose(np.array([C1.flatten()+start, C0.flatten()+start])))
		start = start + n_atoms
		# number of pairs for each atom
		pair_split.extend([n_atoms]*n_atoms)
		# atom features
		atom_feat.append(mol.get_atom_features())
		# pair features
		pair_feat.append(np.reshape(mol.get_pair_features(),
		(n_atoms*n_atoms, self.n_pair_feat)))

		atom_feat = np.concatenate(atom_feat, axis=0)
		pair_feat = np.concatenate(pair_feat, axis=0)
		atom_to_pair = np.concatenate(atom_to_pair, axis=0)
		atom_split = np.array(atom_split)
		n_pair = len(pair_split)
		pair_split = np.pad(pair_split, ((0, max_atoms-n_pair)), 'constant')
		pair_membership = np.array([True]*n_pair + [False]*(max_atoms-n_pair))

		atom_feat.append(
		np.pad(mol.get_atom_features(), ((0, max_atoms - n_atoms), (0, 0)),
		'constant'))
		atom_mask.append(
		np.array([1] * n_atoms + [0] * (max_atoms - n_atoms), dtype=float))
		pair_feat.append(
		np.pad(mol.get_pair_features(), ((0, max_atoms - n_atoms), (
		0, max_atoms - n_atoms), (0, 0)), 'constant'))
		pair_mask.append(np.array([[1]*n_atoms + [0]*(max_atoms-n_atoms)]*n_atoms + \
		[[0]*max_atoms]*(max_atoms-n_atoms), dtype=float))
		membership.extend([im] * n_atoms)
		atom_feat = np.stack(atom_feat)
		pair_feat = np.stack(pair_feat)
		atom_mask = np.stack(atom_mask)
		pair_mask = np.stack(pair_mask)
		membership = np.array(membership)
		# Generate dicts
		dict_DTNN = {
		self.atom_features_placeholder: atom_feat,
		self.pair_features_placeholder: pair_feat,
		self.pair_split_placeholder: pair_split,
		self.pair_membership_placeholder: pair_membership,
		self.atom_split_placeholder: atom_split,
		self.atom_to_pair_placeholder: atom_to_pair
		self.atom_mask_placeholder: atom_mask,
		self.pair_mask_placeholder: pair_mask,
		self.membership_placeholder: membership
		}
		return dict_DTNN

deepchem/molnet/run_benchmark_models.py

+20 −10

Original line number	Diff line number	Diff line
		@@ -266,11 +266,16 @@ def benchmark_classification(train_dataset,
		n_graph_feat = hyper_parameters['n_graph_feat']
		n_pair_feat = hyper_parameters['n_pair_feat']

		graph_model = deepchem.nn.SequentialWeaveGraph(
		batch_size, n_atom_feat=n_features, n_pair_feat=n_pair_feat, max_atoms=120)
		graph_model.add(deepchem.nn.WeaveLayer(75, 14))
		graph_model.add(deepchem.nn.WeaveLayer(50, 50))
		graph_model.add(deepchem.nn.Dense(n_graph_feat, 50, activation='tanh'))
		max_atoms_train = max([mol.get_num_atoms() for mol in train_dataset.X])
		max_atoms_valid = max([mol.get_num_atoms() for mol in valid_dataset.X])
		max_atoms_test = max([mol.get_num_atoms() for mol in test_dataset.X])
		max_atoms = max([max_atoms_train, max_atoms_valid, max_atoms_test])

		graph_model = deepchem.nn.SequentialWeaveGraph(max_atoms=max_atoms,
		n_atom_feat=n_features, n_pair_feat=n_pair_feat)
		graph_model.add(deepchem.nn.WeaveLayer(max_atoms, 75, 14))
		graph_model.add(deepchem.nn.WeaveLayer(max_atoms, 50, 50, update_pair=False))
		graph_model.add(deepchem.nn.WeaveConcat(batch_size, n_output=n_graph_feat))
		graph_model.add(deepchem.nn.BatchNormalization(epsilon=1e-5, mode=1))
		graph_model.add(
		deepchem.nn.WeaveGather(
		@@ -585,11 +590,16 @@ def benchmark_regression(train_dataset,
		n_graph_feat = hyper_parameters['n_graph_feat']
		n_pair_feat = hyper_parameters['n_pair_feat']

		graph_model = deepchem.nn.SequentialWeaveGraph(
		batch_size, n_atom_feat=n_features, n_pair_feat=n_pair_feat, max_atoms=80)
		graph_model.add(deepchem.nn.WeaveLayer(75, 14))
		graph_model.add(deepchem.nn.WeaveLayer(50, 50))
		graph_model.add(deepchem.nn.Dense(n_graph_feat, 50, activation='tanh'))
		max_atoms_train = max([mol.get_num_atoms() for mol in train_dataset.X])
		max_atoms_valid = max([mol.get_num_atoms() for mol in valid_dataset.X])
		max_atoms_test = max([mol.get_num_atoms() for mol in test_dataset.X])
		max_atoms = max([max_atoms_train, max_atoms_valid, max_atoms_test])

		graph_model = deepchem.nn.SequentialWeaveGraph(max_atoms=max_atoms,
		n_atom_feat=n_features, n_pair_feat=n_pair_feat)
		graph_model.add(deepchem.nn.WeaveLayer(max_atoms, 75, 14))
		graph_model.add(deepchem.nn.WeaveLayer(max_atoms, 50, 50, update_pair=False))
		graph_model.add(deepchem.nn.WeaveConcat(batch_size, n_output=n_graph_feat))
		graph_model.add(deepchem.nn.BatchNormalization(epsilon=1e-5, mode=1))
		graph_model.add(
		deepchem.nn.WeaveGather(

deepchem/nn/__init__.py

+1 −0

Original line number	Diff line number	Diff line
		@@ -21,6 +21,7 @@ from deepchem.nn.layers import DAGLayer
		from deepchem.nn.layers import DAGGather

		from deepchem.nn.weave_layers import WeaveLayer
		from deepchem.nn.weave_layers import WeaveConcat
		from deepchem.nn.weave_layers import WeaveGather

		from deepchem.nn.model_ops import weight_decay

deepchem/nn/weave_layers.py

+122 −47

Original line number	Diff line number	Diff line
		@@ -29,6 +29,7 @@ class WeaveLayer(Layer):
		"""

		def __init__(self,
		max_atoms,
		n_atom_input_feat=75,
		n_pair_input_feat=14,
		n_atom_output_feat=50,
		@@ -37,6 +38,7 @@ class WeaveLayer(Layer):
		n_hidden_PA=50,
		n_hidden_AP=50,
		n_hidden_PP=50,
		update_pair=True,
		init='glorot_uniform',
		activation='relu',
		dropout=None,
		@@ -63,8 +65,10 @@ class WeaveLayer(Layer):

		"""
		super(WeaveLayer, self).__init__(**kwargs)
		self.max_atoms = max_atoms
		self.init = initializations.get(init) # Set weight initialization
		self.activation = activations.get(activation) # Get activations
		self.update_pair = update_pair # last weave layer does not need to update
		self.n_hidden_AA = n_hidden_AA
		self.n_hidden_PA = n_hidden_PA
		self.n_hidden_AP = n_hidden_AP
		@@ -96,30 +100,27 @@ class WeaveLayer(Layer):
		self.n_atom_output_feat,
		])

		self.trainable_weights = [
		self.W_AA, self.b_AA, self.W_PA, self.b_PA, self.W_A, self.b_A
		]
		if self.update_pair:
		self.W_AP = self.init([self.n_atom_input_feat * 2, self.n_hidden_AP])
		self.b_AP = model_ops.zeros(shape=[
		self.n_hidden_AP,
		])
		self.b_AP = model_ops.zeros(shape=[self.n_hidden_AP,])

		self.W_PP = self.init([self.n_pair_input_feat, self.n_hidden_PP])
		self.b_PP = model_ops.zeros(shape=[
		self.n_hidden_PP,
		])
		self.b_PP = model_ops.zeros(shape=[self.n_hidden_PP,])

		self.W_P = self.init([self.n_hidden_P, self.n_pair_output_feat])
		self.b_P = model_ops.zeros(shape=[
		self.n_pair_output_feat,
		])
		self.b_P = model_ops.zeros(shape=[self.n_pair_output_feat,])

		self.trainable_weights = [
		self.W_AA, self.b_AA, self.W_PA, self.b_PA, self.W_A, self.b_A,
		self.trainable_weights.extend([
		self.W_AP, self.b_AP, self.W_PP, self.b_PP, self.W_P, self.b_P
		]
		])

		def call(self, x, mask=None):
		"""Execute this layer on input tensors.

		x = [atom_features, pair_features, pair_split, pair_membership, atom_split]
		x = [atom_features, pair_features, atom_mask, pair_mask]

		Parameters
		----------
		@@ -141,39 +142,113 @@ class WeaveLayer(Layer):
		atom_features = x[0]
		pair_features = x[1]

		pair_split = x[2]
		pair_membership = x[3]
		atom_split = x[4]
		atom_to_pair = x[5]
		atom_mask = x[2]
		pair_mask = x[3]
		max_atoms = self.max_atoms

		AA = tf.matmul(atom_features, self.W_AA) + self.b_AA
		AA = tf.tensordot(atom_features, self.W_AA, [[2], [0]]) + self.b_AA
		AA = self.activation(AA)

		PA = tf.matmul(pair_features, self.W_PA) + self.b_PA
		PA = tf.reduce_sum(
		tf.tensordot(pair_features, self.W_PA, [[3], [0]]) + self.b_PA, axis=2)
		PA = self.activation(PA)
		PAs = tf.split(PA, pair_split, axis=0)
		PA = [tf.reduce_sum(molecule, 0) for molecule in PAs]
		PA = tf.boolean_mask(PA, pair_membership)

		A = tf.matmul(tf.concat([AA, PA], 1), self.W_A) + self.b_A
		A = tf.tensordot(tf.concat([AA, PA], 2), self.W_A, [[2], [0]]) + self.b_A
		A = self.activation(A)

		AP_ij = tf.matmul(tf.reshape(tf.gather(atom_features, atom_to_pair),
		[-1, 2*self.n_atom_input_feat]), self.W_AP) + self.b_AP
		AP_ij = self.activation(AP_ij)
		AP_ji = tf.matmul(tf.reshape(tf.gather(atom_features, tf.reverse(atom_to_pair, [1])),
		[-1, 2*self.n_atom_input_feat]), self.W_AP) + self.b_AP
		AP_ji = self.activation(AP_ji)

		PP = tf.matmul(pair_features, self.W_PP) + self.b_PP
		A = tf.multiply(A, tf.expand_dims(atom_mask, axis=2))

		if self.update_pair:
		AP_combine = tf.concat([
		tf.stack([atom_features] * max_atoms, axis=2),
		tf.stack([atom_features] * max_atoms, axis=1)
		], 3)
		AP_combine_t = tf.transpose(AP_combine, perm=[0, 2, 1, 3])
		AP = tf.tensordot(AP_combine + AP_combine_t, self.W_AP,
		[[3], [0]]) + self.b_AP
		AP = self.activation(AP)
		PP = tf.tensordot(pair_features, self.W_PP, [[3], [0]]) + self.b_PP
		PP = self.activation(PP)

		P = tf.matmul(tf.concat([AP_ij + AP_ji, PP], 1), self.W_P) + self.b_P
		P = tf.tensordot(tf.concat([AP, PP], 3), self.W_P, [[3], [0]]) + self.b_P
		P = self.activation(P)
		P = tf.multiply(P, tf.expand_dims(pair_mask, axis=3))
		else:
		P = pair_features

		return A, P


		class WeaveConcat(Layer):
		    """Concat a batch of molecules into a batch of atoms.

		    Atom slots whose mask entry is zero are dropped, the remaining atoms
		    of every molecule are stacked along axis 0, and the result is passed
		    through a dense transform (``W``, ``b``) followed by the activation.
		    """

		    def __init__(self,
		                 batch_size,
		                 n_atom_input_feat=50,
		                 n_output=128,
		                 init='glorot_uniform',
		                 activation='tanh',
		                 **kwargs):
		        """
		        Parameters
		        ----------
		        batch_size: int
		          number of molecules in a batch
		        n_atom_input_feat: int, optional
		          Number of features for each atom in input.
		        n_output: int, optional
		          Number of output features for each atom(concatenated)
		        init: str, optional
		          Weight initialization for filters.
		        activation: str, optional
		          Activation function applied
		        **kwargs:
		          Forwarded unchanged to the ``Layer`` constructor.
		        """
		        self.batch_size = batch_size
		        self.n_atom_input_feat = n_atom_input_feat
		        self.n_output = n_output
		        self.init = initializations.get(init)  # Set weight initialization
		        self.activation = activations.get(activation)  # Get activations
		        super(WeaveConcat, self).__init__(**kwargs)

		    def build(self):
		        """Construct internal trainable weights."""
		        self.W = self.init([self.n_atom_input_feat, self.n_output])
		        self.b = model_ops.zeros(shape=[self.n_output])

		        # Must be a list of the two variables; `self.W + self.b` would
		        # instead build a single broadcast tensor sum.
		        self.trainable_weights = [self.W, self.b]

		    def call(self, x, mask=None):
		        """Execute this layer on input tensors.

		        x = [atom_features, atom_mask]

		        Parameters
		        ----------
		        x: list
		          Tensors as listed above. NOTE(review): atom_features is presumably
		          (batch, max_atoms, n_atom_input_feat) and atom_mask
		          (batch, max_atoms) -- confirm against the caller.
		        mask: bool, optional
		          Ignored. Present only to shadow superclass call() method.

		        Returns
		        -------
		        outputs: Tensor
		          Tensor of concatenated atom features
		        """
		        self.build()
		        atom_features = x[0]
		        atom_masks = x[1]

		        # tf.split with an integer count cuts axis 0 into `batch_size`
		        # equal pieces, so the leading dimension must divide evenly.
		        feature_chunks = tf.split(atom_features, self.batch_size, axis=0)
		        mask_chunks = tf.split(
		            tf.cast(atom_masks, dtype=tf.bool), self.batch_size, axis=0)

		        # Keep only the atoms whose mask is True in each chunk, then stack
		        # the surviving atoms of all chunks along axis 0.
		        outputs = tf.concat(
		            [
		                tf.boolean_mask(features, keep)
		                for features, keep in zip(feature_chunks, mask_chunks)
		            ],
		            axis=0)
		        outputs = tf.matmul(outputs, self.W) + self.b
		        outputs = self.activation(outputs)
		        return outputs


		class WeaveGather(Layer):
		"""" Gather layer of Weave model
		a batch of normalized atom features go through a hidden layer,
		@@ -220,7 +295,7 @@ class WeaveGather(Layer):
		def call(self, x, mask=None):
		"""Execute this layer on input tensors.

		x = [atom_features, atom_split]
		x = [atom_features, membership]

		Parameters
		----------
		@@ -237,12 +312,12 @@ class WeaveGather(Layer):
		# Add trainable weights
		self.build()
		outputs = x[0]
		atom_split = x[1]
		membership = x[1]

		if self.gaussian_expand:
		outputs = self.gaussian_histogram(outputs)

		outputs = tf.split(outputs, atom_split, axis=0)
		outputs = tf.dynamic_partition(outputs, membership, self.batch_size)

		output_molecules = [tf.reduce_sum(molecule, 0) for molecule in outputs]

Admin message