add docs (bba48cdf) · Commits · 钟慕尧 / deepchem

deepchem/models/tf_new_models/graph_topology.py

+53 −9

Original line number	Diff line number	Diff line
		@@ -337,6 +337,7 @@ class DAGGraphTopology(GraphTopology):

		atoms_all = []
		parents_all = []
		# calculation orders for a batch of molecules
		calculation_orders = []
		for idm, mol in enumerate(batch):
		atom_features_padded = np.concatenate(
		@@ -349,8 +350,9 @@ class DAGGraphTopology(GraphTopology):
		atoms_all.append(atom_features_padded)

		parents = self.UG_to_DAG(mol)
		# ConvMol objects input here should have gone through the DAG Transformer
		# calculation orders for DAGs
		assert len(parents) == atoms_per_mol[idm]
		# number of DAGs should equal number of atoms
		parents_all.extend(parents[:])
		parents_all.extend([
		self.max_atoms * np.ones((self.max_atoms, self.max_atoms), dtype=int)
		@@ -359,13 +361,16 @@ class DAGGraphTopology(GraphTopology):
		# padding with max_atoms
		for parent in parents:
		calculation_orders.append(self.indice_changing(parent[:, 0], idm))
		# Convert indices from per-molecule numbering to batch-wide numbering.
		# An atom's index within its molecule's DAGs differs from its index in
		# atom_features_placeholder, so indice_changing maps each position in the
		# current molecule (DAGs) to the matching position in the batch of
		# molecules (atom_features_placeholder). The result is only used by
		# tf.gather on atom_features_placeholder.
		calculation_orders.extend([
		self.batch_size * self.max_atoms * np.ones(
		(self.max_atoms,), dtype=int)
		for i in range(self.max_atoms - atoms_per_mol[idm])
		])
		# padding with batch_size * max_atoms
		# padding with (batch_size*max_atoms)

		atoms_all = np.concatenate(atoms_all, axis=0)
		parents_all = np.stack(parents_all, axis=0)
		@@ -389,45 +394,84 @@ class DAGGraphTopology(GraphTopology):
		return output

		def UG_to_DAG(self, sample):
		    """Generate the DAG calculation orders for one molecule.

		    One DAG is built per atom. For the DAG stemming from atom `count`, a
		    breadth-first traversal of the adjacency list starts at `count`; every
		    time an atom is reached for the first time, the edge
		    (atom it was reached from, newly reached atom) is recorded. The newly
		    reached atom is treated as a parent of the atom it was reached from,
		    so the starting atom ends up depending on every atom reachable from it.

		    Parameters
		    ----------
		    sample
		        Molecule object providing `get_adjacency_list()` (for each atom, the
		        indices of its neighbours) and `get_num_atoms()`.

		    Returns
		    -------
		    list of np.ndarray
		        One integer array of shape (max_atoms, max_atoms) per atom. Each
		        non-padding row is `[atom, parent_1, parent_2, ...]` padded on the
		        right with `max_atoms`; rows are ordered by increasing number of
		        parents, and all-`max_atoms` padding rows are placed in front.

		    Raises
		    ------
		    ValueError
		        If the molecule has more atoms than `self.max_atoms`, since its
		        calculation order cannot fit a (max_atoms, max_atoms) array.
		    """
		    max_atoms = self.max_atoms
		    adjacency = sample.get_adjacency_list()
		    n_atoms = sample.get_num_atoms()
		    if n_atoms > max_atoms:
		        raise ValueError(
		            "molecule has %d atoms, more than max_atoms=%d" % (n_atoms,
		                                                               max_atoms))

		    # Padding rows are identical for every DAG of this molecule.
		    n_padding_rows = max_atoms - n_atoms

		    parents = []
		    for count in range(n_atoms):
		        # Breadth-first traversal starting from atom `count`.
		        dag_edges = []
		        included = [False] * n_atoms
		        included[count] = True
		        current_atoms = [count]
		        # Stop as soon as the frontier is empty: either every atom has been
		        # reached, or the remaining atoms belong to a separate fragment
		        # (e.g. a counter-ion) that cannot be reached from `count`.
		        while current_atoms:
		            next_atoms = []
		            for current_atom in current_atoms:
		                for atom_adj in adjacency[current_atom]:
		                    if not included[atom_adj]:
		                        dag_edges.append((current_atom, atom_adj))
		                        included[atom_adj] = True
		                        next_atoms.append(atom_adj)
		            current_atoms = next_atoms

		        # Walk the edges from the outermost shell inwards so that the parent
		        # list of the far end of an edge is complete before it is copied.
		        parent = [[] for _ in range(n_atoms)]
		        for child, direct_parent in reversed(dag_edges):
		            parent[child].append(direct_parent)
		            parent[child].extend(parent[direct_parent])

		        # Each row starts with the atom itself, followed by all its parents.
		        rows = [[atom] + ancestors for atom, ancestors in enumerate(parent)]
		        # Atoms with fewer parents come first so that their features are
		        # available when atoms depending on them are processed; the sort is
		        # stable, so ties keep ascending atom order. The starting atom is
		        # last whenever every other atom is reachable from it.
		        rows.sort(key=len)
		        # Pad every row to length max_atoms with the value max_atoms.
		        rows = [row + [max_atoms] * (max_atoms - len(row)) for row in rows]
		        # Pad in front with full rows of max_atoms up to max_atoms rows.
		        padding = [[max_atoms] * max_atoms for _ in range(n_padding_rows)]
		        parents.append(np.array(padding + rows))
		    return parents

examples/qm7/qm7_DTNN.py

+7 −6

Original line number	Diff line number	Diff line
		@@ -23,19 +23,20 @@ metric = [

		# Batch size of models
		batch_size = 50
		n_embedding = 20
		graph_model = dc.nn.SequentialDTNNGraph(max_n_atoms=23, n_distance=100)
		graph_model.add(dc.nn.DTNNEmbedding(n_embedding=20))
		graph_model.add(dc.nn.DTNNStep(n_embedding=20, n_distance=100))
		graph_model.add(dc.nn.DTNNStep(n_embedding=20, n_distance=100))
		graph_model.add(dc.nn.DTNNGather(n_embedding=20))
		n_feat = 20
		graph_model.add(dc.nn.DTNNEmbedding(n_embedding=n_embedding))
		graph_model.add(dc.nn.DTNNStep(n_embedding=n_embedding, n_distance=100))
		graph_model.add(dc.nn.DTNNStep(n_embedding=n_embedding, n_distance=100))
		graph_model.add(dc.nn.DTNNGather(n_embedding=n_embedding))
		n_feat = n_embedding

		model = dc.models.DTNNGraphRegressor(
		graph_model,
		len(tasks),
		n_feat,
		batch_size=batch_size,
		learning_rate=1e-3,
		learning_rate=0.001,
		learning_rate_decay_time=1000,
		optimizer_type="adam",
		beta1=.9,

Admin message