Commit 6808c495 authored by miaecle's avatar miaecle
Browse files

refining model

parent 6be5b3a5
Loading
Loading
Loading
Loading
+13 −21
Original line number Diff line number Diff line
@@ -80,23 +80,28 @@ class SequentialGraph(object):
    return self.layers[layer_id]

class SequentialDTNNGraph(SequentialGraph):
  """An analog of Keras Sequential class for Graph data.
  """An analog of Keras Sequential class for Coulomb Matrix data.

  Like the Sequential class from Keras, but automatically passes topology
  placeholders from GraphTopology to each graph layer (from layers) added
  to the network. Non graph layers don't get the extra placeholders. 
  automatically generates and passes topology placeholders to each layer. 
  """

  def __init__(self, max_n_atoms=30, n_distance=100):
  def __init__(self, max_n_atoms, n_distance=100, distance_min=-1., distance_max=18.):
    """
    Parameters
    ----------
    n_feat: int
      Number of features per atom.
    max_n_atoms: int
      maximum number of atoms in a molecule
    n_distance: int, optional
      granularity of distance matrix
      step size will be (distance_max-distance_min)/n_distance
    distance_min: float, optional
      minimum distance of atom pairs, default = -1 Angstrom
    distance_max: float, optional
      maximum distance of atom pairs, default = 18 Angstrom
    """
    self.graph = tf.Graph()
    with self.graph.as_default():
      self.graph_topology = DTNNGraphTopology(max_n_atoms, n_distance)
      self.graph_topology = DTNNGraphTopology(max_n_atoms, n_distance, distance_min=distance_min, distance_max=distance_max)
      self.output = self.graph_topology.get_atom_number_placeholder()
    # Keep track of the layers
    self.layers = []
@@ -104,24 +109,11 @@ class SequentialDTNNGraph(SequentialGraph):
  def add(self, layer):
    """Adds a new layer to model."""
    with self.graph.as_default():
      ############################################# DEBUG
      #print("start - add()")
      #print("self.output")
      #print(self.output)
      ############################################# DEBUG
      # For graphical layers, add connectivity placeholders 
      if type(layer).__name__ in ['DTNNStep']:
        self.output = layer([self.output] +
                            self.graph_topology.get_topology_placeholders())
      else:
        self.output = layer(self.output)
      ############################################# DEBUG
      #print("end- add()")
      #print("self.output")
      #print(self.output)
      ############################################# DEBUG

      # Add layer to the layer list
      self.layers.append(layer)


+21 −21
Original line number Diff line number Diff line
@@ -144,28 +144,27 @@ class GraphTopology(object):
class DTNNGraphTopology(GraphTopology):
  """Manages placeholders associated with batch of graphs and their topology"""

  def __init__(self, max_n_atoms=30, n_distance=100, name='DTNN_topology'):
  def __init__(self, max_n_atoms, n_distance=100, distance_min=-1., distance_max=18., name='DTNN_topology'):
    """
    Note that batch size is not specified in a GraphTopology object. A batch
    of molecules must be combined into a disconnected graph and fed to topology
    directly to handle batches.

    Parameters
    ----------
    n_feat: int
      Number of features per atom.
    name: str, optional
      Name of this manager.
    max_deg: int, optional
      Maximum #bonds for atoms in molecules.
    min_deg: int, optional
      Minimum #bonds for atoms in molecules.
    max_n_atoms: int
      maximum number of atoms in a molecule
    n_distance: int, optional
      granularity of distance matrix
      step size will be (distance_max-distance_min)/n_distance
    distance_min: float, optional
      minimum distance of atom pairs, default = -1 Angstrom
    distance_max: float, optional
      maximum distance of atom pairs, default = 18 Angstrom
    """

    #self.n_atoms = n_atoms
    self.name = name
    self.max_n_atoms = max_n_atoms
    self.n_distance = n_distance
    self.distance_min = distance_min
    self.distance_max = distance_max

    self.atom_number_placeholder = tf.placeholder(
        dtype='int32', 
@@ -193,22 +192,22 @@ class DTNNGraphTopology(GraphTopology):
    return self.distance_matrix_placeholder

  def batch_to_feed_dict(self, batch):
    """Converts the current batch of mol_graphs into tensorflow feed_dict.
    """Converts the current batch of Coulomb Matrix into tensorflow feed_dict.

    Assigns the graph information in array of ConvMol objects to the
    Assigns the atom number and distance info to the
    placeholders tensors

    params
    ------
    batch : np.ndarray
      Array of ConvMol objects
      Array of Coulomb Matrix

    returns
    -------
    feed_dict : dict
      Can be merged with other feed_dicts for input into tensorflow
    """
    # Merge mol conv objects
    # Extract atom numbers
    atom_number = np.asarray(map(np.diag, batch))
    atom_number = np.asarray(np.round(np.power(2*atom_number, 1/2.4)), dtype=int)
    ZiZj = []
@@ -222,8 +221,9 @@ class DTNNGraphTopology(GraphTopology):
      for ir, row in enumerate(molecule):
        for ie, element in enumerate(row):
          if element>0 and ir != ie:
            # expand a float value distance to a distance vector
            distance_matrix[im, ir, ie, :] = self.gauss_expand(ZiZj[im, ir, ie]/element, 
                                                               self.n_distance)
                self.n_distance, self.distance_min, self.distance_max)
            distance_matrix_mask[im, ir, ie] = 1
          else:
            distance_matrix[im, ir, ie, :] = 0
@@ -237,7 +237,7 @@ class DTNNGraphTopology(GraphTopology):
    return dict_DTNN
    
  @staticmethod
  def gauss_expand(distance, n_distance, distance_min=-1., distance_max=18.):
  def gauss_expand(distance, n_distance, distance_min, distance_max):
    step_size = (distance_max - distance_min)/n_distance
    steps = np.array([distance_min+i*step_size for i in range(n_distance)])
    distance_vector = np.exp(-np.square(distance - steps)/(2*step_size**2))
+2 −1
Original line number Diff line number Diff line
@@ -95,7 +95,8 @@ hps['DTNN'] = {
    'batch_size': 128,
    'nb_epoch': 20,
    'learning_rate': 0.0005,
    'n_hidden': 20,
    'n_embedding': 20,
    'n_hidden': 50,
    'n_distance': 100,
    'seed': 123
}
+2 −2
Original line number Diff line number Diff line
@@ -82,7 +82,7 @@ def run_benchmark(datasets,
        n_features = 75
      elif model in [
          'tf', 'tf_robust', 'logreg', 'rf', 'irv', 'tf_regression',
          'rf_regression'
          'rf_regression', 'DTNN'
      ]:
        featurizer = 'ECFP'
        n_features = 1024
@@ -99,7 +99,7 @@ def run_benchmark(datasets,
          return
      elif dataset in ['qm7', 'qm7b', 'qm9']:
        featurizer = None  # qm* datasets are already featurized
        if isinstance(model, str) and not model in ['tf_regression']:
        if isinstance(model, str) and not model in ['tf_regression', 'DTNN']:
          return
        elif model in ['tf_regression']:
          model = 'tf_regression_ft'
+10 −9
Original line number Diff line number Diff line
@@ -303,7 +303,7 @@ def benchmark_regression(
  test_scores = {}

  assert model in [
      'tf_regression', 'tf_regression_ft', 'rf_regression', 'graphconvreg'
      'tf_regression', 'tf_regression_ft', 'rf_regression', 'graphconvreg', 'DTNN'
  ]
  if hyper_parameters is None:
    hyper_parameters = hps[model]
@@ -408,17 +408,18 @@ def benchmark_regression(
    nb_epoch = hyper_parameters['nb_epoch']
    learning_rate = hyper_parameters['learning_rate']
    n_distance = hyper_parameters['n_distance']
    n_embedding = hyper_parameters['n_embedding']
    n_hidden = hyper_parameters['n_hidden']

    tf.set_random_seed(seed)
    graph_model = dc.nn.SequentialDTNNGraph(max_n_atoms=n_features[0], 
    graph_model = deepchem.nn.SequentialDTNNGraph(max_n_atoms=n_features[0], 
                                                  n_distance=n_distance)
    graph_model.add(dc.nn.DTNNEmbedding(n_features=n_hidden))
    graph_model.add(dc.nn.DTNNStep(n_features=n_hidden, n_distance=n_distance))
    graph_model.add(dc.nn.DTNNStep(n_features=n_hidden, n_distance=n_distance))
    graph_model.add(dc.nn.DTNNGather(n_tasks=len(tasks)))
    graph_model.add(deepchem.nn.DTNNEmbedding(n_embedding=n_embedding))
    graph_model.add(deepchem.nn.DTNNStep(n_embedding=n_embedding, n_distance=n_distance))
    graph_model.add(deepchem.nn.DTNNStep(n_embedding=n_embedding, n_distance=n_distance))
    graph_model.add(deepchem.nn.DTNNGather(n_tasks=len(tasks), n_embedding=n_embedding, n_hidden=n_hidden))

    model = dc.models.DTNNRegressor(
    model = deepchem.models.DTNNRegressor(
        graph_model,
        n_tasks=len(tasks),
        batch_size=batch_size,
Loading