Merge branch 'master' into master (eaef5832) · Commits · 钟慕尧 / deepchem

README.md

+3 −3

Original line number	Diff line number	Diff line
		@@ -773,15 +773,15 @@ DeepChem started as a [Pande group](https://pande.stanford.edu/) project at Stan
		DeepChem is supported by a number of corporate partners who use DeepChem to solve interesting problems.

		### Schrödinger
		[![Schrödinger](https://github.com/deepchem/deepchem/raw/master/docs/_static/schrodinger_logo.png)](https://www.schrodinger.com/)
		[![Schrödinger](https://github.com/deepchem/deepchem/raw/master/docs/source/_static/schrodinger_logo.png)](https://www.schrodinger.com/)

		> DeepChem has transformed how we think about building QSAR and QSPR models when very large data sets are available; and we are actively using DeepChem to investigate how to best combine the power of deep learning with next generation physics-based scoring methods.

		### DeepCrystal
		<img src="https://raw.githubusercontent.com/deepchem/deepchem/master/docs/_static/deep_crystal_logo.png" alt="DeepCrystal Logo" height="150"/>
		<img src="https://github.com/deepchem/deepchem/raw/master/docs/source/_static/deep_crystal_logo.png" alt="DeepCrystal Logo" height="150"/>

		> DeepCrystal was an early adopter of DeepChem, which we now rely on to abstract away some of the hardest pieces of deep learning in drug discovery. By open sourcing these efficient implementations of chemically / biologically aware deep-learning systems, DeepChem puts the latest research into the hands of the scientists that need it, materially pushing forward the field of in-silico drug discovery in the process.


		## Version
		1.1.0
		1.2.0

deepchem/models/tensorgraph/layers.py

+0 −206

Original line number	Diff line number	Diff line
		@@ -2614,209 +2614,3 @@ class AtomicConvolution(Layer):
		R = tf.reduce_sum(tf.multiply(D, D), 3)
		R = tf.sqrt(R)
		return R


		def AlphaShare(in_layers=None, **kwargs):
		    """Build the AlphaShare stage of a Sluice Network.

		    Constructs a single AlphaShareLayer over ``in_layers`` and wraps it in
		    one LayerSplitter per input, so the caller receives one output layer
		    for every input layer.

		    Parameters
		    ----------
		    in_layers: list of Layers or tensors
		        tensors in list must be the same size and list must include two
		        or more tensors
		    **kwargs
		        forwarded unchanged to the AlphaShareLayer constructor

		    Returns
		    -------
		    output_layers: list of LayerSplitter
		        one entry per element of ``in_layers``, in the same order; entry
		        ``i`` selects slice ``i`` of the shared AlphaShareLayer output

		    References
		    ----------
		    Sluice networks: Learning what to share between loosely related tasks
		    https://arxiv.org/abs/1705.08142
		    """
		    # The shared layer is created first; every splitter reads from it.
		    shared = AlphaShareLayer(in_layers=in_layers, **kwargs)
		    return [
		        LayerSplitter(output_num=index, in_layers=shared)
		        for index in range(len(in_layers))
		    ]


		class AlphaShareLayer(Layer):
		    """Alpha-sharing layer of a sluice network.

		    Adds learnable alpha parameters that control how much is shared
		    between the main and auxiliary tasks. Each input is split column-wise
		    into two subspaces, every subspace is flattened, and the stacked
		    subspaces are mixed by an ``[n_alphas, n_alphas]`` alpha matrix.

		    The factory function AlphaShare should be used for construction.

		    Parameters
		    ----------
		    in_layers: list of Layers or tensors
		        tensors in list must be the same size and list must include two
		        or more tensors

		    Returns
		    -------
		    out_tensor: a tensor with shape [len(in_layers), x, y] where x, y were
		        the original layer dimensions. out_tensor should be fed into
		        LayerSplitter.
		    """

		    def __init__(self, **kwargs):
		        super(AlphaShareLayer, self).__init__(**kwargs)

		    def create_tensor(self, in_layers=None, set_tensors=True, **kwargs):
		        """Build the alpha-mixed output tensor.

		        Raises
		        ------
		        ValueError
		            if fewer than two inputs are supplied
		        """
		        inputs = self._get_input_tensors(in_layers)
		        # Sharing needs at least two inputs.
		        if len(inputs) < 2:
		            raise ValueError("AlphaShare must have more than one input")
		        self.num_outputs = len(inputs)

		        # Split every input into a left and right column block and
		        # flatten each block into a vector.
		        original_cols = int(inputs[0].get_shape()[-1].value)
		        subspace_size = original_cols // 2
		        halves = []
		        for tensor in inputs:
		            halves.append(tf.reshape(tensor[:, :subspace_size], [-1]))
		            halves.append(tf.reshape(tensor[:, subspace_size:], [-1]))
		        n_alphas = len(halves)
		        flat = tf.reshape(tf.stack(halves), [n_alphas, -1])

		        # Learnable mixing weights: one row/column per subspace.
		        alphas = tf.Variable(
		            tf.random_normal([n_alphas, n_alphas]), name='alphas')
		        mixed = tf.matmul(alphas, flat)

		        # Rows 2k and 2k+1 are the two halves of input k: reshape each
		        # back to [-1, subspace_size], join them column-wise, then stack
		        # so the result has shape (len(inputs), ?, original_cols).
		        recombined = []
		        for first in range(0, n_alphas, 2):
		            pair = [
		                tf.reshape(mixed[row,], [-1, subspace_size])
		                for row in (first, first + 1)
		            ]
		            recombined.append(tf.concat(pair, 1))
		        out_tensor = tf.stack(recombined)

		        self.alphas = alphas
		        if set_tensors:
		            self.out_tensor = out_tensor
		        return out_tensor

		    def none_tensors(self):
		        """Clear the tensor-holding attributes and return their old values."""
		        saved = (self.num_outputs, self.out_tensor, self.alphas)
		        self.num_outputs = None
		        self.out_tensor = None
		        self.alphas = None
		        return saved

		    def set_tensors(self, tensor):
		        """Restore the attributes from a tuple produced by none_tensors."""
		        num_outputs, out_tensor, alphas = tensor
		        self.num_outputs = num_outputs
		        self.out_tensor = out_tensor
		        self.alphas = alphas


		class LayerSplitter(Layer):
		    """Expose the nth output of a multi-output layer.

		    Assumes the input layer's out_tensor has shape [x, :] where x is the
		    total number of intended output tensors; this layer selects index
		    ``output_num`` along that leading axis.
		    """

		    def __init__(self, output_num, **kwargs):
		        """
		        Parameters
		        ----------
		        output_num: int
		            returns the out_tensor[output_num, :] of a layer
		        """
		        # Stored before the base constructor runs, as in the original.
		        self.output_num = output_num
		        super(LayerSplitter, self).__init__(**kwargs)

		    def create_tensor(self, in_layers=None, set_tensors=True, **kwargs):
		        """Slice the selected output out of the first input tensor.

		        Parameters
		        ----------
		        in_layers: list of Layers or tensors, optional
		            only the first resolved input is used
		        set_tensors: bool
		            when True (the default) the result is also stored on
		            ``self.out_tensor``; when False the layer state is untouched

		        Returns
		        -------
		        out_tensor: the ``[output_num, :]`` slice of the first input
		        """
		        stacked = self._get_input_tensors(in_layers)[0]
		        out_tensor = stacked[self.output_num, :]
		        # Honor the flag, as AlphaShareLayer.create_tensor does.
		        if set_tensors:
		            self.out_tensor = out_tensor
		        return out_tensor

		    def none_tensors(self):
		        """Clear out_tensor and return its previous value."""
		        out_tensor = self.out_tensor
		        self.out_tensor = None
		        return out_tensor

		    def set_tensors(self, tensor):
		        """Restore out_tensor from a value produced by none_tensors."""
		        self.out_tensor = tensor


		class SluiceLoss(Layer):
		    """Calculates the loss in a Sluice Network.

		    Every input into an AlphaShare should be used in SluiceLoss. For each
		    input the columns are split into two halves (the same split used by
		    AlphaShareLayer) and the squared Frobenius norm of
		    ``transpose(first_half) @ second_half`` is accumulated; the layer
		    output is the sum of these terms over all inputs.
		    """

		    def __init__(self, **kwargs):
		        super(SluiceLoss, self).__init__(**kwargs)

		    def create_tensor(self, in_layers=None, set_tensors=True, **kwargs):
		        """Build the scalar loss tensor.

		        Parameters
		        ----------
		        in_layers: list of Layers or tensors, optional
		            inputs whose two column halves contribute to the loss
		        set_tensors: bool
		            when True (the default) the result is also stored on
		            ``self.out_tensor``

		        Returns
		        -------
		        out_tensor: sum over inputs of the squared Frobenius norm of the
		            product of the two subspaces
		        """
		        inputs = self._get_input_tensors(in_layers)
		        penalties = []
		        for input_tensor in inputs:
		            # Same column split as AlphaShareLayer.
		            subspace_size = int(input_tensor.get_shape()[-1].value / 2)
		            first_half = input_tensor[:, :subspace_size]
		            second_half = input_tensor[:, subspace_size:]
		            product = tf.matmul(tf.transpose(first_half), second_half)
		            # Squared Frobenius norm: sum of squared entries.
		            penalties.append(tf.reduce_sum(tf.pow(product, 2)))
		        out_tensor = tf.reduce_sum(penalties)
		        # Honor the flag, as AlphaShareLayer.create_tensor does.
		        if set_tensors:
		            self.out_tensor = out_tensor
		        return out_tensor


		class BetaShare(Layer):
		    """Beta-sharing layer of a sluice network.

		    Adds beta params to control which layer outputs are used for
		    prediction: every input is flattened, the flattened inputs are
		    stacked, and a learnable ``[1, n_inputs]`` beta row vector forms their
		    weighted sum.

		    Parameters
		    ----------
		    in_layers: list of Layers or tensors
		        tensors in list must be the same size

		    Returns
		    -------
		    out_tensor: a single tensor, the beta-weighted combination of the
		        inputs reshaped to ``[-1, original_cols]`` where original_cols is
		        the last dimension of the first input
		    """

		    def __init__(self, **kwargs):
		        super(BetaShare, self).__init__(**kwargs)

		    def create_tensor(self, in_layers=None, set_tensors=True, **kwargs):
		        """Build the beta-weighted combination of the inputs.

		        Size of input layers must all be the same.

		        Parameters
		        ----------
		        in_layers: list of Layers or tensors, optional
		            inputs to combine
		        set_tensors: bool
		            when True (the default) the result is also stored on
		            ``self.out_tensor``
		        """
		        inputs = self._get_input_tensors(in_layers)
		        original_cols = int(inputs[0].get_shape()[-1].value)
		        flattened = [
		            tf.reshape(input_tensor, [-1]) for input_tensor in inputs
		        ]
		        n_betas = len(inputs)
		        subspaces = tf.reshape(tf.stack(flattened), [n_betas, -1])

		        # One learnable weight per input.
		        betas = tf.Variable(tf.random_normal([1, n_betas]), name='betas')
		        combined = tf.matmul(betas, subspaces)
		        out_tensor = tf.reshape(combined, [-1, original_cols])

		        # betas is always recorded because none_tensors reads it.
		        self.betas = betas
		        # Honor the flag, as AlphaShareLayer.create_tensor does.
		        if set_tensors:
		            self.out_tensor = out_tensor
		        return out_tensor

		    def none_tensors(self):
		        """Clear out_tensor and betas and return their previous values."""
		        out_tensor, betas = self.out_tensor, self.betas
		        self.out_tensor = None
		        self.betas = None
		        return out_tensor, betas

		    def set_tensors(self, tensor):
		        """Restore the attributes from a tuple produced by none_tensors."""
		        self.out_tensor, self.betas = tensor

deepchem/models/tensorgraph/tensor_graph.py

+28 −24

Original line number	Diff line number	Diff line
		@@ -2,7 +2,6 @@ import pickle
		import threading
		import time

		import networkx as nx
		import collections
		import numpy as np
		import os
		@@ -56,7 +55,6 @@ class TensorGraph(Model):
		"""

		# Layer Management
		self.nxgraph = nx.DiGraph()
		self.layers = dict()
		self.features = list()
		self.labels = list()
		@@ -108,11 +106,9 @@ class TensorGraph(Model):
		self.labels.append(layer)
		if isinstance(layer, Weights):
		self.task_weights.append(layer)
		self.nxgraph.add_node(layer.name)
		self.layers[layer.name] = layer
		for in_layer in layer.in_layers:
		self._add_layer(in_layer)
		self.nxgraph.add_edge(in_layer.name, layer.name)

		def fit(self,
		dataset,
		@@ -215,6 +211,7 @@ class TensorGraph(Model):
		print('Ending global_step %d: Average loss %g' % (self.global_step,
		avg_loss))
		avg_loss, n_batches = 0.0, 0.0
		if n_batches > 0:
		avg_loss = float(avg_loss) / n_batches
		print('Ending global_step %d: Average loss %g' % (self.global_step,
		avg_loss))
		@@ -410,7 +407,19 @@ class TensorGraph(Model):
		return self.predict_proba_on_generator(generator, transformers, outputs)

		def topsort(self):
		return nx.topological_sort(self.nxgraph)

		def add_layers_to_list(layer, sorted_layers):
		if layer in sorted_layers:
		return
		for in_layer in layer.in_layers:
		add_layers_to_list(in_layer, sorted_layers)
		sorted_layers.append(layer)

		sorted_layers = []
		for l in self.features + self.labels + self.task_weights + self.outputs:
		add_layers_to_list(l, sorted_layers)
		add_layers_to_list(self.loss, sorted_layers)
		return sorted_layers

		def build(self):
		if self.built:
		@@ -420,15 +429,13 @@ class TensorGraph(Model):
		if self.random_seed is not None:
		tf.set_random_seed(self.random_seed)
		self._install_queue()
		order = self.topsort()
		for node in order:
		with tf.name_scope(node):
		node_layer = self.layers[node]
		node_layer.create_tensor(training=self._training_placeholder)
		self.rnn_initial_states += node_layer.rnn_initial_states
		self.rnn_final_states += node_layer.rnn_final_states
		self.rnn_zero_states += node_layer.rnn_zero_states
		node_layer.add_summary_to_tg()
		for layer in self.topsort():
		with tf.name_scope(layer.name):
		layer.create_tensor(training=self._training_placeholder)
		self.rnn_initial_states += layer.rnn_initial_states
		self.rnn_final_states += layer.rnn_final_states
		self.rnn_zero_states += layer.rnn_zero_states
		layer.add_summary_to_tg()
		self.session = tf.Session()

		self.built = True
		@@ -475,7 +482,6 @@ class TensorGraph(Model):
		pre_q_inputs.append(pre_q_input)

		layer.in_layers.append(q)
		self.nxgraph.add_edge(q.name, layer.name)

		self._add_layer(q)
		self.input_queue = q
		@@ -533,9 +539,8 @@ class TensorGraph(Model):
		out_tensors = []
		if self.built:
		must_restore = True
		for node in self.topsort():
		node_layer = self.layers[node]
		out_tensors.append(node_layer.none_tensors())
		for layer in self.topsort():
		out_tensors.append(layer.none_tensors())
		optimizer = self.optimizer
		self.optimizer = None
		training_placeholder = self._training_placeholder
		@@ -554,9 +559,8 @@ class TensorGraph(Model):

		# add out_tensor back to everyone
		if must_restore:
		for index, node in enumerate(self.topsort()):
		node_layer = self.layers[node]
		node_layer.set_tensors(out_tensors[index])
		for index, layer in enumerate(self.topsort()):
		layer.set_tensors(out_tensors[index])
		self._training_placeholder = training_placeholder
		self.optimizer = optimizer
		self.built = True

deepchem/models/tensorgraph/tests/test_layers.py

+1 −5

Original line number	Diff line number	Diff line
		@@ -46,10 +46,6 @@ from deepchem.models.tensorgraph.layers import TensorWrapper
		from deepchem.models.tensorgraph.layers import LSTMStep
		from deepchem.models.tensorgraph.layers import AttnLSTMEmbedding
		from deepchem.models.tensorgraph.layers import IterRefLSTMEmbedding
		from deepchem.models.tensorgraph.layers import AlphaShareLayer
		from deepchem.models.tensorgraph.layers import BetaShare
		from deepchem.models.tensorgraph.layers import SluiceLoss
		from deepchem.models.tensorgraph.layers import LayerSplitter

		import deepchem as dc

deepchem/utils/rdkit_util.py

+2 −1

Original line number	Diff line number	Diff line
		import logging

		import networkx as nx
		import numpy as np
		import os

		@@ -223,6 +222,7 @@ class PdbqtLigandWriter(object):
		The single public function of this class.
		It converts a molecule and a pdb file into a pdbqt file stored in outfile
		"""
		import networkx as nx
		self._create_pdb_map()
		self._mol_to_graph()
		self._get_rotatable_bonds()
		@@ -347,6 +347,7 @@ class PdbqtLigandWriter(object):
		atoms are nodes, and bonds are vertices
		store as self.graph
		"""
		import networkx as nx
		G = nx.Graph()
		num_atoms = self.mol.GetNumAtoms()
		G.add_nodes_from(range(num_atoms))

Admin message