Merge branch 'master' into update-script (4419b3b6) · Commits · 钟慕尧 / deepchem

deepchem/feat/graph_features.py

+12 −6

Original line number	Original line	Diff line number	Diff line
	import numpy as np		import numpy as np
	from rdkit import Chem

	import deepchem as dc		import deepchem as dc
	from deepchem.feat import Featurizer		from deepchem.feat import Featurizer
	@@ -55,11 +54,9 @@ possible_atom_list = [
	possible_numH_list = [0, 1, 2, 3, 4]		possible_numH_list = [0, 1, 2, 3, 4]
	possible_valence_list = [0, 1, 2, 3, 4, 5, 6]		possible_valence_list = [0, 1, 2, 3, 4, 5, 6]
	possible_formal_charge_list = [-3, -2, -1, 0, 1, 2, 3]		possible_formal_charge_list = [-3, -2, -1, 0, 1, 2, 3]
	possible_hybridization_list = [		# To avoid importing rdkit, this is a placeholder list of the correct
	Chem.rdchem.HybridizationType.SP, Chem.rdchem.HybridizationType.SP2,		# length. These will be replaced with rdkit HybridizationType below
	Chem.rdchem.HybridizationType.SP3, Chem.rdchem.HybridizationType.SP3D,		possible_hybridization_list = ["SP", "SP2", "SP3", "SP3D", "SP3D2"]
	Chem.rdchem.HybridizationType.SP3D2
	]
	possible_number_radical_e_list = [0, 1, 2]		possible_number_radical_e_list = [0, 1, 2]
	possible_chirality_list = ['R', 'S']		possible_chirality_list = ['R', 'S']

	@@ -84,6 +81,14 @@ def get_feature_list(atom):
	atom: RDKit.rdchem.Atom		atom: RDKit.rdchem.Atom
	Atom to get features for		Atom to get features for
	"""		"""
			# Replace the hybridization
			from rdkit import Chem
			global possible_hybridization_list
			possible_hybridization_list = [
			Chem.rdchem.HybridizationType.SP, Chem.rdchem.HybridizationType.SP2,
			Chem.rdchem.HybridizationType.SP3, Chem.rdchem.HybridizationType.SP3D,
			Chem.rdchem.HybridizationType.SP3D2
			]
	features = 6 * [0]		features = 6 * [0]
	features[0] = safe_index(possible_atom_list, atom.GetSymbol())		features[0] = safe_index(possible_atom_list, atom.GetSymbol())
	features[1] = safe_index(possible_numH_list, atom.GetTotalNumHs())		features[1] = safe_index(possible_numH_list, atom.GetTotalNumHs())
	@@ -91,6 +96,7 @@ def get_feature_list(atom):
	features[3] = safe_index(possible_formal_charge_list, atom.GetFormalCharge())		features[3] = safe_index(possible_formal_charge_list, atom.GetFormalCharge())
	features[4] = safe_index(possible_number_radical_e_list,		features[4] = safe_index(possible_number_radical_e_list,
	atom.GetNumRadicalElectrons())		atom.GetNumRadicalElectrons())

	features[5] = safe_index(possible_hybridization_list, atom.GetHybridization())		features[5] = safe_index(possible_hybridization_list, atom.GetHybridization())
	return features		return features

deepchem/feat/tests/test_graph_features.py

+2 −2

Original line number	Original line	Diff line number	Diff line
	@@ -22,7 +22,7 @@ class TestConvMolFeaturizer(unittest.TestCase):
	# Note there is a central nitrogen of degree 4, with 4 carbons		# Note there is a central nitrogen of degree 4, with 4 carbons
	# of degree 1 (connected only to central nitrogen).		# of degree 1 (connected only to central nitrogen).
	raw_smiles = ['C[N+](C)(C)C']		raw_smiles = ['C[N+](C)(C)C']
	import rdkit		import rdkit.Chem
	mols = [rdkit.Chem.MolFromSmiles(s) for s in raw_smiles]		mols = [rdkit.Chem.MolFromSmiles(s) for s in raw_smiles]
	featurizer = ConvMolFeaturizer()		featurizer = ConvMolFeaturizer()
	mols = featurizer.featurize(mols)		mols = featurizer.featurize(mols)
	@@ -70,7 +70,7 @@ class TestConvMolFeaturizer(unittest.TestCase):
	def test_alkane(self):		def test_alkane(self):
	"""Test on simple alkane"""		"""Test on simple alkane"""
	raw_smiles = ['CCC']		raw_smiles = ['CCC']
	import rdkit		import rdkit.Chem
	mols = [rdkit.Chem.MolFromSmiles(s) for s in raw_smiles]		mols = [rdkit.Chem.MolFromSmiles(s) for s in raw_smiles]
	featurizer = ConvMolFeaturizer()		featurizer = ConvMolFeaturizer()
	mol_list = featurizer.featurize(mols)		mol_list = featurizer.featurize(mols)

deepchem/metrics/__init__.py

+158 −86

Original line number	Original line	Diff line number	Diff line
	@@ -2,8 +2,8 @@

	import numpy as np		import numpy as np
	import warnings		import warnings
	from deepchem.utils.save import log
	import sklearn.metrics		import sklearn.metrics
			import logging
	from sklearn.metrics import matthews_corrcoef		from sklearn.metrics import matthews_corrcoef
	from sklearn.metrics import recall_score		from sklearn.metrics import recall_score
	from sklearn.metrics import r2_score		from sklearn.metrics import r2_score
	@@ -16,14 +16,23 @@ from sklearn.metrics import jaccard_score
	from sklearn.metrics import f1_score		from sklearn.metrics import f1_score
	from scipy.stats import pearsonr		from scipy.stats import pearsonr

			logger = logging.getLogger(__name__)


	def to_one_hot(y, n_classes=2):		def to_one_hot(y, n_classes=2):
	"""Transforms label vector into one-hot encoding.		"""Transforms label vector into one-hot encoding.

	Turns y into vector of shape [n_samples, 2] (assuming binary labels).		Turns y into vector of shape `(n_samples, n_classes)` with a one-hot
			encoding.

			Parameters
			----------
	y: np.ndarray		y: np.ndarray
	A vector of shape [n_samples, 1]		A vector of shape `(n_samples, 1)`

			Returns
			-------
			A numpy.ndarray of shape `(n_samples, n_classes)`.
	"""		"""
	n_samples = np.shape(y)[0]		n_samples = np.shape(y)[0]
	y_hot = np.zeros((n_samples, n_classes))		y_hot = np.zeros((n_samples, n_classes))
	@@ -34,8 +43,16 @@ def to_one_hot(y, n_classes=2):
	def from_one_hot(y, axis=1):		def from_one_hot(y, axis=1):
	"""Transforms label vector from one-hot encoding.		"""Transforms label vector from one-hot encoding.

			Parameters
			----------
	y: np.ndarray		y: np.ndarray
	A vector of shape [n_samples, num_classes]		A vector of shape `(n_samples, num_classes)`
			axis: int, optional (default 1)
			The axis with one-hot encodings to reduce on.

			Returns
			-------
			A numpy.ndarray of shape `(n_samples,)`
	"""		"""
	return np.argmax(y, axis=axis)		return np.argmax(y, axis=axis)

	@@ -62,6 +79,24 @@ def roc_auc_score(y, y_pred):


	def accuracy_score(y, y_pred):		def accuracy_score(y, y_pred):
			"""Compute accuracy score

			Computes accuracy score for classification tasks. Works for both
			binary and multiclass classification.

			Parameters
			----------
			y: np.ndarray
			Of shape `(N_samples,)`
			y_pred: np.ndarray
			Of shape `(N_samples,)`

			Returns
			-------
			score: float
			The fraction of correctly classified samples. A number between 0
			and 1.
			"""
	y = _ensure_class_labels(y)		y = _ensure_class_labels(y)
	y_pred = _ensure_class_labels(y_pred)		y_pred = _ensure_class_labels(y_pred)
	return sklearn.metrics.accuracy_score(y, y_pred)		return sklearn.metrics.accuracy_score(y, y_pred)
	@@ -83,8 +118,7 @@ def pearson_r2_score(y, y_pred):


	def jaccard_index(y, y_pred):		def jaccard_index(y, y_pred):
	"""Computes Jaccard Index which is the Intersection Over Union metric		"""Computes Jaccard Index which is the Intersection Over Union metric which is commonly used in image segmentation tasks
	which is commonly used in image segmentation tasks

	Parameters		Parameters
	----------		----------
	@@ -95,13 +129,17 @@ def jaccard_index(y, y_pred):


	def pixel_error(y, y_pred):		def pixel_error(y, y_pred):
	"""defined as 1 - the maximal F-score of pixel similarity,		"""An error metric in case y, y_pred are images.
	or squared Euclidean distance between the original and the result labels.
			Defined as 1 - the maximal F-score of pixel similarity, or squared
			Euclidean distance between the original and the result labels.

	Parameters		Parameters
	----------		----------
	y: ground truth array		y: np.ndarray
	y_pred: predicted array		ground truth array
			y_pred: np.ndarray
			predicted array
	"""		"""
	return 1 - f1_score(y, y_pred)		return 1 - f1_score(y, y_pred)

	@@ -133,16 +171,22 @@ def kappa_score(y_true, y_pred):

	Note that this implementation of Cohen's kappa expects binary labels.		Note that this implementation of Cohen's kappa expects binary labels.

	Args:		Parameters
	y_true: Numpy array containing true values.		----------
	y_pred: Numpy array containing predicted values.		y_true: np.ndarray
			Numpy array containing true values.
			y_pred: np.ndarray
			Numpy array containing predicted values.

	Returns:		Returns
	kappa: Numpy array containing kappa for each classification task.		-------
			kappa: np.ndarray
			Numpy array containing kappa for each classification task.

	Raises:		Raises
	AssertionError: If y_true and y_pred are not the same size, or if class		------
	labels are not in [0, 1].		AssertionError: If y_true and y_pred are not the same size, or if
			class labels are not in [0, 1].
	"""		"""
	assert len(y_true) == len(y_pred), 'Number of examples does not match.'		assert len(y_true) == len(y_pred), 'Number of examples does not match.'
	yt = np.asarray(y_true, dtype=int)		yt = np.asarray(y_true, dtype=int)
	@@ -165,11 +209,8 @@ def bedroc_score(y_true, y_pred, alpha=20.0):
	"""BEDROC metric implemented according to Truchon and Bayley that modifies		"""BEDROC metric implemented according to Truchon and Bayley that modifies
	the ROC score by allowing for a factor of early recognition		the ROC score by allowing for a factor of early recognition

	References:		Parameters
	The original paper by Truchon et al. is located at		----------
	https://pubs.acs.org/doi/pdf/10.1021/ci600426e

	Args:
	y_true (array_like):		y_true (array_like):
	Binary class labels. 1 for positive class, 0 otherwise		Binary class labels. 1 for positive class, 0 otherwise
	y_pred (array_like):		y_pred (array_like):
	@@ -177,9 +218,14 @@ def bedroc_score(y_true, y_pred, alpha=20.0):
	alpha (float), default 20.0:		alpha (float), default 20.0:
	Early recognition parameter		Early recognition parameter

	Returns:		Returns
			-------
	float: Value in [0, 1] that indicates the degree of early recognition		float: Value in [0, 1] that indicates the degree of early recognition

			Notes
			-----
			The original paper by Truchon et al. is located at
			https://pubs.acs.org/doi/pdf/10.1021/ci600426e
	"""		"""

	assert len(y_true) == len(y_pred), 'Number of examples do not match'		assert len(y_true) == len(y_pred), 'Number of examples do not match'
	@@ -203,23 +249,45 @@ def bedroc_score(y_true, y_pred, alpha=20.0):


	class Metric(object):		class Metric(object):
	"""Wrapper class for computing user-defined metrics."""		"""Wrapper class for computing user-defined metrics.

			There are a variety of different metrics this class aims to support.
			At the most simple, metrics for classification and regression that
			assume that values to compare are scalars. More complicated, there
			may perhaps be two image arrays that need to be compared.

			The `Metric` class provides a wrapper for standardizing the API
			around different classes of metrics that may be useful for DeepChem
			models. The implementation provides a few non-standard conveniences
			such as built-in support for multitask and multiclass metrics, and
			support for multidimensional outputs.
			"""

	def __init__(self,		def __init__(self,
	metric,		metric,
	task_averager=None,		task_averager=None,
	name=None,		name=None,
	threshold=None,		threshold=None,
	verbose=True,
	mode=None,		mode=None,
	compute_energy_metric=False):		compute_energy_metric=False):
	"""		"""
	Args:		Parameters
	metric: function that takes args y_true, y_pred (in that order) and		----------
			metric: function
			function that takes args y_true, y_pred (in that order) and
	computes desired score.		computes desired score.
	task_averager: If not None, should be a function that averages metrics		task_averager: function, optional
	across tasks. For example, task_averager=np.mean. If task_averager		If not None, should be a function that averages metrics across
	is provided, this task will be inherited as a multitask metric.		tasks. For example, task_averager=np.mean. If task_averager is
			provided, this task will be inherited as a multitask metric.
			name: str, optional
			Name of this metric
			threshold: float, optional
			Used for binary metrics and is the threshold for the positive
			class
			mode: str, optional
			Must be either classification or regression.
			compute_energy_metric: TODO(rbharath): Should this be removed?
	"""		"""
	self.metric = metric		self.metric = metric
	self.task_averager = task_averager		self.task_averager = task_averager
	@@ -231,13 +299,12 @@ class Metric(object):
	self.name = self.task_averager.__name__ + "-" + self.metric.__name__		self.name = self.task_averager.__name__ + "-" + self.metric.__name__
	else:		else:
	self.name = name		self.name = name
	self.verbose = verbose
	self.threshold = threshold		self.threshold = threshold
	if mode is None:		if mode is None:
	if self.metric.__name__ in [		if self.metric.__name__ in [
	"roc_auc_score", "matthews_corrcoef", "recall_score",		"roc_auc_score", "matthews_corrcoef", "recall_score",
	"accuracy_score", "kappa_score", "precision_score",		"accuracy_score", "kappa_score", "precision_score",
	"balanced_accuracy_score", "prc_auc_score", "f1_score"		"balanced_accuracy_score", "prc_auc_score", "f1_score", "bedroc_score"
	]:		]:
	mode = "classification"		mode = "classification"
	elif self.metric.__name__ in [		elif self.metric.__name__ in [
	@@ -311,7 +378,7 @@ class Metric(object):

	metric_value = self.compute_singletask_metric(y_task, y_pred_task, w_task)		metric_value = self.compute_singletask_metric(y_task, y_pred_task, w_task)
	computed_metrics.append(metric_value)		computed_metrics.append(metric_value)
	log("computed_metrics: %s" % str(computed_metrics), self.verbose)		logger.info("computed_metrics: %s" % str(computed_metrics))
	if n_tasks == 1:		if n_tasks == 1:
	computed_metrics = computed_metrics[0]		computed_metrics = computed_metrics[0]
	if not self.is_multitask:		if not self.is_multitask:
	@@ -334,14 +401,19 @@ class Metric(object):
	def compute_singletask_metric(self, y_true, y_pred, w):		def compute_singletask_metric(self, y_true, y_pred, w):
	"""Compute a metric value.		"""Compute a metric value.

	Args:		Parameters
	y_true: A list of arrays containing true values for each task.		----------
	y_pred: A list of arrays containing predicted values for each task.		y_true: list
			A list of arrays containing true values for each task.
			y_pred: list
			A list of arrays containing predicted values for each task.

	Returns:		Returns
			-------
	Float metric value.		Float metric value.

	Raises:		Raises
			------
	NotImplementedError: If metric_str is not in METRICS.		NotImplementedError: If metric_str is not in METRICS.
	"""		"""

deepchem/models/layers.py

+155 −46

Original line number	Original line	Diff line number	Diff line
	@@ -24,6 +24,13 @@ class InteratomicL2Distances(tf.keras.layers.Layer):
	return config		return config

	def call(self, inputs):		def call(self, inputs):
			"""Invokes this layer.

			Parameters
			----------
			inputs: list
			Should be of form `inputs=[coords, nbr_list]` where `coords` is a tensor of shape `(None, N, 3)` and `nbr_list` is a list.
			"""
	if len(inputs) != 2:		if len(inputs) != 2:
	raise ValueError("InteratomicDistances requires coords,nbr_list")		raise ValueError("InteratomicDistances requires coords,nbr_list")
	coords, nbr_list = (inputs[0], inputs[1])		coords, nbr_list = (inputs[0], inputs[1])
	@@ -38,6 +45,16 @@ class InteratomicL2Distances(tf.keras.layers.Layer):


	class GraphConv(tf.keras.layers.Layer):		class GraphConv(tf.keras.layers.Layer):
			"""Graph Convolutional Layers

			This layer implements the graph convolution introduced in

			Duvenaud, David K., et al. "Convolutional networks on graphs for learning molecular fingerprints." Advances in neural information processing systems. 2015. https://arxiv.org/abs/1509.09292

			The graph convolution combines per-node feature vectors in a
			nonlinear fashion with the feature vectors for neighboring nodes.
			This "blends" information in local neighborhoods of a graph.
			"""

	def __init__(self,		def __init__(self,
	out_channel,		out_channel,
	@@ -45,6 +62,24 @@ class GraphConv(tf.keras.layers.Layer):
	max_deg=10,		max_deg=10,
	activation_fn=None,		activation_fn=None,
	**kwargs):		**kwargs):
			"""Initialize a graph convolutional layer.

			Parameters
			----------
			out_channel: int
			The number of output channels per graph node.
			min_deg: int, optional (default 0)
			The minimum allowed degree for each graph node.
			max_deg: int, optional (default 10)
			The maximum allowed degree for each graph node. Note that this
			is set to 10 to handle complex molecules (some organometallic
			compounds have strange structures). If you're using this for
			non-molecular applications, you may need to set this much higher
			depending on your dataset.
			activation_fn: function
			A nonlinear activation function to apply. If you're not sure,
			`tf.nn.relu` is probably a good default for your application.
			"""
	super(GraphConv, self).__init__(**kwargs)		super(GraphConv, self).__init__(**kwargs)
	self.out_channel = out_channel		self.out_channel = out_channel
	self.min_degree = min_deg		self.min_degree = min_deg
	@@ -143,8 +178,27 @@ class GraphConv(tf.keras.layers.Layer):


	class GraphPool(tf.keras.layers.Layer):		class GraphPool(tf.keras.layers.Layer):
			"""A GraphPool gathers data from local neighborhoods of a graph.

			This layer does a max-pooling over the feature vectors of atoms in a
			neighborhood. You can think of this layer as analogous to a max-pooling layer
			for 2D convolutions but which operates on graphs instead.
			"""

	def __init__(self, min_degree=0, max_degree=10, **kwargs):		def __init__(self, min_degree=0, max_degree=10, **kwargs):
			"""Initialize this layer

			Parameters
			----------
			min_degree: int, optional (default 0)
			The minimum allowed degree for each graph node.
			max_degree: int, optional (default 10)
			The maximum allowed degree for each graph node. Note that this
			is set to 10 to handle complex molecules (some organometallic
			compounds have strange structures). If you're using this for
			non-molecular applications, you may need to set this much higher
			depending on your dataset.
			"""
	super(GraphPool, self).__init__(**kwargs)		super(GraphPool, self).__init__(**kwargs)
	self.min_degree = min_degree		self.min_degree = min_degree
	self.max_degree = max_degree		self.max_degree = max_degree
	@@ -195,8 +249,36 @@ class GraphPool(tf.keras.layers.Layer):


	class GraphGather(tf.keras.layers.Layer):		class GraphGather(tf.keras.layers.Layer):
			"""A GraphGather layer pools node-level feature vectors to create a graph feature vector.

			Many graph convolutional networks manipulate feature vectors per
			graph-node. For a molecule for example, each node might represent an
			atom, and the network would manipulate atomic feature vectors that
			summarize the local chemistry of the atom. However, at the end of
			the application, we will likely want to work with a molecule level
			feature representation. The `GraphGather` layer creates a graph level
			feature vector by combining all the node-level feature vectors.

			One subtlety about this layer is that it depends on the
			`batch_size`. This is done for internal implementation reasons. The
			`GraphConv`, and `GraphPool` layers pool all nodes from all graphs
			in a batch that's being processed. The `GraphGather` reassembles
			these jumbled node feature vectors into per-graph feature vectors.
			"""

	def __init__(self, batch_size, activation_fn=None, **kwargs):		def __init__(self, batch_size, activation_fn=None, **kwargs):
			"""Initialize this layer.

			Parameters
			----------
			batch_size: int
			The batch size for this layer. Note that the layer's behavior
			changes depending on the batch size.
			activation_fn: function
			A nonlinear activation function to apply. If you're not sure,
			`tf.nn.relu` is probably a good default for your application.
			"""

	super(GraphGather, self).__init__(**kwargs)		super(GraphGather, self).__init__(**kwargs)
	self.batch_size = batch_size		self.batch_size = batch_size
	self.activation_fn = activation_fn		self.activation_fn = activation_fn
	@@ -208,7 +290,15 @@ class GraphGather(tf.keras.layers.Layer):
	return config		return config

	def call(self, inputs):		def call(self, inputs):
	# x = [atom_features, deg_slice, membership, deg_adj_list placeholders...]		"""Invoking this layer.

			Parameters
			----------
			inputs: list
			This list should consist of `inputs = [atom_features, deg_slice,
			membership, deg_adj_list placeholders...]`. These are all
			tensors that are created/processed by `GraphConv` and `GraphPool`
			"""
	atom_features = inputs[0]		atom_features = inputs[0]

	# Extract graph topology		# Extract graph topology
	@@ -507,16 +597,15 @@ class IterRefLSTMEmbedding(tf.keras.layers.Layer):
	Parameters		Parameters
	----------		----------
	inputs: list		inputs: list
	List of two tensors (X, Xp). X should be of shape (n_test, n_feat) and		List of two tensors (X, Xp). X should be of shape (n_test,
	Xp should be of shape (n_support, n_feat) where n_test is the size of		n_feat) and Xp should be of shape (n_support, n_feat) where
	the test set, n_support that of the support set, and n_feat is the number		n_test is the size of the test set, n_support that of the
	of per-atom features.		support set, and n_feat is the number of per-atom features.

	Returns		Returns
	-------		-------
	list		Returns two tensors of same shape as input. Namely the output
	Returns two tensors of same shape as input. Namely the output shape will		shape will be [(n_test, n_feat), (n_support, n_feat)]
	be [(n_test, n_feat), (n_support, n_feat)]
	"""		"""
	if len(inputs) != 2:		if len(inputs) != 2:
	raise ValueError(		raise ValueError(
	@@ -560,10 +649,11 @@ class IterRefLSTMEmbedding(tf.keras.layers.Layer):
	class SwitchedDropout(tf.keras.layers.Layer):		class SwitchedDropout(tf.keras.layers.Layer):
	"""Apply dropout based on an input.		"""Apply dropout based on an input.

	This is required for uncertainty prediction. The standard Keras Dropout		This is required for uncertainty prediction. The standard Keras
	layer only performs dropout during training, but we sometimes need to do it		Dropout layer only performs dropout during training, but we
	during prediction. The second input to this layer should be a scalar equal to		sometimes need to do it during prediction. The second input to this
	0 or 1, indicating whether to perform dropout.		layer should be a scalar equal to 0 or 1, indicating whether to
			perform dropout.
	"""		"""

	def __init__(self, rate, **kwargs):		def __init__(self, rate, **kwargs):
	@@ -584,6 +674,13 @@ class WeightedLinearCombo(tf.keras.layers.Layer):
	"""Computes a weighted linear combination of input layers, with the weights defined by trainable variables."""		"""Computes a weighted linear combination of input layers, with the weights defined by trainable variables."""

	def __init__(self, std=0.3, **kwargs):		def __init__(self, std=0.3, **kwargs):
			"""Initialize this layer.

			Parameters
			----------
			std: float, optional (default 0.3)
			The standard deviation to use when randomly initializing weights.
			"""
	super(WeightedLinearCombo, self).__init__(**kwargs)		super(WeightedLinearCombo, self).__init__(**kwargs)
	self.std = std		self.std = std

	@@ -617,17 +714,18 @@ class CombineMeanStd(tf.keras.layers.Layer):
	def __init__(self, training_only=False, noise_epsilon=1.0, **kwargs):		def __init__(self, training_only=False, noise_epsilon=1.0, **kwargs):
	"""Create a CombineMeanStd layer.		"""Create a CombineMeanStd layer.

	This layer should have two inputs with the same shape, and its output also has the		This layer should have two inputs with the same shape, and its
	same shape. Each element of the output is a Gaussian distributed random number		output also has the same shape. Each element of the output is a
	whose mean is the corresponding element of the first input, and whose standard		Gaussian distributed random number whose mean is the corresponding
	deviation is the corresponding element of the second input.		element of the first input, and whose standard deviation is the
			corresponding element of the second input.

	Parameters		Parameters
	----------		----------
	training_only: bool		training_only: bool
	if True, noise is only generated during training. During prediction, the output		if True, noise is only generated during training. During
	is simply equal to the first input (that is, the mean of the distribution used		prediction, the output is simply equal to the first input (that
	during training).		is, the mean of the distribution used during training).
	noise_epsilon: float		noise_epsilon: float
	The noise is scaled by this factor		The noise is scaled by this factor
	"""		"""
	@@ -671,10 +769,10 @@ class Stack(tf.keras.layers.Layer):
	class Variable(tf.keras.layers.Layer):		class Variable(tf.keras.layers.Layer):
	"""Output a trainable value.		"""Output a trainable value.

	Due to a quirk of Keras, you must pass an input value when invoking this layer.		Due to a quirk of Keras, you must pass an input value when invoking
	It doesn't matter what value you pass. Keras assumes every layer that is not		this layer. It doesn't matter what value you pass. Keras assumes
	an Input will have at least one parent, and violating this assumption causes		every layer that is not an Input will have at least one parent, and
	errors during evaluation.		violating this assumption causes errors during evaluation.
	"""		"""

	def __init__(self, initial_value, **kwargs):		def __init__(self, initial_value, **kwargs):
	@@ -830,8 +928,11 @@ class VinaFreeEnergy(tf.keras.layers.Layer):
	class NeighborList(tf.keras.layers.Layer):		class NeighborList(tf.keras.layers.Layer):
	"""Computes a neighbor-list in Tensorflow.		"""Computes a neighbor-list in Tensorflow.

	Neighbor-lists (also called Verlet Lists) are a tool for grouping atoms which		Neighbor-lists (also called Verlet Lists) are a tool for grouping
	are close to each other spatially		atoms which are close to each other spatially. This layer computes a
			Neighbor List from a provided tensor of atomic coordinates. You can
			think of this as a general "k-means" layer, but optimized for the
			case `k==3`.

	TODO(rbharath): Make this layer support batching.		TODO(rbharath): Make this layer support batching.
	"""		"""
	@@ -1121,9 +1222,12 @@ class NeighborList(tf.keras.layers.Layer):
	class AtomicConvolution(tf.keras.layers.Layer):		class AtomicConvolution(tf.keras.layers.Layer):
	"""Implements the atomic convolutional transform introduced in		"""Implements the atomic convolutional transform introduced in

	Gomes, Joseph, et al. "Atomic convolutional networks for predicting protein-ligand binding affinity." arXiv preprint arXiv:1703.10603 (2017).		Gomes, Joseph, et al. "Atomic convolutional networks for predicting
			protein-ligand binding affinity." arXiv preprint arXiv:1703.10603
			(2017).

	At a high level, this transform performs a sort of graph convolution on the nearest neighbors graph in 3D space.		At a high level, this transform performs a graph convolution
			on the nearest neighbors graph in 3D space.
	"""		"""

	def __init__(self,		def __init__(self,
	@@ -1433,7 +1537,8 @@ class BetaShare(tf.keras.layers.Layer):
	Parameters		Parameters
	----------		----------
	in_layers: list of Layers or tensors		in_layers: list of Layers or tensors
	tensors in list must be the same size and list must include two or more tensors		tensors in list must be the same size and list must include two or
			more tensors

	Returns		Returns
	-------		-------
	@@ -1656,15 +1761,15 @@ class GraphEmbedPoolLayer(tf.keras.layers.Layer):
	GraphCNNPool Layer from Robust Spatial Filtering with Graph Convolutional Neural Networks		GraphCNNPool Layer from Robust Spatial Filtering with Graph Convolutional Neural Networks
	https://arxiv.org/abs/1703.00792		https://arxiv.org/abs/1703.00792

	This is a learnable pool operation		This is a learnable pool operation. It constructs a new adjacency
	It constructs a new adjacency matrix for a graph of specified number of nodes.		matrix for a graph of specified number of nodes.

	This differs from our other pool opertions which set vertices to a function value		This differs from our other pool operations which set vertices to a
	without altering the adjacency matrix.		function value without altering the adjacency matrix.

	$V_{emb} = SpatialGraphCNN({V_{in}})$\\		.. math:: V_{emb} = SpatialGraphCNN({V_{in}})
	$V_{out} = \sigma(V_{emb})^{T} * V_{in}$		.. math:: V_{out} = \sigma(V_{emb})^{T} * V_{in}
	$A_{out} = V_{emb}^{T} * A_{in} * V_{emb}$		.. math:: A_{out} = V_{emb}^{T} * A_{in} * V_{emb}
	"""		"""

	def __init__(self, num_vertices, **kwargs):		def __init__(self, num_vertices, **kwargs):
	@@ -1693,7 +1798,6 @@ class GraphEmbedPoolLayer(tf.keras.layers.Layer):
	----------		----------
	num_filters: int		num_filters: int
	Number of filters to have in the output		Number of filters to have in the output

	in_layers: list of Layers or tensors		in_layers: list of Layers or tensors
	[V, A, mask]		[V, A, mask]
	V are the vertex features must be of shape (batch, vertex, channel)		V are the vertex features must be of shape (batch, vertex, channel)
	@@ -1704,9 +1808,10 @@ class GraphEmbedPoolLayer(tf.keras.layers.Layer):
	mask is optional, to be used when not every graph has the		mask is optional, to be used when not every graph has the
	same number of vertices		same number of vertices

	Returns: tf.tensor		Returns
	Returns a tf.tensor with a graph convolution applied		-------
	The shape will be (batch, vertex, self.num_filters)		Returns a `tf.tensor` with a graph convolution applied
			The shape will be `(batch, vertex, self.num_filters)`.
	"""		"""
	if len(inputs) == 3:		if len(inputs) == 3:
	V, A, mask = inputs		V, A, mask = inputs
	@@ -2761,7 +2866,9 @@ class GatedRecurrentUnit(tf.keras.layers.Layer):

	class SetGather(tf.keras.layers.Layer):		class SetGather(tf.keras.layers.Layer):
	"""set2set gather layer for graph-based model		"""set2set gather layer for graph-based model
	model using this layer must set pad_batches=True """
			Models using this layer must set `pad_batches=True`.
			"""

	def __init__(self, M, batch_size, n_hidden=100, init='orthogonal', **kwargs):		def __init__(self, M, batch_size, n_hidden=100, init='orthogonal', **kwargs):
	"""		"""
	@@ -2799,7 +2906,9 @@ class SetGather(tf.keras.layers.Layer):

	def call(self, inputs):		def call(self, inputs):
	"""Perform M steps of set2set gather,		"""Perform M steps of set2set gather,
	detailed descriptions in: https://arxiv.org/abs/1511.06391 """
			Detailed descriptions in: https://arxiv.org/abs/1511.06391
			"""
	atom_features, atom_split = inputs		atom_features, atom_split = inputs
	c = tf.zeros((self.batch_size, self.n_hidden))		c = tf.zeros((self.batch_size, self.n_hidden))
	h = tf.zeros((self.batch_size, self.n_hidden))		h = tf.zeros((self.batch_size, self.n_hidden))

deepchem/molnet/load_function/clintox_datasets.py

+27 −1

Original line number	Original line	Diff line number	Diff line
	@@ -18,7 +18,33 @@ def load_clintox(featurizer='ECFP',
	data_dir=None,		data_dir=None,
	save_dir=None,		save_dir=None,
	**kwargs):		**kwargs):
	"""Load clintox datasets."""		"""Load clintox datasets.

			The ClinTox dataset compares drugs approved by the FDA and
			drugs that have failed clinical trials for toxicity reasons.
			The dataset includes two classification tasks for 1491 drug
			compounds with known chemical structures: (1) clinical trial
			toxicity (or absence of toxicity) and (2) FDA approval status.
			Lists of FDA-approved drugs are compiled from the SWEETLEAD
			database, and lists of drugs that failed clinical trials for
			toxicity reasons are compiled from the Aggregate Analysis of
			ClinicalTrials.gov (AACT) database.

			The data file contains a csv table, in which columns below are
			used:
			"smiles" - SMILES representation of the molecular structure
			"FDA_APPROVED" - FDA approval status
			"CT_TOX" - Clinical trial results

			References:
			Gayvert, Kaitlyn M., Neel S. Madhukar, and Olivier Elemento. "A data-driven approach to predicting successes and failures of clinical trials." Cell chemical biology 23.10 (2016): 1294-1301.

			Artemov, Artem V., et al. "Integrated deep learned transcriptomic and structure-based predictor of clinical trials outcomes." bioRxiv (2016): 095653.

			Novick, Paul A., et al. "SWEETLEAD: an in silico database of approved drugs, regulated chemicals, and herbal isolates for computer-aided drug discovery." PloS one 8.11 (2013): e79568.

			Aggregate Analysis of ClinicalTrials.gov (AACT) Database. https://www.ctti-clinicaltrials.org/aact-database
			"""
	if data_dir is None:		if data_dir is None:
	data_dir = DEFAULT_DIR		data_dir = DEFAULT_DIR
	if save_dir is None:		if save_dir is None:

Admin message