Unverified Commit c099b76b authored by Karl Leswing's avatar Karl Leswing Committed by GitHub
Browse files

Merge pull request #1042 from miaecle/temp3

MolNet update
parents 9e553fca 29a97407
Loading
Loading
Loading
Loading
+4 −2
Original line number Diff line number Diff line
@@ -789,11 +789,13 @@ class DiskDataset(Dataset):
        else:
          shard_batch_size = batch_size

        num_local_batches = math.ceil(n_shard_samples / shard_batch_size)

        if n_shard_samples == 0:
          cur_shard += 1
          if batch_size is None:
            cur_global_batch += 1
          continue

        num_local_batches = math.ceil(n_shard_samples / shard_batch_size)
        if not deterministic:
          sample_perm = np.random.permutation(n_shard_samples)
        else:
+31 −0
Original line number Diff line number Diff line
@@ -1562,6 +1562,37 @@ class ReduceMean(Layer):
    return out_tensor


class ReduceMax(Layer):
  """Layer that computes the maximum of its input over the given axes.

  If multiple input layers are supplied, they are stacked along a new
  leading axis before the reduction is applied.
  """

  def __init__(self, in_layers=None, axis=None, **kwargs):
    """
    Parameters
    ----------
    in_layers: list of Layer, optional
      Input layers to reduce.
    axis: int or sequence of int, optional
      Axis or axes to take the maximum over. None reduces over all
      axes, producing a scalar.
    """
    if axis is not None and not isinstance(axis, Sequence):
      # Normalize a single int axis into a list for uniform handling below.
      axis = [axis]
    self.axis = axis
    super(ReduceMax, self).__init__(in_layers, **kwargs)
    if axis is None:
      # Full reduction yields a scalar.
      self._shape = tuple()
    else:
      try:
        parent_shape = self.in_layers[0].shape
        # The reduced axes are dropped from the parent's shape.
        self._shape = [
            parent_shape[i] for i in range(len(parent_shape)) if i not in axis
        ]
      except Exception:
        # Shape inference is best-effort: the parent layer may not expose a
        # shape yet. Narrowed from a bare `except:` so that SystemExit and
        # KeyboardInterrupt are not swallowed.
        pass

  def create_tensor(self, in_layers=None, set_tensors=True, **kwargs):
    """Build the tf.reduce_max op over the (possibly stacked) inputs."""
    inputs = self._get_input_tensors(in_layers)
    if len(inputs) > 1:
      # Multiple inputs: stack them along a new leading axis first.
      self.out_tensor = tf.stack(inputs)
    else:
      self.out_tensor = inputs[0]

    out_tensor = tf.reduce_max(self.out_tensor, axis=self.axis)
    if set_tensors:
      self.out_tensor = out_tensor
    return out_tensor


class ToFloat(Layer):

  def __init__(self, in_layers=None, **kwargs):
+28 −6
Original line number Diff line number Diff line
@@ -139,7 +139,7 @@ class WeaveTensorGraph(TensorGraph):
          pad_batches=pad_batches):

        feed_dict = dict()
        if y_b is not None and not predict:
        if y_b is not None:
          for index, label in enumerate(self.labels_fd):
            if self.mode == "classification":
              feed_dict[label] = to_one_hot(y_b[:, index])
@@ -182,6 +182,17 @@ class WeaveTensorGraph(TensorGraph):
        feed_dict[self.atom_to_pair] = np.concatenate(atom_to_pair, axis=0)
        yield feed_dict

  def predict_on_generator(self, generator, transformers=None, outputs=None):
    """Run prediction on batches from `generator` and undo transforms.

    The parent implementation is invoked with no transformers so the raw
    model outputs are returned; multi-task outputs are stacked into one
    array before the transforms are undone here.

    Parameters
    ----------
    generator: generator
      Yields feed dicts for the model.
    transformers: list of Transformers, optional
      Transformers to undo on the predictions.
    outputs: list of Layers, optional
      Output layers to evaluate; defaults to self.outputs.
    """
    # Avoid the mutable-default-argument pitfall: use None as the sentinel.
    if transformers is None:
      transformers = []
    out = super(WeaveTensorGraph, self).predict_on_generator(
        generator, transformers=[], outputs=outputs)
    if outputs is None:
      outputs = self.outputs
    if len(outputs) > 1:
      # Multiple output layers: combine per-task predictions along axis 1.
      out = np.stack(out, axis=1)

    out = undo_transforms(out, transformers)
    return out


class DTNNTensorGraph(TensorGraph):

@@ -193,7 +204,7 @@ class DTNNTensorGraph(TensorGraph):
               distance_min=-1,
               distance_max=18,
               output_activation=True,
               mode="classification",
               mode="regression",
               **kwargs):
    """
            Parameters
@@ -294,7 +305,7 @@ class DTNNTensorGraph(TensorGraph):
          pad_batches=pad_batches):

        feed_dict = dict()
        if y_b is not None and not predict:
        if y_b is not None:
          for index, label in enumerate(self.labels_fd):
            feed_dict[label] = y_b[:, index:index + 1]
        if w_b is not None:
@@ -456,7 +467,7 @@ class DAGTensorGraph(TensorGraph):
          pad_batches=pad_batches):

        feed_dict = dict()
        if y_b is not None and not predict:
        if y_b is not None:
          for index, label in enumerate(self.labels_fd):
            if self.mode == "classification":
              feed_dict[label] = to_one_hot(y_b[:, index])
@@ -496,6 +507,17 @@ class DAGTensorGraph(TensorGraph):
        feed_dict[self.n_atoms] = n_atoms
        yield feed_dict

  def predict_on_generator(self, generator, transformers=None, outputs=None):
    """Run prediction on batches from `generator` and undo transforms.

    The parent implementation is invoked with no transformers so the raw
    model outputs are returned; multi-task outputs are stacked into one
    array before the transforms are undone here.

    Parameters
    ----------
    generator: generator
      Yields feed dicts for the model.
    transformers: list of Transformers, optional
      Transformers to undo on the predictions.
    outputs: list of Layers, optional
      Output layers to evaluate; defaults to self.outputs.
    """
    # Avoid the mutable-default-argument pitfall: use None as the sentinel.
    if transformers is None:
      transformers = []
    out = super(DAGTensorGraph, self).predict_on_generator(
        generator, transformers=[], outputs=outputs)
    if outputs is None:
      outputs = self.outputs
    if len(outputs) > 1:
      # Multiple output layers: combine per-task predictions along axis 1.
      out = np.stack(out, axis=1)

    out = undo_transforms(out, transformers)
    return out


class PetroskiSuchTensorGraph(TensorGraph):
  """
@@ -1034,7 +1056,7 @@ class MPNNTensorGraph(TensorGraph):
          pad_batches=pad_batches):

        feed_dict = dict()
        if y_b is not None and not predict:
        if y_b is not None:
          for index, label in enumerate(self.labels_fd):
            if self.mode == "classification":
              feed_dict[label] = to_one_hot(y_b[:, index])
@@ -1099,7 +1121,7 @@ class MPNNTensorGraph(TensorGraph):
      results = []
      for feed_dict in generator:
        # Extract number of unique samples in the batch from w_b
        n_valid_samples = len(np.nonzero(feed_dict[self.weights][:, 0])[0])
        n_valid_samples = len(np.nonzero(np.sum(feed_dict[self.weights], 1))[0])
        feed_dict = {
            self.layers[k.name].out_tensor: v
            for k, v in six.iteritems(feed_dict)
+18 −10
Original line number Diff line number Diff line
@@ -10,7 +10,7 @@ import copy
from deepchem.metrics import to_one_hot, from_one_hot
from deepchem.models.tensorgraph.layers import Dense, Concat, SoftMax, \
  SoftMaxCrossEntropy, BatchNorm, WeightedError, Dropout, BatchNormalization, \
  Conv1D, MaxPool1D, Squeeze, Stack, Highway
  Conv1D, ReduceMax, Squeeze, Stack, Highway
from deepchem.models.tensorgraph.graph_layers import DTNNEmbedding

from deepchem.models.tensorgraph.layers import L2Loss, Label, Weights, Feature
@@ -104,6 +104,8 @@ class TextCNNTensorGraph(TensorGraph):
      Properties of filters used in the conv net
    num_filters: list of int, optional
      Properties of filters used in the conv net
    dropout: float, optional
      Dropout rate
    mode: str
      Either "classification" or "regression" for type of model.
    """
@@ -174,15 +176,10 @@ class TextCNNTensorGraph(TensorGraph):
              in_layers=[self.Embedding]))
      # Max-over-time pooling
      self.pooled_outputs.append(
          MaxPool1D(
              window_shape=self.seq_length - filter_size + 1,
              strides=1,
              padding='VALID',
              in_layers=[self.conv_layers[-1]]))
          ReduceMax(axis=1, in_layers=[self.conv_layers[-1]]))
    # Concat features from all filters(one feature per filter)
    concat_outputs = Concat(axis=2, in_layers=self.pooled_outputs)
    outputs = Squeeze(squeeze_dims=1, in_layers=concat_outputs)
    dropout = Dropout(dropout_prob=self.dropout, in_layers=[outputs])
    concat_outputs = Concat(axis=1, in_layers=self.pooled_outputs)
    dropout = Dropout(dropout_prob=self.dropout, in_layers=[concat_outputs])
    dense = Dense(
        out_channels=200, activation_fn=tf.nn.relu, in_layers=[dropout])
    # Highway layer from https://arxiv.org/pdf/1505.00387.pdf
@@ -211,7 +208,7 @@ class TextCNNTensorGraph(TensorGraph):
        cost = L2Loss(in_layers=[label, regression])
        costs.append(cost)
    if self.mode == "classification":
      all_cost = Concat(in_layers=costs, axis=1)
      all_cost = Stack(in_layers=costs, axis=1)
    elif self.mode == "regression":
      all_cost = Stack(in_layers=costs, axis=1)
    self.weights = Weights(shape=(None, self.n_tasks))
@@ -272,3 +269,14 @@ class TextCNNTensorGraph(TensorGraph):
      # Padding with '_'
      seq.append(self.char_dict['_'])
    return np.array(seq)

  def predict_on_generator(self, generator, transformers=None, outputs=None):
    """Run prediction on batches from `generator` and undo transforms.

    The parent implementation is invoked with no transformers so the raw
    model outputs are returned; multi-task outputs are stacked into one
    array before the transforms are undone here.

    Parameters
    ----------
    generator: generator
      Yields feed dicts for the model.
    transformers: list of Transformers, optional
      Transformers to undo on the predictions.
    outputs: list of Layers, optional
      Output layers to evaluate; defaults to self.outputs.
    """
    # Avoid the mutable-default-argument pitfall: use None as the sentinel.
    if transformers is None:
      transformers = []
    out = super(TextCNNTensorGraph, self).predict_on_generator(
        generator, transformers=[], outputs=outputs)
    if outputs is None:
      outputs = self.outputs
    if len(outputs) > 1:
      # Multiple output layers: combine per-task predictions along axis 1.
      out = np.stack(out, axis=1)

    out = undo_transforms(out, transformers)
    return out
+28 −1
Original line number Diff line number Diff line
@@ -9,6 +9,8 @@ CheckFeaturizer = {
    ('bace_c', 'graphconv'): ['GraphConv', 75],
    ('bace_c', 'dag'): ['GraphConv', 75],
    ('bace_c', 'weave'): ['Weave', 75],
    ('bace_c', 'textcnn'): ['Raw', None],
    ('bace_c', 'mpnn'): ['Weave', [75, 14]],
    ('bbbp', 'logreg'): ['ECFP', 1024],
    ('bbbp', 'tf'): ['ECFP', 1024],
    ('bbbp', 'tf_robust'): ['ECFP', 1024],
@@ -19,6 +21,8 @@ CheckFeaturizer = {
    ('bbbp', 'graphconv'): ['GraphConv', 75],
    ('bbbp', 'dag'): ['GraphConv', 75],
    ('bbbp', 'weave'): ['Weave', 75],
    ('bbbp', 'textcnn'): ['Raw', None],
    ('bbbp', 'mpnn'): ['Weave', [75, 14]],
    ('clintox', 'logreg'): ['ECFP', 1024],
    ('clintox', 'tf'): ['ECFP', 1024],
    ('clintox', 'tf_robust'): ['ECFP', 1024],
@@ -29,6 +33,8 @@ CheckFeaturizer = {
    ('clintox', 'graphconv'): ['GraphConv', 75],
    ('clintox', 'dag'): ['GraphConv', 75],
    ('clintox', 'weave'): ['Weave', 75],
    ('clintox', 'textcnn'): ['Raw', None],
    ('clintox', 'mpnn'): ['Weave', [75, 14]],
    ('hiv', 'logreg'): ['ECFP', 1024],
    ('hiv', 'tf'): ['ECFP', 1024],
    ('hiv', 'tf_robust'): ['ECFP', 1024],
@@ -39,6 +45,8 @@ CheckFeaturizer = {
    ('hiv', 'graphconv'): ['GraphConv', 75],
    ('hiv', 'dag'): ['GraphConv', 75],
    ('hiv', 'weave'): ['Weave', 75],
    ('hiv', 'textcnn'): ['Raw', None],
    ('hiv', 'mpnn'): ['Weave', [75, 14]],
    ('muv', 'logreg'): ['ECFP', 1024],
    ('muv', 'tf'): ['ECFP', 1024],
    ('muv', 'tf_robust'): ['ECFP', 1024],
@@ -51,6 +59,8 @@ CheckFeaturizer = {
    ('muv', 'attn'): ['GraphConv', 75],
    ('muv', 'res'): ['GraphConv', 75],
    ('muv', 'weave'): ['Weave', 75],
    ('muv', 'textcnn'): ['Raw', None],
    ('muv', 'mpnn'): ['Weave', [75, 14]],
    ('pcba', 'logreg'): ['ECFP', 1024],
    ('pcba', 'tf'): ['ECFP', 1024],
    ('pcba', 'tf_robust'): ['ECFP', 1024],
@@ -58,6 +68,7 @@ CheckFeaturizer = {
    ('pcba', 'xgb'): ['ECFP', 1024],
    ('pcba', 'graphconv'): ['GraphConv', 75],
    ('pcba', 'weave'): ['Weave', 75],
    ('pcba', 'textcnn'): ['Raw', None],
    ('pcba_146', 'logreg'): ['ECFP', 1024],
    ('pcba_146', 'tf'): ['ECFP', 1024],
    ('pcba_146', 'tf_robust'): ['ECFP', 1024],
@@ -85,6 +96,8 @@ CheckFeaturizer = {
    ('sider', 'siamese'): ['GraphConv', 75],
    ('sider', 'attn'): ['GraphConv', 75],
    ('sider', 'res'): ['GraphConv', 75],
    ('sider', 'textcnn'): ['Raw', None],
    ('sider', 'mpnn'): ['Weave', [75, 14]],
    ('tox21', 'logreg'): ['ECFP', 1024],
    ('tox21', 'tf'): ['ECFP', 1024],
    ('tox21', 'tf_robust'): ['ECFP', 1024],
@@ -98,6 +111,8 @@ CheckFeaturizer = {
    ('tox21', 'siamese'): ['GraphConv', 75],
    ('tox21', 'attn'): ['GraphConv', 75],
    ('tox21', 'res'): ['GraphConv', 75],
    ('tox21', 'textcnn'): ['Raw', None],
    ('tox21', 'mpnn'): ['Weave', [75, 14]],
    ('toxcast', 'logreg'): ['ECFP', 1024],
    ('toxcast', 'tf'): ['ECFP', 1024],
    ('toxcast', 'tf_robust'): ['ECFP', 1024],
@@ -107,6 +122,8 @@ CheckFeaturizer = {
    ('toxcast', 'xgb'): ['ECFP', 1024],
    ('toxcast', 'graphconv'): ['GraphConv', 75],
    ('toxcast', 'weave'): ['Weave', 75],
    ('toxcast', 'textcnn'): ['Raw', None],
    ('toxcast', 'mpnn'): ['Weave', [75, 14]],
    ('bace_r', 'tf_regression'): ['ECFP', 1024],
    ('bace_r', 'rf_regression'): ['ECFP', 1024],
    ('bace_r', 'krr'): ['ECFP', 1024],
@@ -114,6 +131,7 @@ CheckFeaturizer = {
    ('bace_r', 'graphconvreg'): ['GraphConv', 75],
    ('bace_r', 'dag_regression'): ['GraphConv', 75],
    ('bace_r', 'weave_regression'): ['Weave', 75],
    ('bace_r', 'textcnn_regression'): ['Raw', None],
    ('chembl', 'tf_regression'): ['ECFP', 1024],
    ('chembl', 'rf_regression'): ['ECFP', 1024],
    ('chembl', 'krr'): ['ECFP', 1024],
@@ -135,6 +153,7 @@ CheckFeaturizer = {
    ('delaney', 'dag_regression'): ['GraphConv', 75],
    ('delaney', 'weave_regression'): ['Weave', 75],
    ('delaney', 'mpnn'): ['Weave', [75, 14]],
    ('delaney', 'textcnn_regression'): ['Raw', None],
    ('hopv', 'tf_regression'): ['ECFP', 1024],
    ('hopv', 'rf_regression'): ['ECFP', 1024],
    ('hopv', 'krr'): ['ECFP', 1024],
@@ -150,6 +169,7 @@ CheckFeaturizer = {
    ('lipo', 'dag_regression'): ['GraphConv', 75],
    ('lipo', 'weave_regression'): ['Weave', 75],
    ('lipo', 'mpnn'): ['Weave', [75, 14]],
    ('lipo', 'textcnn_regression'): ['Raw', None],
    ('nci', 'tf_regression'): ['ECFP', 1024],
    ('nci', 'rf_regression'): ['ECFP', 1024],
    ('nci', 'krr'): ['ECFP', 1024],
@@ -171,6 +191,7 @@ CheckFeaturizer = {
    ('sampl', 'dag_regression'): ['GraphConv', 75],
    ('sampl', 'weave_regression'): ['Weave', 75],
    ('sampl', 'mpnn'): ['Weave', [75, 14]],
    ('sampl', 'textcnn_regression'): ['Raw', None],
    ('kaggle', 'tf_regression'): [None, 14293],
    ('kaggle', 'rf_regression'): [None, 14293],
    ('kaggle', 'krr'): [None, 14293],
@@ -182,7 +203,9 @@ CheckFeaturizer = {
    ('qm7', 'rf_regression'): ['ECFP', 1024],
    ('qm7', 'krr'): ['ECFP', 1024],
    ('qm7', 'krr_ft'): ['CoulombMatrix', 1024],
    ('qm7', 'textcnn_regression'): ['Raw', None],
    ('qm7', 'graphconvreg'): ['GraphConv', 75],
    ('qm7', 'weave_regression'): ['Weave', 75],
    ('qm7', 'tf_regression_ft'): ['CoulombMatrix', [23, 23]],
    ('qm7', 'dtnn'): ['CoulombMatrix', [23, 23]],
    ('qm7', 'ani'): ['BPSymmetryFunction', [23, 4]],
@@ -198,6 +221,8 @@ CheckFeaturizer = {
    ('qm8', 'dtnn'): ['CoulombMatrix', [26, 26]],
    ('qm8', 'ani'): ['BPSymmetryFunction', [26, 4]],
    ('qm8', 'mpnn'): ['MP', [70, 8]],
    ('qm8', 'weave_regression'): ['Weave', 75],
    ('qm8', 'textcnn_regression'): ['Raw', None],
    ('qm9', 'tf_regression'): ['ECFP', 1024],
    ('qm9', 'rf_regression'): ['ECFP', 1024],
    ('qm9', 'krr'): ['ECFP', 1024],
@@ -206,7 +231,9 @@ CheckFeaturizer = {
    ('qm9', 'krr_ft'): ['CoulombMatrix', 1024],
    ('qm9', 'dtnn'): ['CoulombMatrix', [29, 29]],
    ('qm9', 'ani'): ['BPSymmetryFunction', [29, 4]],
    ('qm9', 'mpnn'): ['MP', [70, 8]]
    ('qm9', 'mpnn'): ['MP', [70, 8]],
    ('qm9', 'weave_regression'): ['Weave', 75],
    ('qm9', 'textcnn_regression'): ['Raw', None]
}

CheckSplit = {
Loading