Merge pull request #949 from peastman/examples (766139be) · Commits · 钟慕尧 / deepchem

deepchem/models/tensorflow_models/fcnet.py

+7 −8

Original line number	Diff line number	Diff line
		@@ -22,7 +22,7 @@ from deepchem.models.tensorflow_models import TensorflowRegressor
		from deepchem.metrics import to_one_hot

		from deepchem.models.tensorgraph.tensor_graph import TensorGraph, TFWrapper
		from deepchem.models.tensorgraph.layers import Feature, Label, Weights, WeightedError, Dense, Dropout, WeightDecay, Reshape, SoftMaxCrossEntropy, L2Loss
		from deepchem.models.tensorgraph.layers import Feature, Label, Weights, WeightedError, Dense, Dropout, WeightDecay, Reshape, SoftMaxCrossEntropy, L2Loss, ReduceSum


		class TensorGraphMultiTaskClassifier(TensorGraph):
		@@ -280,8 +280,7 @@ class TensorGraphMultiTaskRegressor(TensorGraph):
		self.add_output(output)
		labels = Label(shape=(None, n_tasks, 1))
		weights = Weights(shape=(None, n_tasks))
		loss = L2Loss(in_layers=[labels, output])
		weighted_loss = WeightedError(in_layers=[loss, weights])
		weighted_loss = ReduceSum(L2Loss(in_layers=[labels, output, weights]))
		if weight_decay_penalty != 0.0:
		weighted_loss = WeightDecay(
		weight_decay_penalty,
		@@ -500,8 +499,8 @@ class TensorflowMultiTaskClassifier(TensorflowClassifier):
		orig_dict["labels_%d" % task] = to_one_hot(y_b[:, task])
		else:
		# Dummy placeholders
		orig_dict["labels_%d" %
		task] = np.squeeze(to_one_hot(np.zeros((self.batch_size,))))
		orig_dict["labels_%d" % task] = np.squeeze(
		to_one_hot(np.zeros((self.batch_size,))))
		if w_b is not None:
		orig_dict["weights_%d" % task] = w_b[:, task]
		else:
		@@ -588,8 +587,8 @@ class TensorflowMultiTaskRegressor(TensorflowRegressor):
		weight_init=tf.truncated_normal(
		shape=[prev_layer_size, 1],
		stddev=weight_init_stddevs[i]),
		bias_init=tf.constant(value=bias_init_consts[i], shape=[1
		]))))
		bias_init=tf.constant(value=bias_init_consts[i],
		shape=[1]))))
		return (output, labels, weights)

		def construct_feed_dict(self, X_b, y_b=None, w_b=None, ids_b=None):

deepchem/models/tensorgraph/layers.py

+43 −29

Original line number	Diff line number	Diff line
		@@ -425,8 +425,8 @@ class Conv1D(Layer):
		raise ValueError("Parent tensor must be (batch, width, channel)")
		parent_shape = parent.get_shape()
		parent_channel_size = parent_shape[2].value
		f = tf.Variable(self.weights_initializer()
		([self.width, parent_channel_size, self.out_channels]))
		f = tf.Variable(self.weights_initializer()(
		[self.width, parent_channel_size, self.out_channels]))
		t = tf.nn.conv1d(parent, f, stride=self.stride, padding=self.padding)
		if self.biases_initializer is not None:
		b = tf.Variable(self.biases_initializer()([self.out_channels]))
		@@ -947,6 +947,12 @@ class Weights(Input):


		class L1Loss(Layer):
		"""Compute the mean absolute difference between the elements of the inputs.

		This layer should have two or three inputs. If there is a third input, the
		difference between the first two inputs is multiplied by the third one to
		produce a weighted error.
		"""

		def __init__(self, in_layers=None, **kwargs):
		super(L1Loss, self).__init__(in_layers, **kwargs)
		@@ -954,14 +960,22 @@ class L1Loss(Layer):
		def create_tensor(self, in_layers=None, set_tensors=True, **kwargs):
		inputs = self._get_input_tensors(in_layers, True)
		guess, label = inputs[0], inputs[1]
		out_tensor = tf.reduce_mean(
		tf.abs(guess - label), axis=list(range(1, len(label.shape))))
		l1 = tf.abs(guess - label)
		if len(inputs) > 2:
		l1 *= inputs[2]
		out_tensor = tf.reduce_mean(l1, axis=list(range(1, len(label.shape))))
		if set_tensors:
		self.out_tensor = out_tensor
		return out_tensor


		class L2Loss(Layer):
		"""Compute the mean squared difference between the elements of the inputs.

		This layer should have two or three inputs. If there is a third input, the
		squared difference between the first two inputs is multiplied by the third one to
		produce a weighted error.
		"""

		def __init__(self, in_layers=None, **kwargs):
		super(L2Loss, self).__init__(in_layers, **kwargs)
		@@ -969,17 +983,19 @@ class L2Loss(Layer):
		shape1 = self.in_layers[0].shape
		shape2 = self.in_layers[1].shape
		if shape1[0] is None:
		self._shape = (parent_shape[1],)
		self._shape = (shape2[0],)
		else:
		self._shape = (parent_shape[0],)
		self._shape = (shape1[0],)
		except:
		pass

		def create_tensor(self, in_layers=None, set_tensors=True, **kwargs):
		inputs = self._get_input_tensors(in_layers, True)
		guess, label = inputs[0], inputs[1]
		out_tensor = tf.reduce_mean(
		tf.square(guess - label), axis=list(range(1, len(label._shape))))
		l2 = tf.square(guess - label)
		if len(inputs) > 2:
		l2 *= inputs[2]
		out_tensor = tf.reduce_mean(l2, axis=list(range(1, len(label._shape))))
		if set_tensors:
		self.out_tensor = out_tensor
		return out_tensor
		@@ -1315,7 +1331,7 @@ class SparseSoftMaxCrossEntropy(Layer):
		def __init__(self, in_layers=None, **kwargs):
		super(SparseSoftMaxCrossEntropy, self).__init__(in_layers, **kwargs)
		try:
		self._shape = (self.in_layers[1].shape[0], 1)
		self._shape = self.in_layers[1].shape[:-1]
		except:
		pass

		@@ -1324,9 +1340,8 @@ class SparseSoftMaxCrossEntropy(Layer):
		if len(inputs) != 2:
		raise ValueError()
		labels, logits = inputs[0], inputs[1]
		self.out_tensor = tf.nn.sparse_softmax_cross_entropy_with_logits(
		out_tensor = tf.nn.sparse_softmax_cross_entropy_with_logits(
		logits=logits, labels=labels)
		out_tensor = tf.reshape(self.out_tensor, [-1, 1])
		if set_tensors:
		self.out_tensor = out_tensor
		return out_tensor
		@@ -1337,7 +1352,7 @@ class SoftMaxCrossEntropy(Layer):
		def __init__(self, in_layers=None, **kwargs):
		super(SoftMaxCrossEntropy, self).__init__(in_layers, **kwargs)
		try:
		self._shape = (self.in_layers[1].shape[0], 1)
		self._shape = self.in_layers[1].shape[:-1]
		except:
		pass

		@@ -1346,9 +1361,8 @@ class SoftMaxCrossEntropy(Layer):
		if len(inputs) != 2:
		raise ValueError()
		labels, logits = inputs[0], inputs[1]
		self.out_tensor = tf.nn.softmax_cross_entropy_with_logits(
		out_tensor = tf.nn.softmax_cross_entropy_with_logits(
		logits=logits, labels=labels)
		out_tensor = tf.reshape(self.out_tensor, [-1, 1])
		if set_tensors:
		self.out_tensor = out_tensor
		return out_tensor
		@@ -1887,8 +1901,8 @@ class MaxPool1D(Layer):
		super(MaxPool1D, self).__init__(**kwargs)
		try:
		parent_shape = self.in_layers[0].shape
		self._shape = tuple(None if p is None else p // s
		for p, s in zip(parent_shape, strides))
		self._shape = tuple(
		None if p is None else p // s for p, s in zip(parent_shape, strides))
		except:
		pass

		@@ -1919,8 +1933,8 @@ class MaxPool2D(Layer):
		super(MaxPool2D, self).__init__(**kwargs)
		try:
		parent_shape = self.in_layers[0].shape
		self._shape = tuple(None if p is None else p // s
		for p, s in zip(parent_shape, strides))
		self._shape = tuple(
		None if p is None else p // s for p, s in zip(parent_shape, strides))
		except:
		pass

		@@ -1966,8 +1980,8 @@ class MaxPool3D(Layer):
		super(MaxPool3D, self).__init__(**kwargs)
		try:
		parent_shape = self.in_layers[0].shape
		self._shape = tuple(None if p is None else p // s
		for p, s in zip(parent_shape, strides))
		self._shape = tuple(
		None if p is None else p // s for p, s in zip(parent_shape, strides))
		except:
		pass

		@@ -2679,7 +2693,7 @@ class WeightedError(Layer):
		self._shape = tuple()

		def create_tensor(self, in_layers=None, set_tensors=True, **kwargs):
		inputs = self._get_input_tensors(in_layers, True)
		inputs = self._get_input_tensors(in_layers)
		entropy, weights = inputs[0], inputs[1]
		out_tensor = tf.reduce_sum(entropy * weights)
		if set_tensors:
		@@ -3118,8 +3132,8 @@ class NeighborList(Layer):
		mesh_args = [tf.range(start, stop, nbr_cutoff) for _ in range(self.ndim)]
		return tf.to_float(
		tf.reshape(
		tf.transpose(tf.stack(tf.meshgrid(*mesh_args))), (self.n_cells,
		self.ndim)))
		tf.transpose(tf.stack(tf.meshgrid(*mesh_args))),
		(self.n_cells, self.ndim)))


		class Dropout(Layer):
		@@ -3406,8 +3420,8 @@ class AtomicConvolution(Layer):
		example_tensors = tf.unstack(X, axis=0)
		example_nbrs = tf.unstack(nbr_indices, axis=0)
		all_nbr_coords = []
		for example, (example_tensor,
		example_nbr) in enumerate(zip(example_tensors, example_nbrs)):
		for example, (example_tensor, example_nbr) in enumerate(
		zip(example_tensors, example_nbrs)):
		nbr_coords = tf.gather(example_tensor, example_nbr)
		all_nbr_coords.append(nbr_coords)
		neighbors = tf.stack(all_nbr_coords)
		@@ -3973,13 +3987,13 @@ class GraphCNN(Layer):
		no_features = V.get_shape()[2].value
		W = tf.get_variable(
		'%s_weights' % self.name, [no_features * no_A, self.num_filters],
		initializer=tf.truncated_normal_initializer(stddev=math.sqrt(
		1.0 / (no_features * (no_A + 1) * 1.0))),
		initializer=tf.truncated_normal_initializer(
		stddev=math.sqrt(1.0 / (no_features * (no_A + 1) * 1.0))),
		dtype=tf.float32)
		W_I = tf.get_variable(
		'%s_weights_I' % self.name, [no_features, self.num_filters],
		initializer=tf.truncated_normal_initializer(stddev=math.sqrt(
		1.0 / (no_features * (no_A + 1) * 1.0))),
		initializer=tf.truncated_normal_initializer(
		stddev=math.sqrt(1.0 / (no_features * (no_A + 1) * 1.0))),
		dtype=tf.float32)

		b = tf.get_variable(

deepchem/models/tensorgraph/tests/test_layers.py

+1 −1

Original line number	Diff line number	Diff line
		@@ -318,7 +318,7 @@ class TestLayers(test_util.TensorFlowTestCase):
		label_tensor = tf.convert_to_tensor(label_tensor, dtype=tf.float32)
		out_tensor = SoftMaxCrossEntropy()(logit_tensor, label_tensor)
		out_tensor = out_tensor.eval()
		assert out_tensor.shape == (batch_size, 1)
		assert out_tensor.shape == (batch_size,)

		def test_reduce_mean(self):
		"""Test that ReduceMean can be invoked."""

deepchem/molnet/load_function/kaggle_datasets.py

+8 −6

Original line number	Diff line number	Diff line
		@@ -60,14 +60,14 @@ def gen_kaggle(KAGGLE_tasks,
		"KAGGLE_test2_disguised_combined_full.csv.gz")
		if not os.path.exists(train_files):
		deepchem.utils.download_url(
		'http://deepchem.io.s3-website-us-west-1.amazonaws.com/datasets/KAGGLE_training_disguised_combined_full.csv.gz'
		)
		'http://deepchem.io.s3-website-us-west-1.amazonaws.com/datasets/KAGGLE_training_disguised_combined_full.csv.gz',
		dest_dir=data_dir)
		deepchem.utils.download_url(
		'http://deepchem.io.s3-website-us-west-1.amazonaws.com/datasets/KAGGLE_test1_disguised_combined_full.csv.gz'
		)
		'http://deepchem.io.s3-website-us-west-1.amazonaws.com/datasets/KAGGLE_test1_disguised_combined_full.csv.gz',
		dest_dir=data_dir)
		deepchem.utils.download_url(
		'http://deepchem.io.s3-website-us-west-1.amazonaws.com/datasets/KAGGLE_test2_disguised_combined_full.csv.gz'
		)
		'http://deepchem.io.s3-website-us-west-1.amazonaws.com/datasets/KAGGLE_test2_disguised_combined_full.csv.gz',
		dest_dir=data_dir)

		# Featurize KAGGLE dataset
		print("About to featurize KAGGLE dataset.")
		@@ -125,6 +125,8 @@ def load_kaggle(shard_size=2000, featurizer=None, split=None, reload=True):
		data_dir = deepchem.utils.get_data_dir()

		data_dir = os.path.join(data_dir, "kaggle")
		if not os.path.exists(data_dir):
		os.mkdir(data_dir)
		train_dir = os.path.join(data_dir, "train_dir")
		valid_dir = os.path.join(data_dir, "valid_dir")
		test_dir = os.path.join(data_dir, "test_dir")

examples/chembl/chembl_datasets.py

deleted 100644 → 0

+0 −89

Original line number	Diff line number	Diff line
		"""
		ChEMBL dataset loader.
		"""
		from __future__ import division
		from __future__ import print_function
		from __future__ import unicode_literals

		import os
		import sys
		import time

		import deepchem as dc

		sys.path.append(os.path.dirname(os.path.abspath(__file__)))
		from chembl_tasks import chembl_tasks


		# Set shard size low to avoid memory problems.
		def load_chembl(shard_size=2000, featurizer="ECFP", set="5thresh", split="random"):
		  """Load, featurize, normalize and split the ChEMBL dataset.

		  Parameters
		  ----------
		  shard_size: int
		    Number of rows featurized per shard. Keep it low to avoid memory
		    problems.
		  featurizer: str or object
		    "ECFP" selects ``dc.feat.CircularFingerprint(size=1024)`` and
		    "GraphConv" selects ``dc.feat.ConvMolFeaturizer()``. Any other value is
		    handed to ``dc.data.CSVLoader`` unchanged.
		  set: str
		    Name of the ChEMBL subset; it is substituted into the CSV file names
		    (for example ``chembl_5thresh.csv.gz``). The name shadows the builtin
		    but is kept because callers may pass it by keyword.
		  split: str
		    "year" reads the pre-split train/valid/test files under ``year_sets/``.
		    "index", "random" and "scaffold" split the single combined file with
		    the matching ``dc.splits`` splitter.

		  Returns
		  -------
		  tuple
		    ``(chembl_tasks, (train, valid, test), transformers)``.

		  Raises
		  ------
		  ValueError
		    If ``split`` is not one of the supported names. This is checked before
		    any featurization so an invalid argument fails immediately instead of
		    surfacing as an unbound-variable error at the end.
		  """
		  # Keep the classes (not instances) so only the requested splitter is built.
		  splitter_classes = {
		      'index': dc.splits.IndexSplitter,
		      'random': dc.splits.RandomSplitter,
		      'scaffold': dc.splits.ScaffoldSplitter,
		  }
		  by_year = split == "year"
		  if not by_year and split not in splitter_classes:
		    raise ValueError("Unsupported split %r; expected 'year' or one of %s" %
		                     (split, sorted(splitter_classes)))

		  # Data files are located relative to this script.
		  current_dir = os.path.dirname(os.path.realpath(__file__))

		  # Load dataset
		  print("About to load ChEMBL dataset.")
		  if by_year:
		    train_files = os.path.join(current_dir,
		                               "year_sets/chembl_%s_ts_train.csv.gz" % set)
		    valid_files = os.path.join(current_dir,
		                               "year_sets/chembl_%s_ts_valid.csv.gz" % set)
		    test_files = os.path.join(current_dir,
		                              "year_sets/chembl_%s_ts_test.csv.gz" % set)
		  else:
		    dataset_path = os.path.join(current_dir,
		                                "../../datasets/chembl_%s.csv.gz" % set)

		  # Featurize ChEMBL dataset
		  print("About to featurize ChEMBL dataset.")
		  if featurizer == 'ECFP':
		    featurizer = dc.feat.CircularFingerprint(size=1024)
		  elif featurizer == 'GraphConv':
		    featurizer = dc.feat.ConvMolFeaturizer()

		  loader = dc.data.CSVLoader(
		      tasks=chembl_tasks, smiles_field="smiles", featurizer=featurizer)

		  if by_year:
		    print("Featurizing train datasets")
		    train_dataset = loader.featurize(train_files, shard_size=shard_size)

		    print("Featurizing valid datasets")
		    valid_dataset = loader.featurize(valid_files, shard_size=shard_size)

		    print("Featurizing test datasets")
		    test_dataset = loader.featurize(test_files, shard_size=shard_size)
		  else:
		    dataset = loader.featurize(dataset_path, shard_size=shard_size)

		  # Initialize transformers
		  print("About to transform data")
		  if by_year:
		    # Normalization statistics come from the training set only.
		    transformers = [
		        dc.trans.NormalizationTransformer(
		            transform_y=True, dataset=train_dataset)
		    ]
		    train, valid, test = train_dataset, valid_dataset, test_dataset
		    # Chain the transformers so each one sees the previous one's output.
		    for transformer in transformers:
		      train = transformer.transform(train)
		      valid = transformer.transform(valid)
		      test = transformer.transform(test)
		  else:
		    transformers = [
		        dc.trans.NormalizationTransformer(transform_y=True, dataset=dataset)
		    ]
		    for transformer in transformers:
		      dataset = transformer.transform(dataset)

		    splitter = splitter_classes[split]()
		    print("Performing new split.")
		    train, valid, test = splitter.train_valid_test_split(dataset)

		  return chembl_tasks, (train, valid, test), transformers

Admin message