Merge branch 'master' of https://github.com/deepchem/deepchem (48bbcfe4) · Commits · 钟慕尧 / deepchem

deepchem/models/tensorgraph/layers.py

+33 −0

Original line number	Diff line number	Diff line
		@@ -1542,6 +1542,12 @@ class InteratomicL2Distances(Layer):


		class SparseSoftMaxCrossEntropy(Layer):
		"""Computes Sparse softmax cross entropy between logits and labels.
		labels: Tensor of shape [d_0,d_1,....,d_{r-1}](where r is rank of logits) and must be of dtype int32 or int64.
		logits: Unscaled log probabilities of shape [d_0,....d{r-1},num_classes] and of dtype float32 or float64.
		Note: the rank of the logits should be 1 greater than that of labels.
		The output will be a tensor of same shape as labels and of same type as logits with the loss.
		"""

		def __init__(self, in_layers=None, **kwargs):
		super(SparseSoftMaxCrossEntropy, self).__init__(in_layers, **kwargs)
		@@ -4309,3 +4315,30 @@ class GraphCNN(Layer):
		result = tf.matmul(A_reshape, B)
		result = tf.reshape(result, tf.stack([A_shape[0], A_shape[1], axis_2]))
		return result


		class Hingeloss(Layer):
		    """Compute the element-wise hinge loss between labels and logits.

		    The layer takes exactly two inputs, in the order [labels, logits]:

		    labels: The values of this tensor are expected to be 1.0 or 0.0. The
		      shape should be the same as logits.
		    logits: A float tensor holding the logits for the labels.

		    The loss is computed with ``tf.losses.hinge_loss`` using
		    ``Reduction.NONE``, i.e. no reduction over elements is requested, so
		    the output is a loss tensor of the same shape as labels.
		    """

		    def __init__(self, in_layers=None, **kwargs):
		        """Create the layer and, when possible, record its output shape.

		        Parameters
		        ----------
		        in_layers: list of layers, optional
		          The input layers, in the order [labels, logits].
		        **kwargs
		          Forwarded unchanged to the parent ``Layer`` constructor.
		        """
		        super(Hingeloss, self).__init__(in_layers, **kwargs)
		        try:
		            # Best effort: take the output shape from the logits input.
		            self._shape = self.in_layers[1].shape
		        except Exception:
		            # The shape cannot be determined here (for example, the inputs
		            # were not supplied at construction time); leave it unset.
		            # ``Exception`` rather than a bare ``except`` so that
		            # KeyboardInterrupt and SystemExit are not swallowed.
		            pass

		    def create_tensor(self, in_layers=None, set_tensors=True, **kwargs):
		        """Build the hinge-loss tensor.

		        Parameters
		        ----------
		        in_layers: list of layers, optional
		          Inputs to use; resolved through ``self._get_input_tensors``.
		        set_tensors: bool
		          If True, the result is also stored on ``self.out_tensor``.

		        Returns
		        -------
		        The un-reduced hinge-loss tensor.

		        Raises
		        ------
		        ValueError
		          If the number of inputs is not exactly two.
		        """
		        inputs = self._get_input_tensors(in_layers)
		        if len(inputs) != 2:
		            raise ValueError(
		                "Hingeloss requires exactly 2 inputs [labels, logits], got %d" %
		                len(inputs))
		        labels, logits = inputs
		        out_tensor = tf.losses.hinge_loss(
		            labels=labels, logits=logits, reduction=tf.losses.Reduction.NONE)
		        if set_tensors:
		            self.out_tensor = out_tensor
		        return out_tensor

deepchem/models/tensorgraph/tests/test_layers.py

+27 −0

Original line number	Diff line number	Diff line
		@@ -25,6 +25,7 @@ from deepchem.models.tensorgraph.layers import GRU
		from deepchem.models.tensorgraph.layers import Gather
		from deepchem.models.tensorgraph.layers import GraphConv
		from deepchem.models.tensorgraph.layers import GraphGather
		from deepchem.models.tensorgraph.layers import Hingeloss
		from deepchem.models.tensorgraph.layers import Input
		from deepchem.models.tensorgraph.layers import InputFifoQueue
		from deepchem.models.tensorgraph.layers import InteratomicL2Distances
		@@ -45,6 +46,7 @@ from deepchem.models.tensorgraph.layers import Sigmoid
		from deepchem.models.tensorgraph.layers import SigmoidCrossEntropy
		from deepchem.models.tensorgraph.layers import SoftMax
		from deepchem.models.tensorgraph.layers import SoftMaxCrossEntropy
		from deepchem.models.tensorgraph.layers import SparseSoftMaxCrossEntropy
		from deepchem.models.tensorgraph.layers import StopGradient
		from deepchem.models.tensorgraph.layers import TensorWrapper
		from deepchem.models.tensorgraph.layers import TimeSeriesDense
		@@ -381,6 +383,18 @@ class TestLayers(test_util.TensorFlowTestCase):
		out_tensor = out_tensor.eval()
		assert out_tensor.shape == (batch_size,)

		def test_sparse_softmax_cross_entropy(self):
		    """Test that SparseSoftMaxCrossEntropy can be invoked."""
		    batch_size = 10
		    n_classes = 5
		    logit_tensor = np.random.rand(batch_size, n_classes)
		    # Sparse labels are integer class indices in [0, n_classes). Drawing
		    # them as integers exercises every class; random floats in [0, 1)
		    # would all truncate to class 0 when converted to int32.
		    label_tensor = np.random.randint(0, n_classes, size=batch_size)
		    with self.test_session() as sess:
		        logit_tensor = tf.convert_to_tensor(logit_tensor, dtype=tf.float32)
		        label_tensor = tf.convert_to_tensor(label_tensor, dtype=tf.int32)
		        out_tensor = SparseSoftMaxCrossEntropy()(label_tensor, logit_tensor)
		        out_tensor = out_tensor.eval()
		        # One loss value per example: same shape as the labels.
		        assert out_tensor.shape == (batch_size,)

		def test_reduce_mean(self):
		"""Test that ReduceMean can be invoked."""
		batch_size = 10
		@@ -875,3 +889,16 @@ class TestLayers(test_util.TensorFlowTestCase):
		assert out_tensor.shape == (batch_size, n_tasks)
		irv_reg = IRVRegularize(irv_layer, 1.)()
		assert irv_reg.eval() >= 0

		def test_hingeloss(self):
		    """Test that Hingeloss can be invoked."""
		    batch_size = 10
		    logits_tensor = np.random.rand(batch_size)
		    # The layer documents labels as 0.0 or 1.0, so draw binary labels
		    # instead of arbitrary floats.
		    labels_tensor = np.random.randint(0, 2, size=batch_size)
		    with self.test_session() as sess:
		        logits_tensor = tf.convert_to_tensor(logits_tensor, dtype=tf.float32)
		        labels_tensor = tf.convert_to_tensor(labels_tensor, dtype=tf.float32)
		        out_tensor = Hingeloss()(labels_tensor, logits_tensor)
		        out_tensor = out_tensor.eval()
		        # The loss is not reduced, so there is one value per example.
		        assert out_tensor.shape == (batch_size,)

deepchem/models/tensorgraph/tests/test_layers_pickle.py

+22 −1

Original line number	Diff line number	Diff line
		@@ -10,7 +10,7 @@ from deepchem.models.tensorgraph.layers import Feature, Conv1D, Dense, Flatten,
		SoftMaxCrossEntropy, ReduceMean, ToFloat, ReduceSquareDifference, Conv2D, MaxPool2D, ReduceSum, GraphConv, GraphPool, \
		GraphGather, BatchNorm, WeightedError, ReLU, \
		Conv3D, MaxPool3D, Conv2DTranspose, Conv3DTranspose, \
		LSTMStep, AttnLSTMEmbedding, IterRefLSTMEmbedding, GraphEmbedPoolLayer, GraphCNN, Cast
		LSTMStep, AttnLSTMEmbedding, IterRefLSTMEmbedding, GraphEmbedPoolLayer, GraphCNN, Cast,Hingeloss,SparseSoftMaxCrossEntropy
		from deepchem.models.tensorgraph.symmetry_functions import AtomicDifferentiatedDense
		from deepchem.models.tensorgraph.IRV import IRVLayer, IRVRegularize, Slice

		@@ -269,6 +269,17 @@ def test_SoftmaxCrossEntropy_pickle():
		tg.save()


		def test_SparseSoftmaxCrossEntropy_pickle():
		    """Build and save a TensorGraph that uses SparseSoftMaxCrossEntropy.

		    The layer is registered both as an output and as the loss of the graph.
		    """
		    tg = TensorGraph()
		    # Inputs are created in the order logits, labels.
		    logit_input = Feature(shape=(tg.batch_size, 5))
		    label_input = Feature(shape=(tg.batch_size,), dtype=tf.int32)
		    # The layer itself expects its inputs as [labels, logits].
		    loss_layer = SparseSoftMaxCrossEntropy(in_layers=[label_input, logit_input])
		    tg.add_output(loss_layer)
		    tg.set_loss(loss_layer)
		    tg.build()
		    tg.save()


		def test_SigmoidCrossEntropy_pickle():
		tg = TensorGraph()
		feature = Feature(shape=(tg.batch_size, 1))
		@@ -682,3 +693,13 @@ def test_Slice_pickle():
		tg.set_loss(out)
		tg.build()
		tg.save()


		def test_hingeloss_pickle():
		    """Build and save a TensorGraph that uses Hingeloss.

		    The same Feature is passed as both inputs of the layer, and the layer
		    is registered both as an output and as the loss of the graph.
		    """
		    tg = TensorGraph()
		    shared_input = Feature(shape=(1, None))
		    hinge_inputs = [shared_input, shared_input]
		    hinge_layer = Hingeloss(in_layers=hinge_inputs)
		    tg.add_output(hinge_layer)
		    tg.set_loss(hinge_layer)
		    tg.build()
		    tg.save()

examples/notebooks/Deepchem_NumpyDataset_tutorial.ipynb

0 → 100644

+325 −0

Original line number	Diff line number	Diff line
		%% Cell type:markdown id: tags:

		# Using Deepchem Datasets
		In this tutorial we will have a look at the various deepchem `dataset` methods available in `deepchem.data.datasets`.

		%% Cell type:code id: tags:

		``` python
		import deepchem as dc
		import numpy as np
		import random
		```

		%% Output

		/home/skand/anaconda2/lib/python2.7/site-packages/h5py/__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.
		from ._conv import register_converters as _register_converters

		%% Cell type:markdown id: tags:

		# Using NumpyDatasets
		This is used when you have your data in numpy arrays.

		%% Cell type:code id: tags:

		``` python
		# data is your dataset: a numpy array of shape (4, 4).
		data = np.random.random((4, 4))
		labels = np.random.random((4,)) # labels: a numpy array of shape (4,)
		```

		%% Cell type:code id: tags:

		``` python
		from deepchem.data.datasets import NumpyDataset # import NumpyDataset
		```

		%% Cell type:code id: tags:

		``` python
		dataset = NumpyDataset(data, labels) # creates numpy dataset object
		```

		%% Cell type:markdown id: tags:

		## Extracting X, y from NumpyDataset Object
		Extracting the data and labels from the NumpyDataset is very easy.

		%% Cell type:code id: tags:

		``` python
		dataset.X # Extracts the data (X) from the NumpyDataset Object
		```

		%% Output

		array([[0.63188616, 0.24690483, 0.85294168, 0.15512774],
		[0.62009111, 0.00525149, 0.56082693, 0.0649767 ],
		[0.57476389, 0.92047762, 0.36311505, 0.53421993],
		[0.5768823 , 0.51945064, 0.9655427 , 0.82099216]])

		%% Cell type:code id: tags:

		``` python
		dataset.y # Extracts the labels (y) from the NumpyDataset Object
		```

		%% Output

		array([[0.5102078 ],
		[0.76199464],
		[0.77398379],
		[0.09498917]])

		%% Cell type:markdown id: tags:

		## Weights of a dataset - w
		Apart from `X` and `y`, which are the data and the labels, you can also assign a weight `w` to each data instance. The shape of `w` is the same as that of `y` (which is Nx1, where N is the number of data instances).

		NOTE: By default `w` is a vector initialized with equal weights (all being 1).

		%% Cell type:code id: tags:

		``` python
		dataset.w # printing the weights that are assigned by default. Notice that they are a vector of 1's
		```

		%% Output

		array([[1.],
		[1.],
		[1.],
		[1.]])

		%% Cell type:code id: tags:

		``` python
		w = np.random.random((4,)) # initializing weights with a random vector of shape (4,)
		dataset_with_weights = NumpyDataset(data, labels, w) # creates a NumpyDataset object with explicit weights
		```

		%% Cell type:code id: tags:

		``` python
		dataset_with_weights.w
		```

		%% Output

		array([[0.85432113],
		[0.91847254],
		[0.59774769],
		[0.36659207]])

		%% Cell type:markdown id: tags:

		## Iterating over NumpyDataset
		In order to iterate over a NumpyDataset, we use the `itersamples` method. Each iteration yields 4 quantities, namely `X`, `y`, `w` and `ids`. The first three quantities are the same as discussed above, and `ids` is the id of the data instance. By default the ids are assigned in order, starting from `0`.

		%% Cell type:code id: tags:

		``` python
		# Each sample yields the data, label, weight and id of one instance.
		# The id is bound to `sample_id` so the builtin `id` is not shadowed.
		for x, y, w, sample_id in dataset.itersamples():
		    print(x, y, w, sample_id)
		```

		%% Output

		(array([0.63188616, 0.24690483, 0.85294168, 0.15512774]), array([0.5102078]), array([1.]), 0)
		(array([0.62009111, 0.00525149, 0.56082693, 0.0649767 ]), array([0.76199464]), array([1.]), 1)
		(array([0.57476389, 0.92047762, 0.36311505, 0.53421993]), array([0.77398379]), array([1.]), 2)
		(array([0.5768823 , 0.51945064, 0.9655427 , 0.82099216]), array([0.09498917]), array([1.]), 3)

		%% Cell type:markdown id: tags:

		You can also extract the ids by `dataset.ids`. This would return a numpy array consisting of the ids of the data instances.

		%% Cell type:code id: tags:

		``` python
		dataset.ids
		```

		%% Output

		array([0, 1, 2, 3], dtype=object)

		%% Cell type:markdown id: tags:

		## MNIST Example
		Just to get a better understanding, let's read the MNIST data and use `NumpyDataset` to store it.

		%% Cell type:code id: tags:

		``` python
		from tensorflow.examples.tutorials.mnist import input_data
		```

		%% Cell type:code id: tags:

		``` python
		mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
		```

		%% Output

		Extracting MNIST_data/train-images-idx3-ubyte.gz
		Extracting MNIST_data/train-labels-idx1-ubyte.gz
		Extracting MNIST_data/t10k-images-idx3-ubyte.gz
		Extracting MNIST_data/t10k-labels-idx1-ubyte.gz

		%% Cell type:code id: tags:

		``` python
		# Load the numpy data of MNIST into NumpyDataset
		train = NumpyDataset(mnist.train.images, mnist.train.labels)
		valid = NumpyDataset(mnist.validation.images, mnist.validation.labels)
		```

		%% Cell type:code id: tags:

		``` python
		import matplotlib.pyplot as plt
		```

		%% Cell type:code id: tags:

		``` python
		# Visualize one sample
		sample = np.reshape(train.X[5], (28, 28))
		plt.imshow(sample)
		plt.show()
		```

		%% Output



		%% Cell type:markdown id: tags:

		## Numpy Array to `tf.data.Dataset`
		This is quite similar to getting a `NumpyDataset` object from numpy arrays.

		%% Cell type:code id: tags:

		``` python
		import tensorflow as tf
		data_small = np.random.random((4,5))
		label_small = np.random.random((4,))
		dataset = tf.data.Dataset.from_tensor_slices((data_small, label_small))
		print ("Data\n")
		print (data_small)
		print ("\n Labels")
		print (label_small)
		```

		%% Output

		Data

		[[0.78574579 0.79398959 0.64737371 0.20447343 0.55009141]
		[0.39201333 0.12299678 0.69700424 0.57494847 0.59895521]
		[0.711899 0.22786574 0.6436164 0.49713391 0.31487844]
		[0.95354154 0.67493395 0.84554228 0.15894518 0.0154379 ]]

		Labels
		[0.61605796 0.07695742 0.1084755 0.30322915]

		%% Cell type:markdown id: tags:

		## Extracting the numpy dataset from tf.data
		In order to extract the numpy array from the `tf.data`, you first need to define an `iterator` to iterate over the `tf.data.Dataset` object and then in the tensorflow session, run over the iterator to get the data instances. Let's have a look at how it's done.

		%% Cell type:code id: tags:

		``` python
		iterator = dataset.make_one_shot_iterator() # iterator over the tf.data.Dataset
		next_element = iterator.get_next()
		numpy_data = np.zeros((4, 5))
		numpy_label = np.zeros((4,))
		# The `with` block closes the session once all elements have been read.
		with tf.Session() as sess: # tensorflow session
		    for i in range(4):
		        data_, label_ = sess.run(next_element) # data_ contains the data and label_ contains the labels that we fed in the previous step
		        numpy_data[i, :] = data_
		        numpy_label[i] = label_

		print ("Numpy Data")
		print(numpy_data)
		print ("\n Numpy Label")
		print(numpy_label)
		```

		%% Output

		Numpy Data
		[[0.78574579 0.79398959 0.64737371 0.20447343 0.55009141]
		[0.39201333 0.12299678 0.69700424 0.57494847 0.59895521]
		[0.711899 0.22786574 0.6436164 0.49713391 0.31487844]
		[0.95354154 0.67493395 0.84554228 0.15894518 0.0154379 ]]

		Numpy Label
		[0.61605796 0.07695742 0.1084755 0.30322915]

		%% Cell type:markdown id: tags:

		Now that you have the numpy arrays of `data` and `labels`, you can convert it to `NumpyDataset`.

		%% Cell type:code id: tags:

		``` python
		dataset_ = NumpyDataset(numpy_data, numpy_label) # convert to NumpyDataset
		dataset_.X # printing just to check if the data is same!!
		```

		%% Output

		array([[0.78574579, 0.79398959, 0.64737371, 0.20447343, 0.55009141],
		[0.39201333, 0.12299678, 0.69700424, 0.57494847, 0.59895521],
		[0.711899 , 0.22786574, 0.6436164 , 0.49713391, 0.31487844],
		[0.95354154, 0.67493395, 0.84554228, 0.15894518, 0.0154379 ]])

		%% Cell type:markdown id: tags:

		## Converting NumpyDataset to `tf.data`
		This can be easily done with the `make_iterator()` method of `NumpyDataset`, which converts the `NumpyDataset` to `tf.data`. Let's look at how it's done!

		%% Cell type:code id: tags:

		``` python
		iterator_ = dataset_.make_iterator() # Using make_iterator for converting NumpyDataset to tf.data
		next_element_ = iterator_.get_next()

		# The `with` block closes the session once the element has been fetched.
		with tf.Session() as sess: # tensorflow session
		    data_and_labels = sess.run(next_element_) # data_and_labels holds the data at index 0 and the labels at index 1


		print ("Numpy Data")
		print(data_and_labels[0]) # Data in the first index
		print ("\n Numpy Label")
		print(data_and_labels[1]) # Labels in the second index
		```

		%% Output

		Numpy Data
		[[0.78574579 0.79398959 0.64737371 0.20447343 0.55009141]
		[0.95354154 0.67493395 0.84554228 0.15894518 0.0154379 ]
		[0.711899 0.22786574 0.6436164 0.49713391 0.31487844]
		[0.39201333 0.12299678 0.69700424 0.57494847 0.59895521]]

		Numpy Label
		[[0.61605796]
		[0.30322915]
		[0.1084755 ]
		[0.07695742]]

		%% Cell type:code id: tags:

		``` python
		```

		%% Cell type:code id: tags:

		``` python
		```

examples/notebooks/Estimators.ipynb

0 → 100644

+355 −0

File added.

Preview size limit exceeded, changes collapsed.

Admin message