Commit 48bbcfe4 authored by nitinprakash96's avatar nitinprakash96
Browse files

Merge branch 'master' of https://github.com/deepchem/deepchem

parents 8c85d155 d5282b37
Loading
Loading
Loading
Loading
+33 −0
Original line number Diff line number Diff line
@@ -1542,6 +1542,12 @@ class InteratomicL2Distances(Layer):


class SparseSoftMaxCrossEntropy(Layer):
  """Computes Sparse softmax cross entropy between logits and labels.
  labels: Tensor of shape [d_0,d_1,....,d_{r-1}](where r is rank of logits) and must be of dtype int32 or int64.
  logits: Unscaled log probabilities of shape [d_0,....d{r-1},num_classes] and of dtype float32 or float64.
  Note: the rank of the logits should be 1 greater than that of labels.
  The output will be a tensor of same shape as labels and of same type as logits with the loss.
  """

  def __init__(self, in_layers=None, **kwargs):
    super(SparseSoftMaxCrossEntropy, self).__init__(in_layers, **kwargs)
@@ -4309,3 +4315,30 @@ class GraphCNN(Layer):
    result = tf.matmul(A_reshape, B)
    result = tf.reshape(result, tf.stack([A_shape[0], A_shape[1], axis_2]))
    return result


class Hingeloss(Layer):
  """Computes the element-wise hinge loss between labels and logits.

  Expects exactly two input layers, in the order [labels, logits]:

  labels: Ground-truth tensor whose values are expected to be 1.0 or 0.0.
    Its shape should be the same as that of logits.
  logits: Float tensor of raw (unbounded) prediction scores. Per the
    TensorFlow documentation of tf.losses.hinge_loss these are treated as
    0-centered scores, not as log probabilities.

  The loss is computed with reduction disabled, so the output is a loss
  tensor of the same shape as labels.
  """

  def __init__(self, in_layers=None, **kwargs):
    super(Hingeloss, self).__init__(in_layers, **kwargs)
    # Best-effort shape inference from the second input (the logits). If that
    # input is missing or its shape cannot be read, the shape is left unset.
    # `Exception` (rather than a bare `except`) lets KeyboardInterrupt and
    # SystemExit propagate.
    try:
      self._shape = self.in_layers[1].shape
    except Exception:
      pass

  def create_tensor(self, in_layers=None, set_tensors=True, **kwargs):
    """Builds the hinge loss tensor from the [labels, logits] inputs.

    in_layers: Optional replacement inputs, forwarded to
      self._get_input_tensors.
    set_tensors: If True, the result is also stored in self.out_tensor.

    Returns the unreduced hinge loss tensor.
    Raises ValueError if the number of inputs is not exactly 2.
    """
    inputs = self._get_input_tensors(in_layers)
    if len(inputs) != 2:
      raise ValueError(
          "Hingeloss requires exactly 2 inputs [labels, logits], got %d" %
          len(inputs))
    labels, logits = inputs[0], inputs[1]
    # Reduction.NONE keeps one loss value per element instead of a scalar.
    out_tensor = tf.losses.hinge_loss(
        labels=labels, logits=logits, reduction=tf.losses.Reduction.NONE)
    if set_tensors:
      self.out_tensor = out_tensor
    return out_tensor
+27 −0
Original line number Diff line number Diff line
@@ -25,6 +25,7 @@ from deepchem.models.tensorgraph.layers import GRU
from deepchem.models.tensorgraph.layers import Gather
from deepchem.models.tensorgraph.layers import GraphConv
from deepchem.models.tensorgraph.layers import GraphGather
from deepchem.models.tensorgraph.layers import Hingeloss
from deepchem.models.tensorgraph.layers import Input
from deepchem.models.tensorgraph.layers import InputFifoQueue
from deepchem.models.tensorgraph.layers import InteratomicL2Distances
@@ -45,6 +46,7 @@ from deepchem.models.tensorgraph.layers import Sigmoid
from deepchem.models.tensorgraph.layers import SigmoidCrossEntropy
from deepchem.models.tensorgraph.layers import SoftMax
from deepchem.models.tensorgraph.layers import SoftMaxCrossEntropy
from deepchem.models.tensorgraph.layers import SparseSoftMaxCrossEntropy
from deepchem.models.tensorgraph.layers import StopGradient
from deepchem.models.tensorgraph.layers import TensorWrapper
from deepchem.models.tensorgraph.layers import TimeSeriesDense
@@ -381,6 +383,18 @@ class TestLayers(test_util.TensorFlowTestCase):
      out_tensor = out_tensor.eval()
      assert out_tensor.shape == (batch_size,)

  def test_sparse_softmax_cross_entropy(self):
    """Test that SparseSoftMaxCrossEntropy can be invoked."""
    batch_size = 10
    n_features = 5
    logit_tensor = np.random.rand(batch_size, n_features)
    # Sparse labels are integer class indices in [0, n_features). Drawing them
    # with np.random.rand would give floats in [0, 1) that all truncate to
    # class 0 when cast to int32, so only one class would ever be exercised.
    label_tensor = np.random.randint(n_features, size=batch_size)
    with self.test_session() as sess:
      logit_tensor = tf.convert_to_tensor(logit_tensor, dtype=tf.float32)
      label_tensor = tf.convert_to_tensor(label_tensor, dtype=tf.int32)
      out_tensor = SparseSoftMaxCrossEntropy()(label_tensor, logit_tensor)
      out_tensor = out_tensor.eval()
      # One loss value per sample: the class axis of the logits is consumed.
      assert out_tensor.shape == (batch_size,)

  def test_reduce_mean(self):
    """Test that ReduceMean can be invoked."""
    batch_size = 10
@@ -875,3 +889,16 @@ class TestLayers(test_util.TensorFlowTestCase):
      assert out_tensor.shape == (batch_size, n_tasks)
      irv_reg = IRVRegularize(irv_layer, 1.)()
      assert irv_reg.eval() >= 0

  def test_hingeloss(self):
    """Test that Hingeloss can be invoked."""
    batch_size = 10
    logits_tensor = np.random.rand(batch_size)
    # The hinge loss expects binary labels encoded as 0.0 or 1.0, so draw
    # them from {0, 1} instead of using arbitrary floats in [0, 1).
    labels_tensor = np.random.randint(2, size=batch_size)
    with self.test_session() as sess:
      logits_tensor = tf.convert_to_tensor(logits_tensor, dtype=tf.float32)
      labels_tensor = tf.convert_to_tensor(labels_tensor, dtype=tf.float32)
      out_tensor = Hingeloss()(labels_tensor, logits_tensor)
      out_tensor = out_tensor.eval()
      # The loss is unreduced, so it keeps the shape of the labels.
      assert out_tensor.shape == (batch_size,)
+22 −1
Original line number Diff line number Diff line
@@ -10,7 +10,7 @@ from deepchem.models.tensorgraph.layers import Feature, Conv1D, Dense, Flatten,
  SoftMaxCrossEntropy, ReduceMean, ToFloat, ReduceSquareDifference, Conv2D, MaxPool2D, ReduceSum, GraphConv, GraphPool, \
  GraphGather, BatchNorm, WeightedError, ReLU, \
  Conv3D, MaxPool3D, Conv2DTranspose, Conv3DTranspose, \
  LSTMStep, AttnLSTMEmbedding, IterRefLSTMEmbedding, GraphEmbedPoolLayer, GraphCNN, Cast
  LSTMStep, AttnLSTMEmbedding, IterRefLSTMEmbedding, GraphEmbedPoolLayer, GraphCNN, Cast,Hingeloss,SparseSoftMaxCrossEntropy
from deepchem.models.tensorgraph.symmetry_functions import AtomicDifferentiatedDense
from deepchem.models.tensorgraph.IRV import IRVLayer, IRVRegularize, Slice

@@ -269,6 +269,17 @@ def test_SoftmaxCrossEntropy_pickle():
  tg.save()


def test_SparseSoftmaxCrossEntropy_pickle():
  """Build and save a TensorGraph that uses SparseSoftMaxCrossEntropy.

  The layer is registered both as an output and as the loss of the graph.
  """
  graph = TensorGraph()
  # Logits carry 5 classes per sample; labels carry one int32 per sample.
  logit_input = Feature(shape=(graph.batch_size, 5))
  label_input = Feature(shape=(graph.batch_size,), dtype=tf.int32)
  loss_layer = SparseSoftMaxCrossEntropy(in_layers=[label_input, logit_input])
  graph.add_output(loss_layer)
  graph.set_loss(loss_layer)
  graph.build()
  graph.save()


def test_SigmoidCrossEntropy_pickle():
  tg = TensorGraph()
  feature = Feature(shape=(tg.batch_size, 1))
@@ -682,3 +693,13 @@ def test_Slice_pickle():
  tg.set_loss(out)
  tg.build()
  tg.save()


def test_hingeloss_pickle():
  """Build and save a TensorGraph that uses Hingeloss.

  The same Feature layer is fed in as both the labels and the logits input.
  """
  graph = TensorGraph()
  shared_input = Feature(shape=(1, None))
  loss_layer = Hingeloss(in_layers=[shared_input, shared_input])
  graph.add_output(loss_layer)
  graph.set_loss(loss_layer)
  graph.build()
  graph.save()
+325 −0
Original line number Diff line number Diff line
%% Cell type:markdown id: tags:

# Using Deepchem Datasets
In this tutorial we will have a look at the various deepchem `dataset` methods present in `deepchem.data.datasets`.

%% Cell type:code id: tags:

``` python
import deepchem as dc
import numpy as np
import random
```

%% Output

    /home/skand/anaconda2/lib/python2.7/site-packages/h5py/__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.
      from ._conv import register_converters as _register_converters

%% Cell type:markdown id: tags:

# Using NumpyDatasets
This is used when you have your data in numpy arrays.

%% Cell type:code id: tags:

``` python
# data is your dataset as a numpy array of size 4x4 (4 samples, 4 features each).
data = np.random.random((4, 4))
labels = np.random.random((4,)) # labels: one value per sample, 4 in total
```

%% Cell type:code id: tags:

``` python
from deepchem.data.datasets import NumpyDataset # import NumpyDataset
```

%% Cell type:code id: tags:

``` python
dataset = NumpyDataset(data, labels) # creates numpy dataset object
```

%% Cell type:markdown id: tags:

## Extracting X, y from NumpyDataset Object
Extracting the data and labels from the NumpyDataset is very easy.

%% Cell type:code id: tags:

``` python
dataset.X # Extracts the data (X) from the NumpyDataset Object
```

%% Output

    array([[0.63188616, 0.24690483, 0.85294168, 0.15512774],
           [0.62009111, 0.00525149, 0.56082693, 0.0649767 ],
           [0.57476389, 0.92047762, 0.36311505, 0.53421993],
           [0.5768823 , 0.51945064, 0.9655427 , 0.82099216]])

%% Cell type:code id: tags:

``` python
dataset.y # Extracts the labels (y) from the NumpyDataset Object
```

%% Output

    array([[0.5102078 ],
           [0.76199464],
           [0.77398379],
           [0.09498917]])

%% Cell type:markdown id: tags:

## Weights of a dataset - w
So apart from `X` and `y` which are the data and the labels, you can also assign weights `w` to each data instance. The dimension of `w` is same as that of `y`(which is Nx1 where N is the number of data instances).

**NOTE:** By default `w` is a vector initialized with equal weights (all being 1).

%% Cell type:code id: tags:

``` python
dataset.w # printing the weights that are assigned by default. Notice that they are a vector of 1's
```

%% Output

    array([[1.],
           [1.],
           [1.],
           [1.]])

%% Cell type:code id: tags:

``` python
w = np.random.random((4,)) # initializing weights with a random vector of size 4 (one weight per sample)
dataset_with_weights = NumpyDataset(data, labels, w) # creates numpy dataset object
```

%% Cell type:code id: tags:

``` python
dataset_with_weights.w
```

%% Output

    array([[0.85432113],
           [0.91847254],
           [0.59774769],
           [0.36659207]])

%% Cell type:markdown id: tags:

## Iterating over NumpyDataset
In order to iterate over a NumpyDataset, we use the `itersamples` method. We iterate over 4 quantities, namely `X`, `y`, `w` and `ids`. The first three quantities are the same as discussed above, and `ids` is the id of the data instance. By default the ids are assigned in order, starting from `0`.

%% Cell type:code id: tags:

``` python
# `sample_id` is used instead of `id` so the builtin id() is not shadowed.
for x, y, w, sample_id in dataset.itersamples():
    print(x, y, w, sample_id)
```

%% Output

    (array([0.63188616, 0.24690483, 0.85294168, 0.15512774]), array([0.5102078]), array([1.]), 0)
    (array([0.62009111, 0.00525149, 0.56082693, 0.0649767 ]), array([0.76199464]), array([1.]), 1)
    (array([0.57476389, 0.92047762, 0.36311505, 0.53421993]), array([0.77398379]), array([1.]), 2)
    (array([0.5768823 , 0.51945064, 0.9655427 , 0.82099216]), array([0.09498917]), array([1.]), 3)

%% Cell type:markdown id: tags:

You can also extract the ids by `dataset.ids`. This would return a numpy array consisting of the ids of the data instances.

%% Cell type:code id: tags:

``` python
dataset.ids
```

%% Output

    array([0, 1, 2, 3], dtype=object)

%% Cell type:markdown id: tags:

## MNIST Example
Just to get a better understanding, let's read the MNIST data and use `NumpyDataset` to store it.

%% Cell type:code id: tags:

``` python
from tensorflow.examples.tutorials.mnist import input_data
```

%% Cell type:code id: tags:

``` python
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
```

%% Output

    Extracting MNIST_data/train-images-idx3-ubyte.gz
    Extracting MNIST_data/train-labels-idx1-ubyte.gz
    Extracting MNIST_data/t10k-images-idx3-ubyte.gz
    Extracting MNIST_data/t10k-labels-idx1-ubyte.gz

%% Cell type:code id: tags:

``` python
# Load the numpy data of MNIST into NumpyDataset
train = NumpyDataset(mnist.train.images, mnist.train.labels)
valid = NumpyDataset(mnist.validation.images, mnist.validation.labels)
```

%% Cell type:code id: tags:

``` python
import matplotlib.pyplot as plt
```

%% Cell type:code id: tags:

``` python
# Visualize one sample
sample = np.reshape(train.X[5], (28, 28))
plt.imshow(sample)
plt.show()
```

%% Output


%% Cell type:markdown id: tags:

## Numpy Array to `tf.data.Dataset`
This is quite similar to getting a `NumpyDataset` object from numpy arrays.

%% Cell type:code id: tags:

``` python
import tensorflow as tf
data_small = np.random.random((4,5))
label_small = np.random.random((4,))
dataset = tf.data.Dataset.from_tensor_slices((data_small, label_small))
print ("Data\n")
print (data_small)
print ("\n Labels")
print (label_small)
```

%% Output

    Data
    
    [[0.78574579 0.79398959 0.64737371 0.20447343 0.55009141]
     [0.39201333 0.12299678 0.69700424 0.57494847 0.59895521]
     [0.711899   0.22786574 0.6436164  0.49713391 0.31487844]
     [0.95354154 0.67493395 0.84554228 0.15894518 0.0154379 ]]
    
     Labels
    [0.61605796 0.07695742 0.1084755  0.30322915]

%% Cell type:markdown id: tags:

## Extracting the numpy dataset from tf.data
In order to extract the numpy array from the `tf.data`, you first need to define an `iterator` to iterate over the `tf.data.Dataset` object and then in the tensorflow session, run over the iterator to get the data instances. Let's have a look at how it's done.

%% Cell type:code id: tags:

``` python
iterator = dataset.make_one_shot_iterator() # iterator
next_element = iterator.get_next()
numpy_data = np.zeros((4, 5))
numpy_label = np.zeros((4,))
# Run the session as a context manager so it is closed once all rows are read.
with tf.Session() as sess: # tensorflow session
    for i in range(4):
        data_, label_ = sess.run(next_element) # data_ contains the data and label_ contains the labels that we fed in the previous step
        numpy_data[i, :] = data_
        numpy_label[i] = label_

print ("Numpy Data")
print(numpy_data)
print ("\n Numpy Label")
print(numpy_label)
```

%% Output

    Numpy Data
    [[0.78574579 0.79398959 0.64737371 0.20447343 0.55009141]
     [0.39201333 0.12299678 0.69700424 0.57494847 0.59895521]
     [0.711899   0.22786574 0.6436164  0.49713391 0.31487844]
     [0.95354154 0.67493395 0.84554228 0.15894518 0.0154379 ]]
    
     Numpy Label
    [0.61605796 0.07695742 0.1084755  0.30322915]

%% Cell type:markdown id: tags:

Now that you have the numpy arrays of `data` and `labels`, you can convert it to `NumpyDataset`.

%% Cell type:code id: tags:

``` python
dataset_ = NumpyDataset(numpy_data, numpy_label) # convert to NumpyDataset
dataset_.X  # printing just to check if the data is same!!
```

%% Output

    array([[0.78574579, 0.79398959, 0.64737371, 0.20447343, 0.55009141],
           [0.39201333, 0.12299678, 0.69700424, 0.57494847, 0.59895521],
           [0.711899  , 0.22786574, 0.6436164 , 0.49713391, 0.31487844],
           [0.95354154, 0.67493395, 0.84554228, 0.15894518, 0.0154379 ]])

%% Cell type:markdown id: tags:

## Converting NumpyDataset to `tf.data`
This can be easily done with the `make_iterator()` method of `NumpyDataset`, which converts the `NumpyDataset` to `tf.data`. Let's look at how it's done!

%% Cell type:code id: tags:

``` python
iterator_ = dataset_.make_iterator() # Using make_iterator for converting NumpyDataset to tf.data
next_element_ = iterator_.get_next()

# Run the session as a context manager so it is closed once the values are fetched.
with tf.Session() as sess: # tensorflow session
    data_and_labels = sess.run(next_element_) # data_and_labels[0] holds the data and data_and_labels[1] holds the labels


print ("Numpy Data")
print(data_and_labels[0])  # Data in the first index
print ("\n Numpy Label")
print(data_and_labels[1])  # Labels in the second index
```

%% Output

    Numpy Data
    [[0.78574579 0.79398959 0.64737371 0.20447343 0.55009141]
     [0.95354154 0.67493395 0.84554228 0.15894518 0.0154379 ]
     [0.711899   0.22786574 0.6436164  0.49713391 0.31487844]
     [0.39201333 0.12299678 0.69700424 0.57494847 0.59895521]]
    
     Numpy Label
    [[0.61605796]
     [0.30322915]
     [0.1084755 ]
     [0.07695742]]

%% Cell type:code id: tags:

``` python
```

%% Cell type:code id: tags:

``` python
```
+355 −0

File added.

Preview size limit exceeded, changes collapsed.

Loading