Unverified Commit 52c2350c authored by Karl Leswing's avatar Karl Leswing Committed by GitHub
Browse files

Merge pull request #1300 from pvskand/master

DataTransforms API for Images 
parents 5820f4c9 2979c920
Loading
Loading
Loading
Loading
+55 −0
Original line number Diff line number Diff line
@@ -6,6 +6,8 @@ from __future__ import unicode_literals

from deepchem.molnet import load_delaney
from deepchem.trans.transformers import FeaturizationTransformer
from deepchem.trans.transformers import DataTransforms
from tensorflow.examples.tutorials.mnist import input_data

__author__ = "Bharath Ramsundar"
__copyright__ = "Copyright 2016, Stanford University"
@@ -16,6 +18,7 @@ import unittest
import numpy as np
import pandas as pd
import deepchem as dc
import scipy.ndimage


class TestTransformers(unittest.TestCase):
@@ -26,6 +29,18 @@ class TestTransformers(unittest.TestCase):
  def setUp(self):
    """Records the test directory and prepares one MNIST image as self.d."""
    super(TestTransformers, self).setUp()
    this_file = os.path.abspath(__file__)
    self.current_dir = os.path.dirname(this_file)

    # Load the MNIST data used by the DataTransforms tests.
    mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
    # Only the validation split is wrapped in a dataset.
    validation = mnist.validation
    valid = dc.data.NumpyDataset(validation.images, validation.labels)
    # Take the first sample's features (labels are not needed) and view the
    # flat vector as a 28 x 28 image.
    first_sample = valid.X[0]
    self.d = np.reshape(first_sample, (28, 28))

  def test_y_log_transformer(self):
    """Tests logarithmic data transformer."""
@@ -482,3 +497,43 @@ class TestTransformers(unittest.TestCase):

    self.assertEqual(new_train.y.shape, train.y.shape)
    self.assertEqual(new_train.X.shape[-1], fp_size)

  def test_blurring(self):
    """Tests that gaussian_blur agrees with scipy.ndimage.gaussian_filter."""
    sigma = 1.5
    expected = scipy.ndimage.gaussian_filter(self.d, sigma)
    actual = DataTransforms(self.d).gaussian_blur(sigma=sigma)
    assert np.allclose(expected, actual)

  def test_rotation(self):
    """Tests that rotate agrees with scipy.ndimage.rotate."""
    dt = DataTransforms(self.d)
    for ang in (0, 5, 10, 90):
      expected = scipy.ndimage.rotate(self.d, ang)
      assert np.allclose(dt.rotate(ang), expected)

    # A negative angle: rotating by -90 degrees is compared against the
    # reference rotation by 270 degrees.
    expected = scipy.ndimage.rotate(self.d, 270)
    assert np.allclose(dt.rotate(-90), expected)

  def test_flipping(self):
    """Tests that flip agrees with np.flipud / np.fliplr."""
    dt = DataTransforms(self.d)
    # Pair each direction keyword with the numpy routine it should match.
    references = (("ud", np.flipud), ("lr", np.fliplr))
    for direction, reference in references:
      flipped = dt.flip(direction=direction)
      assert np.allclose(flipped, reference(self.d))

  def test_scaling(self):
    """Tests that scale agrees with scipy.misc.imresize for a 150 x 150 target."""
    # Function-scope import: this module only imports scipy.ndimage, which does
    # not by itself guarantee that the scipy.misc submodule is loaded.
    import scipy.misc

    dt = DataTransforms(self.d)
    h = 150
    w = 150
    expected = scipy.misc.imresize(self.d, (h, w))
    scaled = dt.scale(h, w)
    # The output must have the requested size.
    assert scaled.shape == (h, w)
    # The comparison result has to be asserted; previously np.allclose was
    # called and its return value discarded, so this test could never fail.
    assert np.allclose(expected, scaled)
+57 −14
Original line number Diff line number Diff line
@@ -570,8 +570,8 @@ class CoulombFitTransformer(Transformer):

    def _realize_(x):
      """Reorders a 2-D matrix by noisy column norm and extracts selected entries.

      The same permutation is applied to rows and columns; the permuted matrix
      is then flattened and indexed with self.triuind.
      """
      assert (len(x.shape) == 2)
      # Sort key: negated Euclidean norm of each column (so larger norms sort
      # first) plus Gaussian noise with standard deviation self.noise.
      # NOTE(review): the next two statements are the same expression formatted
      # two ways -- this looks like the before/after lines of a diff merged
      # together. Only the second assignment takes effect, but each one draws
      # from np.random. Confirm which one should remain.
      inds = np.argsort(
          -(x**2).sum(axis=0)**.5 + np.random.normal(0, self.noise, x[0].shape))
      inds = np.argsort(-(x**2).sum(axis=0)**.5 +
                        np.random.normal(0, self.noise, x[0].shape))
      # Apply the permutation to the rows, then to the columns.
      x = x[inds, :][:, inds] * 1
      # Keep only the flattened positions listed in self.triuind.
      x = x.flatten()[self.triuind]
      return x
@@ -704,8 +704,8 @@ class IRVTransformer():
      feed_dict = {}
      with tf.Session() as sess:
        for count in range(target_len // 100 + 1):
          feed_dict[similarity_placeholder] = similarity_xs[count * 100:min((
              count + 1) * 100, target_len), :]
          feed_dict[similarity_placeholder] = similarity_xs[count * 100:min(
              (count + 1) * 100, target_len), :]
          # generating batch of data by slicing similarity matrix
          # into 100*reference_dataset_length
          fetched_values = sess.run([value, top_label], feed_dict=feed_dict)
@@ -749,9 +749,9 @@ class IRVTransformer():
    n_features = X_target.shape[1]
    print('start similarity calculation')
    time1 = time.time()
    similarity = IRVTransformer.matrix_mul(X_target, np.transpose(self.X)) / (
        n_features -
        IRVTransformer.matrix_mul(1 - X_target, np.transpose(1 - self.X)))
    similarity = IRVTransformer.matrix_mul(X_target, np.transpose(
        self.X)) / (n_features - IRVTransformer.matrix_mul(
            1 - X_target, np.transpose(1 - self.X)))
    time2 = time.time()
    print('similarity calculation takes %i s' % (time2 - time1))
    for i in range(self.n_tasks):
@@ -775,10 +775,11 @@ class IRVTransformer():
    for X1_id in range(X1_iter):
      result = np.zeros((1,))
      for X2_id in range(X2_iter):
        partial_result = np.matmul(X1[X1_id * shard_size:min((
            X1_id + 1) * shard_size, X1_shape[0]), :],
                                   X2[:, X2_id * shard_size:min((
                                       X2_id + 1) * shard_size, X2_shape[1])])
        partial_result = np.matmul(
            X1[X1_id * shard_size:min((X1_id + 1) *
                                      shard_size, X1_shape[0]), :],
            X2[:, X2_id * shard_size:min((X2_id + 1) *
                                         shard_size, X2_shape[1])])
        # calculate matrix multiplicatin on slices
        if result.size == 1:
          result = partial_result
@@ -1007,9 +1008,7 @@ class ANITransformer(Transformer):
        end = min((start + 1) * batch_size, X.shape[0])
        X_batch = X[(start * batch_size):end]
        output = self.sess.run(
            [self.outputs], feed_dict={
                self.inputs: X_batch
            })[0]
            [self.outputs], feed_dict={self.inputs: X_batch})[0]
        X_out.append(output)
        num_transformed = num_transformed + X_batch.shape[0]
        print('%i samples transformed' % num_transformed)
@@ -1182,3 +1181,47 @@ class FeaturizationTransformer(Transformer):
  def transform_array(self, X, y, w):
    """Featurizes X with self.featurizer; y and w are passed through as-is.

    Returns the tuple (featurized X, y, w).
    """
    featurized = self.featurizer.featurize(X)
    return featurized, y, w


class DataTransforms(Transformer):
  """Image transforms (scale, flip, rotate, blur) bound to a single image.

  The image given to the constructor is stored as self.Image; every method
  returns the result of one transform applied to that stored image.
  """

  def __init__(self, Image):
    # NOTE(review): Transformer.__init__ is not invoked here -- confirm that
    # no base-class state is needed by these methods.
    self.Image = Image

  def scale(self, h, w):
    """ Resizes the image with scipy.misc.imresize
          Parameters:
              h - height of the resized image
              w - width of the resized image
    """
    target_size = (h, w)
    return scipy.misc.imresize(self.Image, target_size)

  def flip(self, direction="lr"):
    """ Flips the image
          Parameters:
              direction - "lr" flips left-right (np.fliplr)
                          "ud" flips up-down (np.flipud)
          Raises:
              ValueError - if direction is neither "lr" nor "ud"
    """
    if direction == "lr":
      return np.fliplr(self.Image)
    if direction == "ud":
      return np.flipud(self.Image)
    raise ValueError(
        "Invalid flip command : Enter either lr (for left to right flip) or ud (for up to down flip)"
    )

  def rotate(self, angle=0):
    """ Rotates the image with scipy.ndimage.rotate
          Parameters:
              angle (default = 0 i.e no rotation) - rotation angle in degrees
    """
    rotated = scipy.ndimage.rotate(self.Image, angle)
    return rotated

  def gaussian_blur(self, sigma=0.2):
    """ Blurs the image with scipy.ndimage.gaussian_filter
          Parameters:
              sigma - std dev. of the gaussian kernel
    """
    blurred = scipy.ndimage.gaussian_filter(self.Image, sigma)
    return blurred