Commit 75550c33 authored by Bharath Ramsundar's avatar Bharath Ramsundar Committed by GitHub
Browse files

Merge pull request #521 from lilleswing/new-layers

Weight Initialization and Shared Layers
parents 691e4a91 0ec1a221
Loading
Loading
Loading
Loading
+349 −22
Original line number Diff line number Diff line
@@ -2,15 +2,17 @@ import random
import string

import tensorflow as tf
import numpy as np

from deepchem.nn import model_ops, initializations


class Layer(object):
  layer_number_dict = {}

  def __init__(self, in_layers=None, **kwargs):
    if "name" not in kwargs:
      self.name = "%s%s" % (self.__class__.__name__, self._random_name())
      self.name = "%s_%s" % (self.__class__.__name__, self._get_layer_number())
    else:
      self.name = kwargs['name']
    if "tensorboard" not in kwargs:
@@ -20,10 +22,14 @@ class Layer(object):
    if in_layers is None:
      in_layers = list()
    self.in_layers = in_layers
    self.op_type = "gpu"

  def _random_name(self):
    return ''.join(
        random.choice(string.ascii_uppercase + string.digits) for _ in range(4))
  def _get_layer_number(self):
    class_name = self.__class__.__name__
    if class_name not in Layer.layer_number_dict:
      Layer.layer_number_dict[class_name] = 0
    Layer.layer_number_dict[class_name] += 1
    return "%s" % Layer.layer_number_dict[class_name]

  def none_tensors(self):
    out_tensor = self.out_tensor
@@ -34,7 +40,7 @@ class Layer(object):
    self.out_tensor = tensor

  def _create_tensor(self):
    raise ValueError("Subclasses must implement for themselves")
    raise NotImplementedError("Subclasses must implement for themselves")

  def __key(self):
    return self.name
@@ -49,6 +55,20 @@ class Layer(object):
  def __hash__(self):
    return hash(self.__key())

  def shared(self, in_layers):
    """
    Share weights with different in tensors and a new out tensor
    Parameters
    ----------
    in_layers: list tensor
    List in tensors for the shared layer

    Returns
    -------
    Layer
    """
    raise ValueError("Each Layer must implement shared for itself")


class Conv1DLayer(Layer):

@@ -77,30 +97,64 @@ class Conv1DLayer(Layer):

class Dense(Layer):

  def __init__(self, out_channels, activation_fn=None, **kwargs):
  def __init__(
      self,
      out_channels,
      activation_fn=None,
      biases_initializer=tf.zeros_initializer,
      weights_initializer=tf.contrib.layers.variance_scaling_initializer,
      time_series=False,
      scope_name=None,
      reuse=False,
      **kwargs):
    super(Dense, self).__init__(**kwargs)
    self.out_channels = out_channels
    self.out_tensor = None
    self.activation_fn = activation_fn
    super(Dense, self).__init__(**kwargs)
    self.biases_initializer = biases_initializer
    self.weights_initializer = weights_initializer
    self.time_series = time_series
    self.reuse = reuse
    if scope_name is None:
      scope_name = self.name
    self.scope_name = scope_name

  def _create_tensor(self):
    if len(self.in_layers) != 1:
      raise ValueError("Only One Parent to Dense over %s" % self.in_layers)
    parent = self.in_layers[0]
    if len(parent.out_tensor.get_shape()) != 2:
      raise ValueError("Parent tensor must be (batch, width)")
    in_channels = parent.out_tensor.get_shape()[-1].value
    # w = initializations.glorot_uniform([in_channels, self.out_channels])
    # w = model_ops.zeros(shape=[in_channels, self.out_channels])
    # b = tf.Variable([0.0, 0.0])
    # self.out_tensor = tf.matmul(parent.out_tensor, w) + b
    if not self.time_series:
      self.out_tensor = tf.contrib.layers.fully_connected(
          parent.out_tensor,
          num_outputs=self.out_channels,
          activation_fn=self.activation_fn,
        scope=self.name,
          biases_initializer=self.biases_initializer(),
          weights_initializer=self.weights_initializer(),
          scope=self.scope_name,
          reuse=self.reuse,
          trainable=True)
      return self.out_tensor
    dense_fn = lambda x: tf.contrib.layers.fully_connected(x,
                                                           num_outputs=self.out_channels,
                                                           activation_fn=self.activation_fn,
                                                           biases_initializer=self.biases_initializer(),
                                                           weights_initializer=self.weights_initializer(),
                                                           scope=self.scope_name,
                                                           reuse=self.reuse,
                                                           trainable=True)
    self.out_tensor = tf.map_fn(dense_fn, parent.out_tensor)

  def shared(self, in_layers):
    self.reuse = True
    return Dense(
        self.out_channels,
        self.activation_fn,
        self.biases_initializer,
        self.weights_initializer,
        time_series=self.time_series,
        reuse=self.reuse,
        scope_name=self.scope_name,
        in_layers=in_layers)


class Flatten(Layer):
@@ -132,6 +186,19 @@ class Reshape(Layer):
    self.out_tensor = tf.reshape(parent_tensor, self.shape)


class Transpose(Layer):
  """Layer that permutes the dimensions of its single parent tensor."""

  def __init__(self, out_shape, **kwargs):
    """
    Parameters
    ----------
    out_shape: sequence of int
      Dimension permutation, forwarded to tf.transpose as its perm argument.
    """
    super(Transpose, self).__init__(**kwargs)
    self.out_shape = out_shape

  def _create_tensor(self):
    parent_count = len(self.in_layers)
    if parent_count != 1:
      raise ValueError("Only One Parent to Transpose over")
    parent_tensor = self.in_layers[0].out_tensor
    self.out_tensor = tf.transpose(parent_tensor, self.out_shape)
    return self.out_tensor


class CombineMeanStd(Layer):

  def __init__(self, **kwargs):
@@ -188,7 +255,6 @@ class GRU(Layer):
class TimeSeriesDense(Layer):

  def __init__(self, out_channels, **kwargs):
    self.out_channels = out_channels
    super(TimeSeriesDense, self).__init__(**kwargs)

  def _create_tensor(self):
@@ -206,6 +272,7 @@ class Input(Layer):
    self.shape = shape
    self.dtype = dtype
    super(Input, self).__init__(**kwargs)
    self.op_type = "cpu"

  def _create_tensor(self):
    if len(self.in_layers) > 0:
@@ -372,6 +439,7 @@ class InputFifoQueue(Layer):
    self.capacity = capacity
    self.dtypes = dtypes
    super(InputFifoQueue, self).__init__(**kwargs)
    self.op_type = "cpu"

  def _create_tensor(self):
    if self.dtypes is None:
@@ -609,3 +677,262 @@ class WeightedError(Layer):
    entropy, weights = self.in_layers[0], self.in_layers[1]
    self.out_tensor = tf.reduce_sum(entropy.out_tensor * weights.out_tensor)
    return self.out_tensor


class AtomicConvolution(Layer):
  """Atomic convolution layer.

  Pools learned Gaussian radial filters over interatomic distances computed
  from a neighbor list, optionally split per atom type.
  """

  # NOTE(review): radial_params=list() is a mutable default argument; it is
  # shared across instances if ever mutated in place — confirm intended.
  def __init__(self,
               atom_types=None,
               radial_params=list(),
               boxsize=None,
               **kwargs):
    """Atomic convolution layer.

    N = max_num_atoms, M = max_num_neighbors, B = batch_size, d = num_features
    l = num_radial_filters * num_atom_types

    Parameters
    ----------
    atom_types: list or None
      Of length a, where a is number of atom types for filtering.
      If None, all non-padding neighbors are pooled together.
    radial_params: list
      Of length l, where l is number of radial filters learned. Each entry
      is an (rc, rs, e) parameter triple for radial_symmetry_function.
    boxsize: float or None
      Simulation box length [Angstrom]; enables periodic wrapping of
      displacements when set.
    """
    self.boxsize = boxsize
    self.radial_params = radial_params
    self.atom_types = atom_types
    super(AtomicConvolution, self).__init__(**kwargs)

  def _create_tensor(self):
    """Build the atomic convolution output tensor.

    Reads three parent layers, in order:

    X: tf.Tensor of shape (B, N, d)
      Coordinates/features.
    Nbrs: tf.Tensor of shape (B, N, M)
      Neighbor list (atom indices; cast to int32 below).
    Nbrs_Z: tf.Tensor of shape (B, N, M)
      Atomic numbers of neighbor atoms.

    Returns
    -------
    layer: tf.Tensor of shape (l, B, N)
      A new tensor representing the output of the atomic conv layer.
    """

    X = self.in_layers[0].out_tensor
    Nbrs = tf.to_int32(self.in_layers[1].out_tensor)
    Nbrs_Z = self.in_layers[2].out_tensor

    # N: Maximum number of atoms
    # M: Maximum number of neighbors
    # d: Number of coordinates/features/filters
    # B: Batch Size
    # NOTE(review): static shapes are required here (.value on Dimension);
    # fails on tensors with unknown batch size — confirm callers pad/fix shapes.
    N = X.get_shape()[-2].value
    d = X.get_shape()[-1].value
    M = Nbrs.get_shape()[-1].value
    B = X.get_shape()[0].value

    # Displacement tensor (B, N, M, d) and scalar distances (B, N, M).
    D = self.distance_tensor(X, Nbrs, self.boxsize, B, N, M, d)
    R = self.distance_matrix(D)
    sym = []
    rsf_zeros = tf.zeros((B, N, M))
    for param in self.radial_params:

      # We apply the radial pooling filter before atom type conv
      # to reduce computation
      rsf = self.radial_symmetry_function(R, *param)

      if not self.atom_types:
        # No type filtering: pool over all neighbors whose atomic number is
        # nonzero (zero entries are presumably padding — confirm upstream).
        cond = tf.not_equal(Nbrs_Z, 0.0)
        sym.append(tf.reduce_sum(tf.where(cond, rsf, rsf_zeros), 2))
      else:
        # One pooled (B, N) channel per (radial filter, atom type) pair.
        for j in range(len(self.atom_types)):
          cond = tf.equal(Nbrs_Z, self.atom_types[j])
          sym.append(tf.reduce_sum(tf.where(cond, rsf, rsf_zeros), 2))

    # Pack l (B, N) tensors into one (l, B, N) tensor
    # Transpose to (B, N, l) for conv layer stacking
    # done inside conv_layer loops to reduce transpose ops
    # Final layer should be shape (N, B, l) to pass into tf.map_fn
    # TODO (LESWING) batch norm
    self.out_tensor = tf.stack(sym)
    return self.out_tensor

  def radial_symmetry_function(self, R, rc, rs, e):
    """Calculates radial symmetry function.

    B = batch_size, N = max_num_atoms, M = max_num_neighbors, d = num_filters

    Parameters
    ----------
    R: tf.Tensor of shape (B, N, M)
      Distance matrix.
    rc: float
      Interaction cutoff [Angstrom].
    rs: float
      Gaussian distance matrix mean.
    e: float
      Gaussian distance matrix width.

    Returns
    -------
    retval: tf.Tensor of shape (B, N, M)
      Radial symmetry function (before summation)
    """

    with tf.name_scope(None, "NbrRadialSymmetryFunction", [rc, rs, e]):
      # rc/rs/e are wrapped in tf.Variable (trainable by TF default), so the
      # filter parameters are learned; fresh variables are created on every
      # call, i.e. one set per radial_params entry.
      rc = tf.Variable(rc)
      rs = tf.Variable(rs)
      e = tf.Variable(e)
      K = self.gaussian_distance_matrix(R, rs, e)
      FC = self.radial_cutoff(R, rc)
    # Elementwise product: Gaussian response masked by the smooth cutoff.
    return tf.multiply(K, FC)

  def radial_cutoff(self, R, rc):
    """Calculates radial cutoff matrix.

    B = batch_size, N = max_num_atoms, M = max_num_neighbors

    Parameters
    ----------
      R [B, N, M]: tf.Tensor
        Distance matrix.
      rc: tf.Variable
        Interaction cutoff [Angstrom].

    Returns
    -------
      FC [B, N, M]: tf.Tensor
        Radial cutoff matrix: 0.5 * (cos(pi * R / rc) + 1) inside the cutoff,
        zero beyond it.
    """

    T = 0.5 * (tf.cos(np.pi * R / (rc)) + 1)
    E = tf.zeros_like(T)
    cond = tf.less_equal(R, rc)
    FC = tf.where(cond, T, E)
    return FC

  def gaussian_distance_matrix(self, R, rs, e):
    """Calculates gaussian distance matrix.

    B = batch_size, N = max_num_atoms, M = max_num_neighbors

    Parameters
    ----------
      R [B, N, M]: tf.Tensor
        Distance matrix.
      rs: tf.Variable
        Gaussian distance matrix mean.
      e: tf.Variable
        Gaussian distance matrix width (e = .5/std**2).

    Returns
    -------
      retval [B, N, M]: tf.Tensor
        Gaussian distance matrix: exp(-e * (R - rs)**2).
    """

    return tf.exp(-e * (R - rs)**2)

  def distance_tensor(self, X, Nbrs, boxsize, B, N, M, d):
    """Calculates distance tensor for batch of molecules.

    B = batch_size, N = max_num_atoms, M = max_num_neighbors, d = num_features

    Parameters
    ----------
    X: tf.Tensor of shape (B, N, d)
      Coordinates/features tensor.
    Nbrs: tf.Tensor of shape (B, N, M)
      Neighbor list tensor.
    boxsize: float or None
      Simulation box length [Angstrom]. When set, displacements are wrapped
      by subtracting boxsize * round(D / boxsize) (minimum-image-style wrap).
    B, N, M, d: int
      Static dimension sizes used to reshape the result.

    Returns
    -------
    D: tf.Tensor of shape (B, N, M, d)
      Coordinates/features distance tensor (displacement vectors
      neighbor - atom).
    """
    atom_tensors = tf.unstack(X, axis=1)
    nbr_tensors = tf.unstack(Nbrs, axis=1)
    D = []
    if boxsize is not None:
      for atom, atom_tensor in enumerate(atom_tensors):
        nbrs = self.gather_neighbors(X, nbr_tensors[atom], B, N, M, d)
        nbrs_tensors = tf.unstack(nbrs, axis=1)
        for nbr, nbr_tensor in enumerate(nbrs_tensors):
          _D = tf.subtract(nbr_tensor, atom_tensor)
          _D = tf.subtract(_D, boxsize * tf.round(tf.div(_D, boxsize)))
          D.append(_D)
    else:
      for atom, atom_tensor in enumerate(atom_tensors):
        nbrs = self.gather_neighbors(X, nbr_tensors[atom], B, N, M, d)
        nbrs_tensors = tf.unstack(nbrs, axis=1)
        for nbr, nbr_tensor in enumerate(nbrs_tensors):
          _D = tf.subtract(nbr_tensor, atom_tensor)
          D.append(_D)
    # D holds N*M tensors of shape (B, d); stack then reshape to (B, N, M, d).
    D = tf.stack(D)
    D = tf.transpose(D, perm=[1, 0, 2])
    D = tf.reshape(D, [B, N, M, d])
    return D

  def gather_neighbors(self, X, nbr_indices, B, N, M, d):
    """Gathers the neighbor subsets of the atoms in X.

    B = batch_size, N = max_num_atoms, M = max_num_neighbors, d = num_features

    Parameters
    ----------
    X: tf.Tensor of shape (B, N, d)
      Coordinates/features tensor.
    nbr_indices: tf.Tensor of shape (B, M)
      Neighbor list for single atom.
    B, N, M, d: int
      Static dimension sizes (N and d unused here; kept for a uniform
      signature).

    Returns
    -------
    neighbors: tf.Tensor of shape (B, M, d)
      Neighbor coordinates/features tensor for single atom.
    """

    # Per-example gather: unstack the batch, index each example's atoms by
    # that example's neighbor indices, restack.
    example_tensors = tf.unstack(X, axis=0)
    example_nbrs = tf.unstack(nbr_indices, axis=0)
    all_nbr_coords = []
    for example, (example_tensor,
                  example_nbr) in enumerate(zip(example_tensors, example_nbrs)):
      nbr_coords = tf.gather(example_tensor, example_nbr)
      all_nbr_coords.append(nbr_coords)
    neighbors = tf.stack(all_nbr_coords)
    return neighbors

  def distance_matrix(self, D):
    """Calculates the distance matrix from the distance tensor.

    B = batch_size, N = max_num_atoms, M = max_num_neighbors, d = num_features

    Parameters
    ----------
    D: tf.Tensor of shape (B, N, M, d)
      Distance tensor.

    Returns
    -------
    R: tf.Tensor of shape (B, N, M)
       Distance matrix (Euclidean norm over the last axis of D).
    """

    R = tf.reduce_sum(tf.multiply(D, D), 3)
    R = tf.sqrt(R)
    return R
+6 −2
Original line number Diff line number Diff line
@@ -23,6 +23,7 @@ class TensorGraph(Model):
               tensorboard_log_frequency=100,
               learning_rate=0.001,
               batch_size=100,
               random_seed=None,
               use_queue=True,
               mode="regression",
               **kwargs):
@@ -81,6 +82,7 @@ class TensorGraph(Model):

    self.learning_rate = learning_rate
    self.batch_size = batch_size
    self.random_seed = random_seed
    super(TensorGraph, self).__init__(**kwargs)
    self.save_file = "%s/%s" % (self.model_dir, "model")
    self.model_class = None
@@ -307,6 +309,8 @@ class TensorGraph(Model):
    if self.built:
      return
    with self._get_tf("Graph").as_default():
      if self.random_seed is not None:
        tf.set_random_seed(self.random_seed)
      self._install_queue()
      order = self.topsort()
      print(order)
@@ -387,7 +391,7 @@ class TensorGraph(Model):
    self.tensor_objects = tensor_objects

  def evaluate_generator(self,
                         dataset,
                         feed_dict_generator,
                         metrics,
                         transformers=[],
                         labels=None,
@@ -399,7 +403,7 @@ class TensorGraph(Model):
      raise ValueError
    evaluator = GeneratorEvaluator(
        self,
        dataset,
        feed_dict_generator,
        transformers,
        labels=labels,
        outputs=outputs,
+42 −1
Original line number Diff line number Diff line
@@ -73,7 +73,7 @@ class TestTensorGraph(unittest.TestCase):

    tg.fit_generator(
        databag.iterbatches(
            epochs=100, batch_size=tg.batch_size, pad_batches=True))
            epochs=5000, batch_size=tg.batch_size, pad_batches=True))
    prediction = tg.predict_proba_on_generator(databag.iterbatches())
    for i in range(2):
      y_real = ys[i].X
@@ -207,3 +207,44 @@ class TestTensorGraph(unittest.TestCase):
    tg1 = TensorGraph.load_from_dir(tg.model_dir)
    prediction2 = np.squeeze(tg1.predict_proba_on_batch(X))
    assert_true(np.all(np.isclose(prediction, prediction2, atol=0.01)))

  def test_shared_layer(self):
    """Train two SoftMax heads over a Dense layer and its shared copy and
    check both heads produce (near-)identical predictions."""
    num_rows = 20
    num_cols = 2

    features_np = np.random.rand(num_rows, num_cols)
    labels_np = np.array([[0, 1] for _ in range(num_rows)])
    feature_dataset = NumpyDataset(features_np)
    label_datasets = [NumpyDataset(labels_np)]

    databag = Databag()

    features = Feature(shape=(None, num_cols))
    databag.add_dataset(features, feature_dataset)

    label = Label(shape=(None, 2))
    dense1 = Dense(out_channels=2, in_layers=[features])
    # dense2 reuses dense1's weights via the shared-layer mechanism.
    dense2 = dense1.shared(in_layers=[features])
    output1 = SoftMax(in_layers=[dense1])
    output2 = SoftMax(in_layers=[dense2])
    smce = SoftMaxCrossEntropy(in_layers=[label, dense1])

    outputs = [output1, output2]
    databag.add_dataset(label, label_datasets[0])

    total_loss = ReduceMean(in_layers=[smce])

    tg = dc.models.TensorGraph(learning_rate=0.1)
    for output in outputs:
      tg.add_output(output)
    tg.set_loss(total_loss)

    tg.fit_generator(
        databag.iterbatches(
            epochs=1, batch_size=tg.batch_size, pad_batches=True))
    prediction = tg.predict_proba_on_generator(databag.iterbatches())
    # Shared weights => the two heads' probabilities must agree.
    assert_true(
        np.all(np.isclose(prediction[:, 0], prediction[:, 1], atol=0.01)))
+1 −1
Original line number Diff line number Diff line
@@ -738,7 +738,7 @@ class TestOverfit(test_util.TensorFlowTestCase):
        n_tasks,
        n_feat,
        batch_size=batch_size,
        learning_rate=0.005,
        learning_rate=0.01,
        learning_rate_decay_time=1000,
        optimizer_type="adam",
        beta1=.9,
+8 −2
Original line number Diff line number Diff line
@@ -114,8 +114,10 @@ class TestGeneratorEvaluator(TestCase):
    assert_true(np.isclose(scores, [1.0], atol=0.05))

  def test_compute_model_performance_multitask_regressor(self):
    random_seed = 42
    n_data_points = 20
    n_features = 2
    np.random.seed(seed=random_seed)

    X = np.random.rand(n_data_points, n_features)
    y1 = np.expand_dims(np.array([0.5 for x in range(n_data_points)]), axis=-1)
@@ -143,7 +145,11 @@ class TestGeneratorEvaluator(TestCase):

    total_loss = ReduceMean(in_layers=losses)

    tg = dc.models.TensorGraph(mode="regression", learning_rate=0.1)
    tg = dc.models.TensorGraph(
        mode="regression",
        batch_size=20,
        random_seed=random_seed,
        learning_rate=0.1)
    for output in outputs:
      tg.add_output(output)
    tg.set_loss(total_loss)
@@ -158,7 +164,7 @@ class TestGeneratorEvaluator(TestCase):
    scores = tg.evaluate_generator(
        databag.iterbatches(), metric, labels=labels, per_task_metrics=True)
    scores = list(scores[1].values())
    assert_true(np.all(np.isclose(scores, [0.0, 0.0], atol=0.5)))
    assert_true(np.all(np.isclose(scores, [0.0, 0.0], atol=1.0)))

  def test_compute_model_performance_singletask_regressor(self):
    n_data_points = 20