Commit 8e765720 authored by Bharath Ramsundar's avatar Bharath Ramsundar Committed by GitHub
Browse files

Merge pull request #805 from galenxing/master

Adding layers for Sluice Networks
parents d7d140a9 cb8f2d3a
Loading
Loading
Loading
Loading
+206 −0
Original line number Diff line number Diff line
@@ -2705,3 +2705,209 @@ class AtomicConvolution(Layer):
    R = tf.reduce_sum(tf.multiply(D, D), 3)
    R = tf.sqrt(R)
    return R


def AlphaShare(in_layers=None, **kwargs):
  """
  Factory for the AlphaShare unit of a Sluice Network.

  One AlphaShareLayer is created over all of in_layers, and one
  LayerSplitter per input is attached to it so that each input gets its
  own output layer.

  Parameters
  ----------
  in_layers: list of Layers or tensors
    tensors in list must be the same size and list must include two or more tensors
  kwargs:
    forwarded unchanged to the AlphaShareLayer constructor

  Returns
  -------
  output_layers: list of LayerSplitter, same length and order as in_layers
    element i selects output i of the shared AlphaShareLayer

  References:
  Sluice networks: Learning what to share between loosely related tasks
  https://arxiv.org/abs/1705.08142
  """
  shared = AlphaShareLayer(in_layers=in_layers, **kwargs)
  return [
      LayerSplitter(output_num=index, in_layers=shared)
      for index in range(len(in_layers))
  ]


class AlphaShareLayer(Layer):
  """
  Part of a sluice network. Adds alpha parameters to control
  sharing between the main and auxillary tasks

  Factory method AlphaShare should be used for construction

  Each input is split column-wise into two equal halves ("subspaces").
  All halves are flattened, stacked, and mixed by a learnable square
  matrix `alphas`; the mixed halves are then re-assembled pairwise into
  tensors of the original width.

  Parameters
  ----------
  in_layers: list of Layers or tensors
    tensors in list must be the same size and list must include two or more tensors.
    The last dimension must be even so it can be split into two halves.

  Returns
  -------
  out_tensor: a tensor with shape [len(in_layers), x, y] where x, y were the original layer dimensions
    out_tensor should be fed into LayerSplitter
  """

  def __init__(self, **kwargs):
    super(AlphaShareLayer, self).__init__(**kwargs)

  def create_tensor(self, in_layers=None, set_tensors=True, **kwargs):
    """Build the alpha-mixed output tensor.

    Raises
    ------
    ValueError
      if fewer than two inputs are given, or if the inputs' last dimension
      is odd (the two halves would then have different sizes and could not
      be stacked).
    """
    inputs = self._get_input_tensors(in_layers)
    # check that there isnt just one or zero inputs
    if len(inputs) <= 1:
      raise ValueError("AlphaShare must have more than one input")
    self.num_outputs = len(inputs)

    original_cols = int(inputs[0].get_shape()[-1].value)
    if original_cols % 2 != 0:
      # Unequal halves cannot be stacked/reshaped below; fail early with a
      # readable message instead of an opaque TensorFlow shape error.
      raise ValueError(
          "AlphaShare inputs must have an even number of columns, got %d" %
          original_cols)
    subspace_size = original_cols // 2

    # create subspaces: two flattened column-halves per input, in input order
    subspaces = []
    for input_tensor in inputs:
      subspaces.append(tf.reshape(input_tensor[:, :subspace_size], [-1]))
      subspaces.append(tf.reshape(input_tensor[:, subspace_size:], [-1]))
    n_alphas = len(subspaces)
    subspaces = tf.reshape(tf.stack(subspaces), [n_alphas, -1])

    # create the alpha learnable parameters
    alphas = tf.Variable(tf.random_normal([n_alphas, n_alphas]), name='alphas')

    # every mixed row is a linear combination of all original subspaces
    subspaces = tf.matmul(alphas, subspaces)

    # Rows (2k, 2k+1) are the two halves of output k: restore each half to
    # [-1, subspace_size], join them column-wise, then stack the outputs so
    # out_tensor has shape (len(inputs), ?, original_cols).
    out_tensor = []
    for first_row in range(0, n_alphas, 2):
      halves = [
          tf.reshape(subspaces[row], [-1, subspace_size])
          for row in (first_row, first_row + 1)
      ]
      out_tensor.append(tf.concat(halves, 1))
    out_tensor = tf.stack(out_tensor)

    self.alphas = alphas
    if set_tensors:
      self.out_tensor = out_tensor
    return out_tensor

  def none_tensors(self):
    """Clear num_outputs, out_tensor and alphas and return their old values."""
    num_outputs, out_tensor, alphas = self.num_outputs, self.out_tensor, self.alphas
    self.num_outputs = None
    self.out_tensor = None
    self.alphas = None
    return num_outputs, out_tensor, alphas

  def set_tensors(self, tensor):
    """Restore the (num_outputs, out_tensor, alphas) triple from none_tensors."""
    self.num_outputs, self.out_tensor, self.alphas = tensor


class LayerSplitter(Layer):
  """
  Returns the nth output of a layer
  Assumes out_tensor has shape [x, :] where x is the total number of intended output tensors
  """

  def __init__(self, output_num, **kwargs):
    """
    Parameters
    ----------
    output_num: int
        returns the out_tensor[output_num, :] of a layer
    """
    self.output_num = output_num
    super(LayerSplitter, self).__init__(**kwargs)

  def create_tensor(self, in_layers=None, set_tensors=True, **kwargs):
    """Slice entry `output_num` along the first axis of the first input.

    Parameters
    ----------
    in_layers: list of Layers or tensors, optional
      only the first input is used
    set_tensors: bool
      when True the result is also stored on self.out_tensor

    Returns
    -------
    the tensor input[output_num, :]
    """
    source = self._get_input_tensors(in_layers)[0]
    out_tensor = source[self.output_num, :]
    # Honor the set_tensors flag, as AlphaShareLayer does.
    if set_tensors:
      self.out_tensor = out_tensor
    return out_tensor

  def none_tensors(self):
    """Clear out_tensor and return its previous value."""
    out_tensor = self.out_tensor
    self.out_tensor = None
    return out_tensor

  def set_tensors(self, tensor):
    """Restore out_tensor from the value returned by none_tensors."""
    self.out_tensor = tensor


class SluiceLoss(Layer):
  """
  Calculates the loss in a Sluice Network
  Every input into an AlphaShare should be used in SluiceLoss

  For each input, the columns are split into two halves A and B (the same
  split used by AlphaShareLayer) and the squared Frobenius norm of
  transpose(A) x B is computed. The layer output is the sum of these
  penalties over all inputs.
  """

  def __init__(self, **kwargs):
    super(SluiceLoss, self).__init__(**kwargs)

  def create_tensor(self, in_layers=None, set_tensors=True, **kwargs):
    """Build the scalar loss tensor.

    Parameters
    ----------
    in_layers: list of Layers or tensors, optional
      inputs to penalize; inputs may differ in shape from one another
    set_tensors: bool
      when True the result is also stored on self.out_tensor

    Returns
    -------
    scalar tensor holding the summed penalty
    """
    inputs = self._get_input_tensors(in_layers)
    penalties = []
    for input_tensor in inputs:
      # split the columns in half, the same way it is done in AlphaShare
      subspace_size = int(input_tensor.get_shape()[-1].value) // 2
      first_half = input_tensor[:, :subspace_size]
      second_half = input_tensor[:, subspace_size:]
      product = tf.matmul(tf.transpose(first_half), second_half)
      # calculate squared Frobenius norm
      penalties.append(tf.reduce_sum(tf.pow(product, 2)))
    out_tensor = tf.reduce_sum(penalties)
    # Honor the set_tensors flag, as AlphaShareLayer does.
    if set_tensors:
      self.out_tensor = out_tensor
    return out_tensor


class BetaShare(Layer):
  """
  Part of a sluice network. Adds beta params to control which layer
  outputs are used for prediction

  The output is a learnable linear combination of the inputs: every input
  is flattened, the flattened inputs are stacked, and the stack is
  multiplied by a [1, n_inputs] variable `betas`.

  Parameters
  ----------
  in_layers: list of Layers or tensors
    tensors in list must be the same size and list must include two or more tensors

  Returns
  -------
  out_tensor: a single tensor reshaped to [-1, c], where c is the size of
    the last dimension of the first input
  """

  def __init__(self, **kwargs):
    super(BetaShare, self).__init__(**kwargs)

  def create_tensor(self, in_layers=None, set_tensors=True, **kwargs):
    """
    Build the beta-weighted combination of the inputs.

    Size of input layers must all be the same.
    When set_tensors is True the result is also stored on self.out_tensor.
    """
    inputs = self._get_input_tensors(in_layers)
    original_cols = int(inputs[0].get_shape()[-1].value)
    n_betas = len(inputs)

    # one flattened row per input
    flattened = [tf.reshape(input_tensor, [-1]) for input_tensor in inputs]
    subspaces = tf.reshape(tf.stack(flattened), [n_betas, -1])

    betas = tf.Variable(tf.random_normal([1, n_betas]), name='betas')
    # [1, n_betas] x [n_betas, N] -> [1, N]; then restore the column count
    out_tensor = tf.reshape(tf.matmul(betas, subspaces), [-1, original_cols])

    self.betas = betas
    # Honor the set_tensors flag, as AlphaShareLayer does.
    if set_tensors:
      self.out_tensor = out_tensor
    return out_tensor

  def none_tensors(self):
    """Clear out_tensor and betas and return their previous values."""
    out_tensor, betas = self.out_tensor, self.betas
    self.out_tensor = None
    self.betas = None
    return out_tensor, betas

  def set_tensors(self, tensor):
    """Restore the (out_tensor, betas) pair returned by none_tensors."""
    self.out_tensor, self.betas = tensor
+65 −0
Original line number Diff line number Diff line
@@ -48,6 +48,10 @@ from deepchem.models.tensorgraph.layers import TensorWrapper
from deepchem.models.tensorgraph.layers import LSTMStep
from deepchem.models.tensorgraph.layers import AttnLSTMEmbedding
from deepchem.models.tensorgraph.layers import IterRefLSTMEmbedding
from deepchem.models.tensorgraph.layers import AlphaShareLayer
from deepchem.models.tensorgraph.layers import BetaShare
from deepchem.models.tensorgraph.layers import LayerSplitter
from deepchem.models.tensorgraph.layers import SluiceLoss

import deepchem as dc

@@ -626,3 +630,64 @@ class TestLayers(test_util.TensorFlowTestCase):
      assert result == 1.5
      result = sess.run(tf.gradients(v, v))
      assert result[0] == 1.0

  def test_alpha_share_layer(self):
    """AlphaShareLayer outputs must keep the shape of their inputs."""
    n_rows, n_cols = 50, 10
    first_np = np.random.rand(n_rows, n_cols)
    second_np = np.random.rand(n_rows, n_cols)

    with self.test_session() as sess:
      first = tf.convert_to_tensor(first_np, dtype=tf.float32)
      second = tf.convert_to_tensor(second_np, dtype=tf.float32)

      stacked = AlphaShareLayer()(first, second)
      # the layer creates the `alphas` variable, which needs initializing
      sess.run(tf.global_variables_initializer())

      # slice i of the stacked result corresponds to input i
      first_out = stacked[0].eval()
      second_out = stacked[1].eval()
      assert first.shape == first_out.shape
      assert second.shape == second_out.shape

  def test_beta_share(self):
    """Test that BetaShare output has the same shape as each input"""
    batch_size = 50
    length = 10
    test_1 = np.random.rand(batch_size, length)
    test_2 = np.random.rand(batch_size, length)

    with self.test_session() as sess:
      test_1 = tf.convert_to_tensor(test_1, dtype=tf.float32)
      test_2 = tf.convert_to_tensor(test_2, dtype=tf.float32)

      out_tensor = BetaShare()(test_1, test_2)
      # the layer creates the `betas` variable, which needs initializing
      sess.run(tf.global_variables_initializer())
      # Assert on the evaluated array rather than discarding it, so the
      # check covers the shape actually produced at run time (same pattern
      # as test_alpha_share_layer).
      result = out_tensor.eval()
      assert test_1.shape == result.shape
      assert test_2.shape == result.shape

  def test_layer_splitter(self):
    """LayerSplitter(i) must return slice i of a stacked tensor."""
    first_np = np.arange(10).reshape(2, 5)
    second_np = np.arange(10, 20).reshape(2, 5)

    with self.test_session() as sess:
      first = tf.convert_to_tensor(first_np, dtype=tf.float32)
      second = tf.convert_to_tensor(second_np, dtype=tf.float32)
      stacked = tf.stack([first, second])

      split_0 = LayerSplitter(0)(stacked)
      split_1 = LayerSplitter(1)(stacked)
      sess.run(tf.global_variables_initializer())

      # each split must reproduce the tensor that was stacked at its index
      sess.run(tf.assert_equal(first, split_0.eval()))
      sess.run(tf.assert_equal(second, split_1.eval()))

  def test_sluice_loss(self):
    """SluiceLoss on all-ones inputs must match the hand-computed value."""
    wide_np = np.ones((3, 4))
    small_np = np.ones((2, 2))
    with self.test_session() as sess:
      wide = tf.convert_to_tensor(wide_np, dtype=tf.float32)
      small = tf.convert_to_tensor(small_np, dtype=tf.float32)
      loss = SluiceLoss()(wide, small)
      sess.run(tf.global_variables_initializer())
      # (3, 4) ones: halves are (3, 2); A^T B is a 2x2 of 3s -> 4 * 9 = 36
      # (2, 2) ones: halves are (2, 1); A^T B is a 1x1 of 2  -> 4
      assert loss.eval() == 40.0
+192 −0
Original line number Diff line number Diff line
"""
Script that trains graph-conv models on Tox21 dataset.
"""
from __future__ import print_function
from __future__ import division
from __future__ import unicode_literals

import numpy as np
import six
import sys

from deepchem.models.tensorgraph import TensorGraph
from deepchem.metrics import to_one_hot

from deepchem.feat.mol_graphs import ConvMol
from deepchem.models.tensorgraph.layers import Input, GraphConv, BatchNorm, GraphPool, Dense, GraphGather, \
  SoftMax, SoftMaxCrossEntropy, Concat, WeightedError, Label, Constant, Weights, Feature, AlphaShare, SluiceLoss, Add

np.random.seed(123)
import tensorflow as tf

tf.set_random_seed(123)
import deepchem as dc
from tox21_datasets import load_tox21


def sluice_model(batch_size, tasks):
  """Build a two-branch graph-conv sluice network as a TensorGraph.

  Two parallel branches ("a" and "b") of GraphConv / BatchNorm / GraphPool /
  Dense layers are coupled by AlphaShare units at three depths. The first
  half of `tasks` gets its classification head on branch a, the second half
  on branch b. The loss is the weighted soft-max cross entropy over all
  tasks plus a SluiceLoss over the layers fed into the AlphaShare units.

  Reads the module-level `model_dir`, which must be defined before this
  function is called.

  Parameters
  ----------
  batch_size: int
    batch size given to the TensorGraph and to the GraphGather layers
  tasks: list
    task identifiers; one two-class output head is created per task

  Returns
  -------
  model: TensorGraph
    the model, with outputs and loss set
  feed_dict_generator: callable(dataset, batch_size, epochs=1)
    generator function yielding feed dicts for the model
  labels: list of Label
    one label layer per task, in task order
  task_weights: Weights
    the per-task weights layer
  """
  model = TensorGraph(
      model_dir=model_dir,
      batch_size=batch_size,
      use_queue=False,
      tensorboard=True)
  atom_features = Feature(shape=(None, 75))
  degree_slice = Feature(shape=(None, 2), dtype=tf.int32)
  membership = Feature(shape=(None,), dtype=tf.int32)

  sluice_loss = []
  deg_adjs = [
      Feature(shape=(None, i + 1), dtype=tf.int32) for i in range(0, 10 + 1)
  ]
  # Graph-topology inputs that every graph layer takes after its features.
  graph_inputs = [degree_slice, membership] + deg_adjs

  gc1 = GraphConv(
      64, activation_fn=tf.nn.relu, in_layers=[atom_features] + graph_inputs)

  # First sharing point: both branches start from the same convolution.
  as1 = AlphaShare(in_layers=[gc1, gc1])
  sluice_loss.append(gc1)

  batch_norm1a = BatchNorm(in_layers=[as1[0]])
  batch_norm1b = BatchNorm(in_layers=[as1[1]])

  gp1a = GraphPool(in_layers=[batch_norm1a] + graph_inputs)
  gp1b = GraphPool(in_layers=[batch_norm1b] + graph_inputs)

  gc2a = GraphConv(
      64, activation_fn=tf.nn.relu, in_layers=[gp1a] + graph_inputs)
  gc2b = GraphConv(
      64, activation_fn=tf.nn.relu, in_layers=[gp1b] + graph_inputs)

  # Second sharing point.
  as2 = AlphaShare(in_layers=[gc2a, gc2b])
  sluice_loss.append(gc2a)
  sluice_loss.append(gc2b)

  batch_norm2a = BatchNorm(in_layers=[as2[0]])
  batch_norm2b = BatchNorm(in_layers=[as2[1]])

  gp2a = GraphPool(in_layers=[batch_norm2a] + graph_inputs)
  gp2b = GraphPool(in_layers=[batch_norm2b] + graph_inputs)

  densea = Dense(out_channels=128, activation_fn=None, in_layers=[gp2a])
  denseb = Dense(out_channels=128, activation_fn=None, in_layers=[gp2b])

  batch_norm3a = BatchNorm(in_layers=[densea])
  batch_norm3b = BatchNorm(in_layers=[denseb])

  # Third sharing point.
  as3 = AlphaShare(in_layers=[batch_norm3a, batch_norm3b])
  sluice_loss.append(batch_norm3a)
  sluice_loss.append(batch_norm3b)

  gg1a = GraphGather(
      batch_size=batch_size,
      activation_fn=tf.nn.tanh,
      in_layers=[as3[0]] + graph_inputs)
  gg1b = GraphGather(
      batch_size=batch_size,
      activation_fn=tf.nn.tanh,
      in_layers=[as3[1]] + graph_inputs)

  costs = []
  labels = []
  half = len(tasks) / 2
  for task_index, task in enumerate(tasks):
    # The first half of the tasks reads branch a, the rest branch b.
    in_first_half = task_index < half
    classification = Dense(
        out_channels=2,
        activation_fn=None,
        in_layers=[gg1a if in_first_half else gg1b])
    print("first half:" if in_first_half else 'second half')
    print(task)

    softmax = SoftMax(in_layers=[classification])
    model.add_output(softmax)

    label = Label(shape=(None, 2))
    labels.append(label)
    cost = SoftMaxCrossEntropy(in_layers=[label, classification])
    costs.append(cost)

  entropy = Concat(in_layers=costs)
  task_weights = Weights(shape=(None, len(tasks)))
  task_loss = WeightedError(in_layers=[entropy, task_weights])

  s_cost = SluiceLoss(in_layers=sluice_loss)

  total_loss = Add(in_layers=[task_loss, s_cost])
  model.set_loss(total_loss)

  def feed_dict_generator(dataset, batch_size, epochs=1):
    """Yield one feed dict per padded batch of `dataset`, for `epochs` passes."""
    for _ in range(epochs):
      for X_b, y_b, w_b, ids_b in dataset.iterbatches(
          batch_size, pad_batches=True):
        d = {}
        for index, label in enumerate(labels):
          d[label] = to_one_hot(y_b[:, index])
        d[task_weights] = w_b
        multiConvMol = ConvMol.agglomerate_mols(X_b)
        d[atom_features] = multiConvMol.get_atom_features()
        d[degree_slice] = multiConvMol.deg_slice
        d[membership] = multiConvMol.membership
        # Fetch the adjacency lists once per batch instead of once per use.
        deg_adj_lists = multiConvMol.get_deg_adjacency_lists()
        # Entry 0 of the lists is skipped; list i feeds deg_adjs[i - 1].
        for i in range(1, len(deg_adj_lists)):
          d[deg_adjs[i - 1]] = deg_adj_lists[i]
        yield d

  return model, feed_dict_generator, labels, task_weights


# Directory handed to the TensorGraph built by sluice_model (read there as a
# module-level global, so it must be assigned before the model is built).
model_dir = "tmp/graphconv"

# Load the Tox21 dataset, featurized for graph convolutions
tox21_tasks, tox21_datasets, transformers = load_tox21(featurizer='GraphConv')
train_dataset, valid_dataset, test_dataset = tox21_datasets
for split in (train_dataset, valid_dataset):
  print(split.data_dir)

# Evaluation metric: ROC-AUC, averaged over tasks with np.mean
metric = dc.metrics.Metric(
    dc.metrics.roc_auc_score, np.mean, mode="classification")

# Training hyper-parameters
batch_size = 100
num_epochs = 10

# Build and fit the model
model, generator, labels, task_weights = sluice_model(batch_size, tox21_tasks)

model.fit_generator(
    generator(train_dataset, batch_size, epochs=num_epochs),
    checkpoint_interval=1000)


def _evaluate(dataset):
  """Score the fitted model on a single pass over `dataset`."""
  return model.evaluate_generator(
      generator(dataset, batch_size), [metric],
      transformers,
      labels,
      weights=[task_weights],
      per_task_metrics=True)


print("Evaluating model")
train_scores = _evaluate(train_dataset)
valid_scores = _evaluate(valid_dataset)

print("Train scores")
print(train_scores)

print("Validation scores")
print(valid_scores)