Commit cd648e62 authored by Bharath Ramsundar

Continuing refactor of TensorflowModel

parent 4c14b496
+105 −108
@@ -20,10 +20,49 @@ from deepchem.utils.save import log

class TensorflowGraph(object):
  """Simple class that holds information needed to run Tensorflow graph."""
-  def __init__(self, graph, session, name_scopes):
+  def __init__(self, graph, session, name_scopes, output, labels, weights, loss):
    self.graph = graph
    self.session = session
    self.name_scopes = name_scopes
+    self.output = output
+    self.labels = labels
+    self.weights = weights
+    self.loss = loss

+  @staticmethod
+  def get_placeholder_scope(graph, name_scopes):
+    """Gets placeholder scope."""
+    placeholder_root = "placeholders"
+    #with graph.as_default():
+    #  with tf.name_scope(placeholder_root) as scope:
+    #    return scope
+    return TensorflowGraph.shared_name_scope(placeholder_root, graph, name_scopes)
+
+  @staticmethod
+  def shared_name_scope(name, graph, name_scopes):
+    """Returns a singleton TensorFlow scope with the given name.
+
+    Used to prevent '_1'-appended scopes when sharing scopes with child classes.
+
+    Args:
+      name: String. Name scope for group of operations.
+    Returns:
+      tf.name_scope with the provided name.
+    """
+    with graph.as_default():
+      if name not in name_scopes:
+        with tf.name_scope(name) as scope:
+          name_scopes[name] = scope
+      return tf.name_scope(name_scopes[name])
+
+  @staticmethod
+  def get_feed_dict(named_values):
+    feed_dict = {}
+    placeholder_root = "placeholders"
+    for name, value in named_values.iteritems():
+      feed_dict['{}/{}:0'.format(placeholder_root, name)] = value
+    return feed_dict


class TensorflowGraphModel(object):
  """Thin wrapper holding a tensorflow graph and a few vars.
@@ -119,11 +158,11 @@ class TensorflowGraphModel(object):
    # replicated supervisor's default path.
    self._save_path = os.path.join(logdir, 'model.ckpt')

-    self.train_graph = self.construct_graph(train=True)
-    self.eval_graph = self.construct_graph(train=False)
+    self.train_graph = self.construct_graph(training=True)
+    self.eval_graph = self.construct_graph(training=False)


-  def construct_graph(self, train):
+  def construct_graph(self, training):
    """Returns a TensorflowGraph object."""
    graph = tf.Graph() 

@@ -134,62 +173,38 @@ class TensorflowGraphModel(object):
    # when subclass-overridden methods use the same scopes.
    name_scopes = {}

-    if train:
-      with graph.as_default():
-        model_ops.set_training(train)

    # Setup graph
    with graph.as_default():
-      with tf.name_scope('core_model'):
-        self.build(graph, name_scopes)
-      self.add_label_placeholders(graph, name_scopes)
-      self.add_weight_placeholders(graph, name_scopes)

-    if train:
-      self.add_training_cost(graph, name_scopes)
+      output = self.build(graph, name_scopes, training)
+      labels = self.add_label_placeholders(graph, name_scopes)
+      ####################################################### DEBUG
+      print("labels")
+      print(labels)
+      ####################################################### DEBUG
+      weights = self.add_example_weight_placeholders(graph, name_scopes)

+    if training:
+      loss = self.add_training_cost(graph, name_scopes, output, labels, weights)
    else:
-      self.add_output_ops(graph)  # add softmax heads
-    return TensorflowGraph(graph, shared_session, name_scopes)

-  def _get_placeholder_scope(self, graph, name_scopes):
-    """Gets placeholder scope."""
-    placeholder_root = "placeholders"
-    with graph.as_default():
-      with tf.name_scope(placeholder_root) as scope:
-        return scope
-
-  def _shared_name_scope(self, name, graph, name_scopes):
-    """Returns a singleton TensorFlow scope with the given name.
-
-    Used to prevent '_1'-appended scopes when sharing scopes with child classes.
-
-    Args:
-      name: String. Name scope for group of operations.
-    Returns:
-      tf.name_scope with the provided name.
-    """
-    with graph.as_default():
-      if name not in name_scopes:
-        with tf.name_scope(name) as scope:
-          name_scopes[name] = scope
-      placeholder_scope = tf.name_scope(name_scopes[name])
-      return placeholder_scope
+      loss = None
+      output = self.add_output_ops(graph, output)  # add softmax heads
+    return TensorflowGraph(graph, shared_session, name_scopes, output, labels,
+                           weights, loss)

-  def add_training_cost(self, graph, name_scopes):
+  def add_training_cost(self, graph, name_scopes, output, labels, weights):
    with graph.as_default():
-      self.require_attributes(['output', 'labels', 'weights'])
      epsilon = 1e-3  # small float to avoid dividing by zero
      weighted_costs = []  # weighted costs for each example
      gradient_costs = []  # costs used for gradient calculation

-      with self._shared_name_scope('costs', graph, name_scopes):
+      with TensorflowGraph.shared_name_scope('costs', graph, name_scopes):
        for task in xrange(self.n_tasks):
          task_str = str(task).zfill(len(str(self.n_tasks)))
-          with self._shared_name_scope(
+          with TensorflowGraph.shared_name_scope(
              'cost_{}'.format(task_str), graph, name_scopes):
            with tf.name_scope('weighted'):
-              weighted_cost = self.cost(self.output[task], self.labels[task],
-                                        self.weights[task])
+              weighted_cost = self.cost(output[task], labels[task],
+                                        weights[task])
              weighted_costs.append(weighted_cost)

            with tf.name_scope('gradient'):
@@ -202,7 +217,7 @@ class TensorflowGraphModel(object):
              gradient_costs.append(gradient_cost)

        # aggregated costs
-        with self._shared_name_scope('aggregated', graph, name_scopes):
+        with TensorflowGraph.shared_name_scope('aggregated', graph, name_scopes):
          with tf.name_scope('gradient'):
            loss = tf.add_n(gradient_costs)

@@ -211,10 +226,10 @@ class TensorflowGraphModel(object):
            penalty = model_ops.WeightDecay(self.penalty_type, self.penalty)
            loss += penalty

-        # loss used for gradient calculation
-        self.loss = loss
-
-      return weighted_costs
+      ############################################################ DEBUG
+      #return weighted_costs
+      ############################################################ DEBUG
+      return loss

  def fit(self, dataset, nb_epoch=10, pad_batches=False, shuffle=False,
          max_checkpoints_to_keep=5, log_every_N_batches=50):
@@ -235,9 +250,9 @@ class TensorflowGraphModel(object):
    batch_size = self.batch_size
    step_per_epoch = np.ceil(float(n_datapoints)/batch_size)
    log("Training for %d epochs" % nb_epoch, self.verbosity)
-    with self.graph.as_default():
-      self.require_attributes(['loss', 'updates'])
-      train_op = self.get_training_op()
+    with self.train_graph.graph.as_default():
+      train_op = self.get_training_op(
+          self.train_graph.graph, self.train_graph.loss)
      with self._get_shared_session(train=True) as sess:
        sess.run(tf.initialize_all_variables())
        saver = tf.train.Saver(max_to_keep=max_checkpoints_to_keep)
@@ -254,13 +269,17 @@ class TensorflowGraphModel(object):
              log("On batch %d" % ind, self.verbosity)
            # Run training op.
            feed_dict = self.construct_feed_dict(X_b, y_b, w_b, ids_b)
-            fetches = self.output + [
-                train_op, self.loss, self.updates]
+            ######################################################## DEBUG
+            print("feed_dict.keys()")
+            print(feed_dict.keys())
+            ######################################################## DEBUG
+            fetches = self.train_graph.output + [
+                train_op, self.train_graph.loss]
            fetched_values = sess.run(
                fetches,
                feed_dict=feed_dict)
-            output = fetched_values[:len(self.output)]
-            _, loss = fetched_values[-3], fetched_values[-2]
+            output = fetched_values[:len(self.train_graph.output)]
+            loss = fetched_values[-1]
            avg_loss += loss
            y_pred = np.squeeze(np.array(output))
            y_b = y_b.flatten()
@@ -313,7 +332,7 @@ class TensorflowGraphModel(object):
      with self._get_shared_session(train=False).as_default():
        feed_dict = self.construct_feed_dict(X)
        data = self._get_shared_session(train=False).run(
-            self.output, feed_dict=feed_dict)
+            self.eval_graph.output, feed_dict=feed_dict)
        batch_output = np.asarray(data[:n_tasks], dtype=float)
        # reshape to batch_size x n_tasks x ...
        if batch_output.ndim == 3:
@@ -331,16 +350,16 @@ class TensorflowGraphModel(object):

    return np.copy(outputs)

-  def add_output_ops(self, graph):
+  def add_output_ops(self, graph, output):
    """Replace logits with softmax outputs."""
    with graph.as_default():
      softmax = []
      with tf.name_scope('inference'):
-        for i, logits in enumerate(self.output):
+        for i, logits in enumerate(output):
          softmax.append(tf.nn.softmax(logits, name='softmax_%d' % i))
-      self.output = softmax
+      output = softmax

-  def build(self, graph):
+  def build(self, graph, name_scopes, training):
    """Define the core graph.

    NOTE(user): Operations defined here should be in their own name scope to
@@ -372,7 +391,7 @@ class TensorflowGraphModel(object):
    """
    raise NotImplementedError('Must be overridden by concrete subclass')

-  def add_weight_placeholders(self, graph, name_scopes):
+  def add_example_weight_placeholders(self, graph, name_scopes):
    """Add Placeholders for example weights for each task.

    This method creates the following Placeholders for each task:
@@ -382,13 +401,13 @@ class TensorflowGraphModel(object):
    feeding and fetching the same tensor.
    """
    weights = []
-    placeholder_scope = self._get_placeholder_scope(graph, name_scopes)
+    placeholder_scope = TensorflowGraph.get_placeholder_scope(graph, name_scopes)
    for task in xrange(self.n_tasks):
-      with tf.name_scope(placeholder_scope):
+      with placeholder_scope:
        weights.append(tf.identity(
            tf.placeholder(tf.float32, shape=[None],
                           name='weights_%d' % task)))
-    self.weights = weights
+    return weights

  def cost(self, output, labels, weights):
    """Calculate single-task training cost for a batch of examples.
@@ -404,7 +423,7 @@ class TensorflowGraphModel(object):
    """
    raise NotImplementedError('Must be overridden by concrete subclass')

-  def get_training_op(self):
+  def get_training_op(self, graph, loss):
    """Get training op for applying gradients to variables.

    Subclasses that need to do anything fancy with gradients should override
@@ -413,8 +432,9 @@ class TensorflowGraphModel(object):
    Returns:
    A training op.
    """
+    with graph.as_default():
      opt = model_ops.Optimizer(self.optimizer, self.learning_rate, self.momentum)
-    return opt.minimize(self.loss, name='train')
+      return opt.minimize(loss, name='train')

  def _get_shared_session(self, train):
    # allow_soft_placement=True allows ops without a GPU implementation
@@ -430,12 +450,6 @@ class TensorflowGraphModel(object):
        self.eval_graph.session = tf.Session(config=config)
      return self.eval_graph.session

-  def _get_feed_dict(self, named_values):
-    feed_dict = {}
-    for name, value in named_values.iteritems():
-      feed_dict['{}/{}:0'.format(self.placeholder_root, name)] = value
-    return feed_dict
-
  def restore(self):
    """Restores the model from the provided training checkpoint.

@@ -444,8 +458,7 @@ class TensorflowGraphModel(object):
    """
    if self._restored_model:
      return
-    with self.graph.as_default():
-      assert not model_ops.is_training()
+    with self.eval_graph.graph.as_default():
      last_checkpoint = self._find_last_checkpoint()

      # TODO(rbharath): Is setting train=False right here?
@@ -470,20 +483,6 @@ class TensorflowGraphModel(object):
          pass
    return os.path.join(self.logdir, last_checkpoint)
          
-  def require_attributes(self, attrs):
-    """Require class attributes to be defined.
-
-    Args:
-      attrs: A list of attribute names that must be defined.
-
-    Raises:
-      AssertionError: if a required attribute is not defined.
-    """
-    for attr in attrs:
-      if getattr(self, attr, None) is None:
-        raise AssertionError(
-            'self.%s must be defined by a concrete subclass' % attr)
-
class TensorflowClassifier(TensorflowGraphModel):
  """Classification model.

@@ -525,17 +524,17 @@ class TensorflowClassifier(TensorflowGraphModel):
    Placeholders are wrapped in identity ops to avoid the error caused by
    feeding and fetching the same tensor.
    """
-    placeholder_scope = self._get_placeholder_scope(graph, name_scopes)
+    placeholder_scope = TensorflowGraph.get_placeholder_scope(graph, name_scopes)
    with graph.as_default():
      batch_size = self.batch_size 
      n_classes = self.n_classes
      labels = []
      for task in xrange(self.n_tasks):
-        with tf.name_scope(placeholder_scope):
+        with placeholder_scope:
          labels.append(tf.identity(
              tf.placeholder(tf.float32, shape=[None, n_classes],
                             name='labels_%d' % task)))
-      self.labels = labels
+      return labels


class TensorflowRegressor(TensorflowGraphModel):
@@ -572,7 +571,7 @@ class TensorflowRegressor(TensorflowGraphModel):
    """
    return tf.mul(0.5 * tf.square(output - labels), weights)

-  def add_label_placeholders(self, graph, placeholder_scope):
+  def add_label_placeholders(self, graph, name_scopes):
    """Add Placeholders for labels for each task.

    This method creates the following Placeholders for each task:
@@ -581,6 +580,7 @@ class TensorflowRegressor(TensorflowGraphModel):
    Placeholders are wrapped in identity ops to avoid the error caused by
    feeding and fetching the same tensor.
    """
+    placeholder_scope = TensorflowGraph.get_placeholder_scope(graph, name_scopes)
    with graph.as_default():
      batch_size = self.batch_size
      labels = []
@@ -589,7 +589,7 @@ class TensorflowRegressor(TensorflowGraphModel):
          labels.append(tf.identity(
              tf.placeholder(tf.float32, shape=[None],
                             name='labels_%d' % task)))
-      self.labels = labels
+    return labels

class TensorflowModel(Model):
  """
@@ -599,35 +599,32 @@ class TensorflowModel(Model):
  def __init__(self, model, logdir, verbosity=None):
    assert verbosity in [None, "low", "high"]
    self.verbosity = verbosity
-    if tf_class is None:
-      tf_class = TensorflowGraph
-    self.train_model = tf_class(logdir, train=True)
-    self.eval_model = tf_class(logdir, train=False)
+    self.model_instance = model
+    self.fit_transformers = None

  def fit(self, dataset, shuffle=False):
    """
    Fits TensorflowGraph to data.
    """
-    self.train_model.fit(dataset, shuffle=shuffle)
+    self.model_instance.fit(dataset, shuffle=shuffle)

  def predict_on_batch(self, X):
    """
    Makes predictions on batch of data.
    """
-    return self.eval_model.predict_on_batch(X)
+    return self.model_instance.predict_on_batch(X)

  def predict_grad_on_batch(self, X):
    """
    Calculates gradient of cost function on batch of data.
    """
-    return self.eval_model.predict_grad_on_batch(X)
+    return self.model_instance.predict_grad_on_batch(X)

  def predict_proba_on_batch(self, X):
    """
    Makes predictions on batch of data.
    """
-    return self.eval_model.predict_proba_on_batch(X)
+    return self.model_instance.predict_proba_on_batch(X)

  def save(self):
    """
@@ -639,7 +636,7 @@ class TensorflowModel(Model):
    """
    Loads model from disk. Thin wrapper around restore() for consistency.
    """
-    self.eval_model.restore()
+    self.model_instance.restore()

  def get_num_tasks(self):
-    return self.train_model.n_tasks
+    return self.model_instance.n_tasks
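
Reviewer note: the net effect of this file's changes is that `construct_graph` now returns a `TensorflowGraph` value object carrying `output`, `labels`, `weights`, and `loss`, instead of stashing them on `self`, so the train and eval graphs stay fully independent. A minimal runnable sketch of that pattern, using hypothetical string stand-ins rather than real TensorFlow tensors:

```python
from collections import namedtuple

# Stand-in bundle mirroring the TensorflowGraph constructor in this commit.
GraphBundle = namedtuple(
    'GraphBundle',
    ['graph', 'session', 'name_scopes', 'output', 'labels', 'weights', 'loss'])

def construct_graph(training):
    # In the real code these are a tf.Graph, tf.Session, and tensors;
    # strings here are hypothetical stand-ins to keep the sketch self-contained.
    graph, session, name_scopes = 'graph', 'session', {}
    output = ['logits_0']    # per-task logits
    labels = ['labels_0']    # per-task label placeholders
    weights = ['weights_0']  # per-task example-weight placeholders
    loss = 'loss' if training else None  # eval graph carries no training loss
    return GraphBundle(graph, session, name_scopes, output, labels, weights, loss)

train_graph = construct_graph(training=True)
eval_graph = construct_graph(training=False)
assert train_graph.loss == 'loss' and eval_graph.loss is None
```
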
+26 −19
@@ -9,6 +9,7 @@ import numpy as np
import tensorflow as tf

from deepchem.metrics import from_one_hot
+from deepchem.models.tensorflow_models import TensorflowGraph
from deepchem.models.tensorflow_models import TensorflowClassifier
from deepchem.models.tensorflow_models import TensorflowRegressor
from deepchem.models.tensorflow_models import model_ops
@@ -37,21 +38,26 @@ def softmax(x):
class TensorflowMultiTaskClassifier(TensorflowClassifier):
  """Implements an icml model as configured in a model_config.proto."""

-  def build(self, graph, name_scopes):
+  def build(self, graph, name_scopes, training):
    """Constructs the graph architecture as specified in its config.

    This method creates the following Placeholders:
      mol_features: Molecule descriptor (e.g. fingerprint) tensor with shape
        batch_size x n_features.
    """
-    placeholder_scope = self._get_placeholder_scope(graph, name_scopes)
+    placeholder_scope = TensorflowGraph.get_placeholder_scope(
+        graph, name_scopes)
    n_features = self.n_features
    with graph.as_default():
-      with tf.name_scope(placeholder_scope):
+      with placeholder_scope:
        self.mol_features = tf.placeholder(
            tf.float32,
            shape=[None, n_features],
            name='mol_features')
+        ########################################################### DEBUG
+        print("self.mol_features")
+        print(self.mol_features)
+        ########################################################### DEBUG

      layer_sizes = self.layer_sizes
      weight_init_stddevs = self.weight_init_stddevs
@@ -78,12 +84,13 @@ class TensorflowMultiTaskClassifier(TensorflowClassifier):
                stddev=weight_init_stddevs[i]),
            bias_init=tf.constant(value=bias_init_consts[i],
                                  shape=[layer_sizes[i]])))
-        layer = model_ops.Dropout(layer, dropouts[i])
+        layer = model_ops.Dropout(layer, dropouts[i], training)
        prev_layer = layer
        prev_layer_size = layer_sizes[i]

-      self.output = model_ops.MultitaskLogits(
+      output = model_ops.MultitaskLogits(
          layer, self.n_tasks)
+    return output

  def construct_feed_dict(self, X_b, y_b=None, w_b=None, ids_b=None):
    """Construct a feed dictionary from minibatch data.
@@ -111,7 +118,7 @@ class TensorflowMultiTaskClassifier(TensorflowClassifier):
        # Dummy placeholders
        orig_dict["weights_%d" % task] = np.ones(
            (self.batch_size,)) 
-    return self._get_feed_dict(orig_dict)
+    return TensorflowGraph.get_feed_dict(orig_dict)

  def predict_proba_on_batch(self, X):
    """Return model output for the provided input.
@@ -134,16 +141,14 @@ class TensorflowMultiTaskClassifier(TensorflowClassifier):
    if not self._restored_model:
      self.restore()
    with self.eval_graph.graph.as_default():
-      assert not model_ops.is_training()
-      self.require_attributes(['output'])

      # run eval data through the model
      n_tasks = self.n_tasks
      outputs = []
-      with self._get_shared_session().as_default():
+      with self._get_shared_session(train=False).as_default():
        feed_dict = self.construct_feed_dict(X)
-        data = self._get_shared_session().run(
-            self.output, feed_dict=feed_dict)
+        data = self._get_shared_session(train=False).run(
+            self.eval_graph.output, feed_dict=feed_dict)
        batch_outputs = np.asarray(data[:n_tasks], dtype=float)
        # reshape to batch_size x n_tasks x ...
        if batch_outputs.ndim == 3:
@@ -164,7 +169,7 @@ class TensorflowMultiTaskClassifier(TensorflowClassifier):
class TensorflowMultiTaskRegressor(TensorflowRegressor):
  """Implements an icml model as configured in a model_config.proto."""

-  def build(self, graph):
+  def build(self, graph, name_scopes, training):
    """Constructs the graph architecture as specified in its config.

    This method creates the following Placeholders:
@@ -172,8 +177,10 @@ class TensorflowMultiTaskRegressor(TensorflowRegressor):
        batch_size x n_features.
    """
    n_features = self.n_inputs
+    placeholder_scope = TensorflowGraph.get_placeholder_scope(
+        graph, name_scopes)
    with graph.as_default():
-      with tf.name_scope(self.placeholder_scope):
+      with tf.name_scope(placeholder_scope):
        self.mol_features = tf.placeholder(
            tf.float32,
            shape=[None, n_features],
@@ -208,7 +215,7 @@ class TensorflowMultiTaskRegressor(TensorflowRegressor):
        prev_layer = layer
        prev_layer_size = layer_sizes[i]

-      self.output = []
+      output = []
      for task in range(self.n_tasks):
        self.output.append(tf.squeeze(
            model_ops.FullyConnectedLayer(
@@ -219,6 +226,7 @@ class TensorflowMultiTaskRegressor(TensorflowRegressor):
                    stddev=weight_init_stddevs[i]),
                bias_init=tf.constant(value=bias_init_consts[i],
                                      shape=[1]))))
+      return output

  def construct_feed_dict(self, X_b, y_b=None, w_b=None, ids_b=None):
    """Construct a feed dictionary from minibatch data.
@@ -246,7 +254,7 @@ class TensorflowMultiTaskRegressor(TensorflowRegressor):
        # Dummy placeholders
        orig_dict["weights_%d" % task] = np.ones(
            (self.batch_size,)) 
-    return self._get_feed_dict(orig_dict)
+    return TensorflowGraph.get_feed_dict(orig_dict)

  def predict_on_batch(self, X):
    """Return model output for the provided input.
@@ -270,21 +278,21 @@ class TensorflowMultiTaskRegressor(TensorflowRegressor):
    """
    if not self._restored_model:
      self.restore()
-    with self.graph.as_default():
+    with self.train_graph.graph.as_default():
      assert not model_ops.is_training()
      self.require_attributes(['output'])

      # run eval data through the model
      n_tasks = self.n_tasks
      outputs = []
-      with self._get_shared_session().as_default():
+      with self._get_shared_session(train=False).as_default():
        n_samples = len(X)
        # Some tensorflow models can't handle variadic batches,
        # especially models using tf.pack, tf.split. Pad batch-size
        # to handle these cases.
        X = pad_features(self.batch_size, X)
        feed_dict = self.construct_feed_dict(X)
-        data = self._get_shared_session().run(
+        data = self._get_shared_session(train=False).run(
            self.output, feed_dict=feed_dict)
        batch_outputs = np.asarray(data[:n_tasks], dtype=float)
        # reshape to batch_size x n_tasks x ...
@@ -309,4 +317,3 @@ class TensorflowMultiTaskRegressor(TensorflowRegressor):
        outputs = np.squeeze(np.concatenate(outputs)) 

    return np.copy(outputs)
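
Reviewer note: both `construct_feed_dict` implementations now delegate to `TensorflowGraph.get_feed_dict`, which assumes every placeholder was created under the `placeholders` name scope, so a minibatch keyed by bare names maps onto tensor names like `placeholders/mol_features:0`. A small Python 3 sketch of that key mapping (the diff itself uses the Python 2 `iteritems`):

```python
import numpy as np

def get_feed_dict(named_values, placeholder_root='placeholders'):
    # Map bare placeholder names to the fully scoped tensor names that
    # session.run() expects, mirroring TensorflowGraph.get_feed_dict above.
    return {'{}/{}:0'.format(placeholder_root, name): value
            for name, value in named_values.items()}

batch = {
    'mol_features': np.ones((8, 1024)),  # hypothetical fingerprint minibatch
    'labels_0': np.zeros((8, 2)),
    'weights_0': np.ones((8,)),
}
feed_dict = get_feed_dict(batch)
assert 'placeholders/mol_features:0' in feed_dict
```
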
+7 −5
@@ -33,8 +33,8 @@ def AddBias(tensor, init=None, name=None):
    return tf.nn.bias_add(tensor, b)


-def BatchNormalize(tensor, convolution, mask=None, epsilon=0.001,
-                   scale_after_normalization=True, decay=0.999,
+def BatchNormalize(tensor, convolution, training=True, mask=None,
+                   epsilon=0.001, scale_after_normalization=True, decay=0.999,
                   global_step=None, name=None):
  """Batch normalization.

@@ -86,7 +86,7 @@ def BatchNormalize(tensor, convolution, mask=None, epsilon=0.001,
    # moving averages from training.
    mean_moving_average = MovingAverage(mean, global_step, decay)
    variance_moving_average = MovingAverage(variance, global_step, decay)
-    if not is_training():
+    if training:
      mean = mean_moving_average
      variance = variance_moving_average

@@ -130,7 +130,7 @@ def MovingAverage(tensor, global_step, decay=0.999):
  return exponential_moving_average.average(tensor)


-def Dropout(tensor, dropout_prob, training_only=True):
+def Dropout(tensor, dropout_prob, training=True, training_only=True):
  """Random dropout.

  This implementation supports "always-on" dropout (training_only=False), which
@@ -157,7 +157,7 @@ def Dropout(tensor, dropout_prob, training_only=True):
  if not dropout_prob:
    return tensor  # do nothing
  keep_prob = 1.0 - dropout_prob
-  if is_training() or not training_only:
+  if training or not training_only:
    tensor = tf.nn.dropout(tensor, keep_prob)
  return tensor

@@ -193,6 +193,7 @@ def FullyConnectedLayer(tensor, size, weight_init=None, bias_init=None,
    b = tf.Variable(bias_init, name='b', dtype=tf.float32)
    return tf.nn.xw_plus_b(tensor, w, b)

+'''
def is_training():
  """Determine whether the default graph is in training mode.

@@ -210,6 +211,7 @@ def is_training():
  elif len(train) > 1:
    raise ValueError('Training mode has more than one setting.')
  return train[0]
+'''

def WeightDecay(penalty_type, penalty):
  """Add weight decay.