Commit ed7782f0 authored by Peter Eastman's avatar Peter Eastman
Browse files

Use queue in TensorflowGraphModel

parent 367bea14
Loading
Loading
Loading
Loading
+19 −4
Original line number Diff line number Diff line
@@ -95,20 +95,35 @@ class TensorflowMultiTaskIRVClassifier(TensorflowLogisticRegression):
    with graph.as_default():
      output = []
      with placeholder_scope:
        self.features = tf.placeholder(
        mol_features = tf.placeholder(
            tf.float32, shape=[None, self.n_features], name='mol_features')
      with tf.name_scope('variable'):
        V = tf.Variable(tf.constant([0.01, 1.]), name="vote", dtype=tf.float32)
        W = tf.Variable(tf.constant([1., 1.]), name="w", dtype=tf.float32)
        b = tf.Variable(tf.constant([0.01]), name="b", dtype=tf.float32)
        b2 = tf.Variable(tf.constant([0.01]), name="b2", dtype=tf.float32)

      label_placeholders = self.add_label_placeholders(graph, name_scopes)
      weight_placeholders = self.add_example_weight_placeholders(graph, name_scopes)
      if training:
        graph.queue = tf.FIFOQueue(capacity=5, dtypes=[tf.float32]*(len(label_placeholders)+len(weight_placeholders)+1))
        graph.enqueue = graph.queue.enqueue([mol_features]+label_placeholders+weight_placeholders)
        queue_outputs = graph.queue.dequeue()
        labels = queue_outputs[1:len(label_placeholders)+1]
        weights = queue_outputs[len(label_placeholders)+1:]
        features = queue_outputs[0]
      else:
        labels = label_placeholders
        weights = weight_placeholders
        features = mol_features

      for count in range(self.n_tasks):
        similarity = self.features[:, 2 * K * count:(2 * K * count + K)]
        similarity = features[:, 2 * K * count:(2 * K * count + K)]
        ys = tf.to_int32(
            self.features[:, (2 * K * count + K):2 * K * (count + 1)])
            features[:, (2 * K * count + K):2 * K * (count + 1)])
        R = b + W[0] * similarity + W[1] * tf.constant(
            np.arange(K) + 1, dtype=tf.float32)
        R = tf.sigmoid(R)
        z = tf.reduce_sum(R * tf.gather(V, ys), axis=1) + b2
        output.append(tf.reshape(z, shape=[-1, 1]))
    return output
    return (output, labels, weights)
+45 −24
Original line number Diff line number Diff line
@@ -13,6 +13,7 @@ import numpy as np
import pandas as pd
import tensorflow as tf
import tempfile
import threading
from deepchem.models import Model
from deepchem.metrics import from_one_hot
from deepchem.nn import model_ops
@@ -239,9 +240,7 @@ class TensorflowGraphModel(Model):
    with graph.as_default():
      if seed is not None:
        tf.set_random_seed(seed)
      output = self.build(graph, name_scopes, training)
      labels = self.add_label_placeholders(graph, name_scopes)
      weights = self.add_example_weight_placeholders(graph, name_scopes)
      (output, labels, weights) = self.build(graph, name_scopes, training)

    if training:
      loss = self.add_training_cost(graph, name_scopes, output, labels, weights)
@@ -295,6 +294,7 @@ class TensorflowGraphModel(Model):

      return loss


  def fit(self,
          dataset,
          nb_epoch=10,
@@ -335,33 +335,54 @@ class TensorflowGraphModel(Model):
        saver = tf.train.Saver(max_to_keep=max_checkpoints_to_keep)
        # Save an initial checkpoint.
        saver.save(sess, self._save_path, global_step=0)

        # Define the code that runs on a separate thread to feed data into the queue.
        def enqueue(sess, dataset, nb_epoch, epoch_end_indices):
          index = 0
          for epoch in range(nb_epoch):
          avg_loss, n_batches = 0., 0
          for ind, (X_b, y_b, w_b, ids_b) in enumerate(
              # Turns out there are valid cases where we don't want pad-batches
              # on by default.
              #dataset.iterbatches(batch_size, pad_batches=True)):
              dataset.iterbatches(
                  self.batch_size, pad_batches=self.pad_batches)):
            if ind % log_every_N_batches == 0:
              log("On batch %d" % ind, self.verbose)
            # Run training op.
            for X_b, y_b, w_b, ids_b in dataset.iterbatches(self.batch_size, pad_batches=self.pad_batches):
              feed_dict = self.construct_feed_dict(X_b, y_b, w_b, ids_b)
              sess.run(self.train_graph.graph.enqueue, feed_dict=feed_dict)
              index += 1
            epoch_end_indices.append(index)
          sess.run(self.train_graph.graph.queue.close())

        epoch_end_indices = []
        enqueue_thread = threading.Thread(target=enqueue, args=[sess, dataset, nb_epoch, epoch_end_indices])
        enqueue_thread.daemon = True
        enqueue_thread.start()

        # Main training loop.
        try:
          epoch = 0
          index = 0
          index_in_epoch = 0
          avg_loss = 0.0
          while True:
            if index_in_epoch % log_every_N_batches == 0:
              log("On batch %d" % index_in_epoch, self.verbose)
            # Run training op.
            fetches = self.train_graph.output + [
                train_op, self.train_graph.loss
            ]
            fetched_values = sess.run(fetches, feed_dict=feed_dict)
            output = fetched_values[:len(self.train_graph.output)]
            fetched_values = sess.run(fetches)
            loss = fetched_values[-1]
            avg_loss += loss
            y_pred = np.squeeze(np.array(output))
            y_b = y_b.flatten()
            n_batches += 1
            index += 1
            index_in_epoch += 1
            if len(epoch_end_indices) > 0 and index >= epoch_end_indices[0]:
              # We have reached the end of an epoch.
              if epoch % checkpoint_interval == checkpoint_interval - 1:
                saver.save(sess, self._save_path, global_step=epoch)
          avg_loss = float(avg_loss) / n_batches
              avg_loss = float(avg_loss) / index_in_epoch
              log('Ending epoch %d: Average loss %g' % (epoch, avg_loss),
                  self.verbose)
              index_in_epoch = 0
              avg_loss = 0.0
              del epoch_end_indices[0]
        except tf.errors.OutOfRangeError:
          # We have reached the end of the data.
          pass
        # Always save a final checkpoint when complete.
        saver.save(sess, self._save_path, global_step=epoch + 1)
    ############################################################## TIMING
+32 −6
Original line number Diff line number Diff line
@@ -33,7 +33,7 @@ class TensorflowMultiTaskClassifier(TensorflowClassifier):
    n_features = self.n_features
    with graph.as_default():
      with placeholder_scope:
        self.mol_features = tf.placeholder(
        mol_features = tf.placeholder(
            tf.float32, shape=[None, n_features], name='mol_features')

      layer_sizes = self.layer_sizes
@@ -50,7 +50,20 @@ class TensorflowMultiTaskClassifier(TensorflowClassifier):
      n_layers = lengths_set.pop()
      assert n_layers > 0, 'Must have some layers defined.'

      prev_layer = self.mol_features
      label_placeholders = self.add_label_placeholders(graph, name_scopes)
      weight_placeholders = self.add_example_weight_placeholders(graph, name_scopes)
      if training:
        graph.queue = tf.FIFOQueue(capacity=5, dtypes=[tf.float32]*(len(label_placeholders)+len(weight_placeholders)+1))
        graph.enqueue = graph.queue.enqueue([mol_features]+label_placeholders+weight_placeholders)
        queue_outputs = graph.queue.dequeue()
        labels = queue_outputs[1:len(label_placeholders)+1]
        weights = queue_outputs[len(label_placeholders)+1:]
        prev_layer = queue_outputs[0]
      else:
        labels = label_placeholders
        weights = weight_placeholders
        prev_layer = mol_features

      prev_layer_size = n_features
      for i in range(n_layers):
        layer = tf.nn.relu(
@@ -67,7 +80,7 @@ class TensorflowMultiTaskClassifier(TensorflowClassifier):
        prev_layer_size = layer_sizes[i]

      output = model_ops.multitask_logits(layer, self.n_tasks)
    return output
    return (output, labels, weights)

  def construct_feed_dict(self, X_b, y_b=None, w_b=None, ids_b=None):
    """Construct a feed dictionary from minibatch data.
@@ -112,7 +125,7 @@ class TensorflowMultiTaskRegressor(TensorflowRegressor):
                                                              name_scopes)
    with graph.as_default():
      with placeholder_scope:
        self.mol_features = tf.placeholder(
        mol_features = tf.placeholder(
            tf.float32, shape=[None, n_features], name='mol_features')

      layer_sizes = self.layer_sizes
@@ -129,7 +142,20 @@ class TensorflowMultiTaskRegressor(TensorflowRegressor):
      n_layers = lengths_set.pop()
      assert n_layers > 0, 'Must have some layers defined.'

      prev_layer = self.mol_features
      label_placeholders = self.add_label_placeholders(graph, name_scopes)
      weight_placeholders = self.add_example_weight_placeholders(graph, name_scopes)
      if training:
        graph.queue = tf.FIFOQueue(capacity=5, dtypes=[tf.float32]*(len(label_placeholders)+len(weight_placeholders)+1))
        graph.enqueue = graph.queue.enqueue([mol_features]+label_placeholders+weight_placeholders)
        queue_outputs = graph.queue.dequeue()
        labels = queue_outputs[1:len(label_placeholders)+1]
        weights = queue_outputs[len(label_placeholders)+1:]
        prev_layer = queue_outputs[0]
      else:
        labels = label_placeholders
        weights = weight_placeholders
        prev_layer = mol_features

      prev_layer_size = n_features
      for i in range(n_layers):
        layer = tf.nn.relu(
@@ -157,7 +183,7 @@ class TensorflowMultiTaskRegressor(TensorflowRegressor):
                        stddev=weight_init_stddevs[i]),
                    bias_init=tf.constant(value=bias_init_consts[i], shape=[1
                                                                           ]))))
      return output
    return (output, labels, weights)

  def construct_feed_dict(self, X_b, y_b=None, w_b=None, ids_b=None):
    """Construct a feed dictionary from minibatch data.
+0 −3
Original line number Diff line number Diff line
@@ -827,9 +827,6 @@ def fully_connected_layer(tensor,
  #print("tensor")
  #print(tensor)
  ###################################################### DEBUG
  if len(tensor.get_shape()) != 2:
    raise ValueError('Dense layer input must be 2D, not %dD' %
                     len(tensor.get_shape()))
  if weight_init is None:
    num_features = tensor.get_shape()[-1].value
    weight_init = tf.truncated_normal([num_features, size], stddev=0.01)