Commit d66cefdd authored by Bharath Ramsundar

Formatting fixes

parent 19cc947c
+71 −54
@@ -24,46 +24,65 @@ from deepchem.nn import model_ops
# TODO(rbharath): Find a way to get rid of this import?
from deepchem.models.tf_new_models.graph_topology import merge_dicts


def get_loss_fn(final_loss):
  # Obtain appropriate loss function
  if final_loss == 'L2':

    def loss_fn(x, t):
      diff = tf.sub(x, t)
      return tf.reduce_sum(tf.square(diff), 0)
  elif final_loss == 'weighted_L2':

    def loss_fn(x, t, w):
      diff = tf.sub(x, t)
      weighted_diff = tf.mul(diff, w)
      return tf.reduce_sum(tf.square(weighted_diff), 0)
  elif final_loss == 'L1':

    def loss_fn(x, t):
      diff = tf.sub(x, t)
      return tf.reduce_sum(tf.abs(diff), 0)
  elif final_loss == 'huber':

    def loss_fn(x, t):
      diff = tf.sub(x, t)
      return tf.reduce_sum(
          tf.minimum(0.5 * tf.square(diff),
                     huber_d * (tf.abs(diff) - 0.5 * huber_d)), 0)
  elif final_loss == 'cross_entropy':

    def loss_fn(x, t, w):
      costs = tf.nn.sigmoid_cross_entropy_with_logits(x, t)
      weighted_costs = tf.mul(costs, w)
      return tf.reduce_sum(weighted_costs)
  elif final_loss == 'hinge':

    def loss_fn(x, t, w):
      t = tf.mul(2.0, t) - 1
      costs = tf.maximum(0.0, 1.0 - tf.mul(t, x))
      weighted_costs = tf.mul(costs, w)
      return tf.reduce_sum(weighted_costs)

  return loss_fn
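
As an illustration only (not part of this commit), here is a minimal sketch of how the returned loss_fn might be exercised, assuming the same pre-1.0 TensorFlow API (tf.sub, tf.Session) that the code above targets; note that the 'huber' branch relies on a huber_d value defined in the enclosing scope, and the 'hinge' branch first maps {0, 1} labels to {-1, +1} via 2*t - 1.

import tensorflow as tf
from deepchem.models.tf_new_models.multitask_classifier import get_loss_fn

# Hypothetical usage sketch: the 'L2' variant sums squared differences over
# the batch dimension, yielding one value per task/column.
loss_fn = get_loss_fn('L2')
x = tf.constant([[1.0, 2.0], [3.0, 4.0]])  # predictions, shape (batch, tasks)
t = tf.constant([[1.0, 0.0], [0.0, 4.0]])  # targets, same shape
loss = loss_fn(x, t)  # per-task sums of squared error: [9., 4.]

with tf.Session() as sess:
  print(sess.run(loss))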


class MultitaskGraphClassifier(Model):

  def __init__(self, model, n_tasks, n_feat, logdir=None, batch_size=50,
               final_loss='cross_entropy', learning_rate=.001,
               optimizer_type="adam", learning_rate_decay_time=1000,
               beta1=.9, beta2=.999, pad_batches=True, verbose=True):
  def __init__(self,
               model,
               n_tasks,
               n_feat,
               logdir=None,
               batch_size=50,
               final_loss='cross_entropy',
               learning_rate=.001,
               optimizer_type="adam",
               learning_rate_decay_time=1000,
               beta1=.9,
               beta2=.999,
               pad_batches=True,
               verbose=True):

    self.verbose = verbose
    self.n_tasks = n_tasks
@@ -126,13 +145,13 @@ class MultitaskGraphClassifier(Model):
    ################################################################# DEBUG

    feat = self.model.return_outputs()
    output = model_ops.multitask_logits(
        feat, self.n_tasks)
    output = model_ops.multitask_logits(feat, self.n_tasks)
    return output

  def add_optimizer(self):
    if self.optimizer_type == "adam":
      self.optimizer = tf.train.AdamOptimizer(self.learning_rate, 
      self.optimizer = tf.train.AdamOptimizer(
          self.learning_rate,
          beta1=self.optimizer_beta1,
          beta2=self.optimizer_beta2,
          epsilon=self.epsilon)
@@ -150,8 +169,7 @@ class MultitaskGraphClassifier(Model):
      y_b = np.zeros((n_samples, self.n_tasks))
    if w_b is None:
      w_b = np.zeros((n_samples, self.n_tasks))
    targets_dict = {self.label_placeholder : y_b,
                    self.weight_placeholder : w_b}
    targets_dict = {self.label_placeholder: y_b, self.weight_placeholder: w_b}

    # Get graph information
    atoms_dict = self.graph_topology.batch_to_feed_dict(X_b)
@@ -181,8 +199,7 @@ class MultitaskGraphClassifier(Model):
          tf.one_hot(tf.to_int32(tf.squeeze(task_label_vector)), 2))
      # Since we use tf.nn.softmax_cross_entropy_with_logits note that we pass in
      # un-softmaxed logits rather than softmax outputs.
      task_loss = loss_fn(logits[task], one_hot_labels,
                          task_weight_vector) 
      task_loss = loss_fn(logits[task], one_hot_labels, task_weight_vector)
      task_losses.append(task_loss)
    # It's ok to divide by just the batch_size rather than the number of nonzero
    # examples (effect averages out)
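
For context, an illustrative sketch (not part of the commit) of the softmax_cross_entropy_with_logits note above: the op applies the softmax internally, so the per-task outputs of multitask_logits are fed in as raw scores rather than probabilities. The tensors below are hypothetical.

import tensorflow as tf

# Raw, un-softmaxed scores for one task, shape (batch, 2), and one-hot labels.
task_logits = tf.constant([[2.0, -1.0], [0.5, 0.5]])
one_hot_labels = tf.constant([[1.0, 0.0], [0.0, 1.0]])
# The op softmaxes task_logits itself; passing tf.nn.softmax(task_logits)
# here would apply the squashing twice and distort the loss.
per_example_loss = tf.nn.softmax_cross_entropy_with_logits(
    logits=task_logits, labels=one_hot_labels)
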
@@ -198,21 +215,24 @@ class MultitaskGraphClassifier(Model):
        softmax.append(tf.nn.softmax(logits, name='softmax_%d' % i))
    return softmax

  def fit(self, dataset, nb_epoch=10, 
          max_checkpoints_to_keep=5, log_every_N_batches=50, **kwargs):
  def fit(self,
          dataset,
          nb_epoch=10,
          max_checkpoints_to_keep=5,
          log_every_N_batches=50,
          **kwargs):
    # Perform the optimization
    log("Training for %d epochs" % nb_epoch, self.verbose)

    # TODO(rbharath): Disabling saving for now to try to debug.
    for epoch in range(nb_epoch):
      log("Starting epoch %d" % epoch, self.verbose)
      for batch_num, (X_b, y_b, w_b, ids_b) in enumerate(dataset.iterbatches(
          self.batch_size, pad_batches=self.pad_batches)):
      for batch_num, (X_b, y_b, w_b, ids_b) in enumerate(
          dataset.iterbatches(self.batch_size, pad_batches=self.pad_batches)):
        if batch_num % log_every_N_batches == 0:
          log("On batch %d" % batch_num, self.verbose)
        self.sess.run(
            self.train_op,
            feed_dict=self.construct_feed_dict(X_b, y_b, w_b))
            self.train_op, feed_dict=self.construct_feed_dict(X_b, y_b, w_b))

  def save(self):
    """
@@ -228,8 +248,7 @@ class MultitaskGraphClassifier(Model):
  def predict_proba(self, dataset, transformers=[], n_classes=2, **kwargs):
    """Wraps predict_proba to set batch_size/padding."""
    return super(MultitaskGraphClassifier, self).predict_proba(
        dataset, transformers, n_classes=n_classes,
        batch_size=self.batch_size)
        dataset, transformers, n_classes=n_classes, batch_size=self.batch_size)

  def predict_on_batch(self, X):
    """Return model output for the provided input.
@@ -241,8 +260,7 @@ class MultitaskGraphClassifier(Model):
    with self.sess.as_default():
      feed_dict = self.construct_feed_dict(X)
      # Shape (n_samples, n_tasks)
      batch_outputs = self.sess.run(
          self.outputs, feed_dict=feed_dict)
      batch_outputs = self.sess.run(self.outputs, feed_dict=feed_dict)

    n_samples = len(X)
    outputs = np.zeros((n_samples, self.n_tasks))
@@ -258,8 +276,7 @@ class MultitaskGraphClassifier(Model):
    n_tasks = self.n_tasks
    with self.sess.as_default():
      feed_dict = self.construct_feed_dict(X)
      batch_outputs = self.sess.run(
          self.outputs, feed_dict=feed_dict)
      batch_outputs = self.sess.run(self.outputs, feed_dict=feed_dict)

    n_samples = len(X)
    outputs = np.zeros((n_samples, self.n_tasks, n_classes))
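
For orientation, a hedged end-to-end sketch (not part of this commit) of how the classifier above might be driven; graph_model stands in for whatever graph-convolution model object the constructor expects, train_dataset for a featurized deepchem dataset, and the n_tasks/n_feat values are placeholders, all assumed to be set up elsewhere.

from deepchem.models.tf_new_models.multitask_classifier import MultitaskGraphClassifier

clf = MultitaskGraphClassifier(
    graph_model,  # graph model instance (assumed constructed elsewhere)
    n_tasks=12,   # placeholder value
    n_feat=75,    # placeholder value
    batch_size=50,
    final_loss='cross_entropy',
    learning_rate=1e-3)
clf.fit(train_dataset, nb_epoch=10)
# predict_proba sets batch_size/padding internally; the result has shape
# (n_samples, n_tasks, n_classes), with n_classes defaulting to 2.
probs = clf.predict_proba(train_dataset)
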
+60 −47
@@ -14,12 +14,23 @@ from deepchem.nn import model_ops
from deepchem.models.tf_new_models.graph_topology import merge_dicts
from deepchem.models.tf_new_models.multitask_classifier import get_loss_fn


class MultitaskGraphRegressor(Model):

  def __init__(self, model, n_tasks, n_feat, logdir=None, batch_size=50,
               final_loss='weighted_L2', learning_rate=.001,
               optimizer_type="adam", learning_rate_decay_time=1000,
               beta1=.9, beta2=.999, pad_batches=True, verbose=True):
  def __init__(self,
               model,
               n_tasks,
               n_feat,
               logdir=None,
               batch_size=50,
               final_loss='weighted_L2',
               learning_rate=.001,
               optimizer_type="adam",
               learning_rate_decay_time=1000,
               beta1=.9,
               beta2=.999,
               pad_batches=True,
               verbose=True):

    self.verbose = verbose
    self.n_tasks = n_tasks
@@ -75,20 +86,20 @@ class MultitaskGraphRegressor(Model):
    feat_size = feat.get_shape()[-1].value
    outputs = []
    for task in range(self.n_tasks):
      outputs.append(tf.squeeze(
      outputs.append(
          tf.squeeze(
              model_ops.fully_connected_layer(
                  tensor=feat,
                  size=1,
                  weight_init=tf.truncated_normal(
                  shape=[feat_size, 1],
                  stddev=0.01),
              bias_init=tf.constant(value=0.,
                                    shape=[1]))))
                      shape=[feat_size, 1], stddev=0.01),
                  bias_init=tf.constant(value=0., shape=[1]))))
    return outputs

  def add_optimizer(self):
    if self.optimizer_type == "adam":
      self.optimizer = tf.train.AdamOptimizer(self.learning_rate, 
      self.optimizer = tf.train.AdamOptimizer(
          self.learning_rate,
          beta1=self.optimizer_beta1,
          beta2=self.optimizer_beta2,
          epsilon=self.epsilon)
@@ -106,8 +117,7 @@ class MultitaskGraphRegressor(Model):
      y_b = np.zeros((n_samples, self.n_tasks))
    if w_b is None:
      w_b = np.zeros((n_samples, self.n_tasks))
    targets_dict = {self.label_placeholder : y_b,
                    self.weight_placeholder : w_b}
    targets_dict = {self.label_placeholder: y_b, self.weight_placeholder: w_b}

    # Get graph information
    atoms_dict = self.graph_topology.batch_to_feed_dict(X_b)
@@ -132,7 +142,8 @@ class MultitaskGraphRegressor(Model):
    for task in range(self.n_tasks):
      task_label_vector = task_labels[task]
      task_weight_vector = task_weights[task]
      task_loss = loss_fn(outputs[task], tf.squeeze(task_label_vector),
      task_loss = loss_fn(outputs[task],
                          tf.squeeze(task_label_vector),
                          tf.squeeze(task_weight_vector))
      task_losses.append(task_loss)
    # It's ok to divide by just the batch_size rather than the number of nonzero
@@ -141,21 +152,24 @@ class MultitaskGraphRegressor(Model):
    total_loss = tf.div(total_loss, self.batch_size)
    return total_loss

  def fit(self, dataset, nb_epoch=10, 
          max_checkpoints_to_keep=5, log_every_N_batches=50, **kwargs):
  def fit(self,
          dataset,
          nb_epoch=10,
          max_checkpoints_to_keep=5,
          log_every_N_batches=50,
          **kwargs):
    # Perform the optimization
    log("Training for %d epochs" % nb_epoch, self.verbose)

    # TODO(rbharath): Disabling saving for now to try to debug.
    for epoch in range(nb_epoch):
      log("Starting epoch %d" % epoch, self.verbose)
      for batch_num, (X_b, y_b, w_b, ids_b) in enumerate(dataset.iterbatches(
          self.batch_size, pad_batches=self.pad_batches)):
      for batch_num, (X_b, y_b, w_b, ids_b) in enumerate(
          dataset.iterbatches(self.batch_size, pad_batches=self.pad_batches)):
        if batch_num % log_every_N_batches == 0:
          log("On batch %d" % batch_num, self.verbose)
        self.sess.run(
            self.train_op,
            feed_dict=self.construct_feed_dict(X_b, y_b, w_b))
            self.train_op, feed_dict=self.construct_feed_dict(X_b, y_b, w_b))

  def save(self):
    """
@@ -178,8 +192,7 @@ class MultitaskGraphRegressor(Model):
    with self.sess.as_default():
      feed_dict = self.construct_feed_dict(X)
      # Shape (n_samples, n_tasks)
      batch_outputs = self.sess.run(
          self.outputs, feed_dict=feed_dict)
      batch_outputs = self.sess.run(self.outputs, feed_dict=feed_dict)

    n_samples = len(X)
    outputs = np.zeros((n_samples, self.n_tasks))
+57 −35
@@ -23,9 +23,16 @@ from deepchem.data import get_single_task_test
from deepchem.data import get_task_dataset_minus_support
from deepchem.nn.copy import Input


class SupportGraphClassifier(Model):
  def __init__(self, model, test_batch_size=10, support_batch_size=10,
               learning_rate=.001, similarity="cosine", **kwargs):

  def __init__(self,
               model,
               test_batch_size=10,
               support_batch_size=10,
               learning_rate=.001,
               similarity="cosine",
               **kwargs):
    """Builds a support-based classifier.

    See https://arxiv.org/pdf/1606.04080v1.pdf for definition of support.
@@ -77,7 +84,9 @@ class SupportGraphClassifier(Model):
    self.test_label_placeholder = tf.placeholder(
        dtype='float32', shape=(self.test_batch_size), name="label_placeholder")
    self.test_weight_placeholder = tf.placeholder(
        dtype='float32', shape=(self.test_batch_size), name="weight_placeholder")
        dtype='float32',
        shape=(self.test_batch_size),
        name="weight_placeholder")

    # TODO(rbharath): Should weights for the support be used?
    # Support labels
@@ -85,10 +94,10 @@ class SupportGraphClassifier(Model):
    #    tensor=tf.placeholder(dtype='float32', shape=[self.support_batch_size],
    #    name="support_label_placeholder"))
    self.support_label_placeholder = tf.placeholder(
        dtype='float32', shape=[self.support_batch_size],
        dtype='float32',
        shape=[self.support_batch_size],
        name="support_label_placeholder")
    self.phase = tf.placeholder(dtype='bool',
                                name='keras_learning_phase')
    self.phase = tf.placeholder(dtype='bool', name='keras_learning_phase')
    #################################################################### DEBUG

  def construct_feed_dict(self, test, support, training=True, add_phase=False):
@@ -109,8 +118,14 @@ class SupportGraphClassifier(Model):
      feed_dict[self.phase] = training
    return feed_dict

  def fit(self, dataset, n_episodes_per_epoch=1000, nb_epochs=1, n_pos=1, n_neg=9,
          log_every_n_samples=10, **kwargs):
  def fit(self,
          dataset,
          n_episodes_per_epoch=1000,
          nb_epochs=1,
          n_pos=1,
          n_neg=9,
          log_every_n_samples=10,
          **kwargs):
    """Fits model on dataset using cached supports.

    For each epoch, sample n_episodes_per_epoch (support, test) pairs and does
@@ -139,8 +154,8 @@ class SupportGraphClassifier(Model):
    feed_total, run_total = 0, 0
    for epoch in range(nb_epochs):
      # Create different support sets
      episode_generator = EpisodeGenerator(dataset,
          n_pos, n_neg, n_test, n_episodes_per_epoch)
      episode_generator = EpisodeGenerator(dataset, n_pos, n_neg, n_test,
                                           n_episodes_per_epoch)
      recent_losses = []
      for ind, (task, support, test) in enumerate(episode_generator):
        if ind % log_every_n_samples == 0:
@@ -152,7 +167,8 @@ class SupportGraphClassifier(Model):
        feed_total += (feed_end - feed_start)
        # Train on support set, batch pair
        run_start = time.time()
        _, loss = self.sess.run([self.train_op, self.loss_op], feed_dict=feed_dict)
        _, loss = self.sess.run(
            [self.train_op, self.loss_op], feed_dict=feed_dict)
        run_end = time.time()
        run_total += (run_end - run_start)
        if ind % log_every_n_samples == 0:
@@ -166,7 +182,6 @@ class SupportGraphClassifier(Model):
    print("feed_total: %s" % str(feed_total))
    print("run_total: %s" % str(run_total))


  def save(self):
    """Save all models

@@ -275,12 +290,13 @@ class SupportGraphClassifier(Model):
  def predict_on_batch(self, support, test_batch):
    """Make predictions on batch of data."""
    n_samples = len(test_batch)
    padded_test_batch = NumpyDataset(*pad_batch(
        self.test_batch_size, test_batch.X, test_batch.y, test_batch.w,
        test_batch.ids))
    padded_test_batch = NumpyDataset(*pad_batch(self.test_batch_size,
                                                test_batch.X, test_batch.y,
                                                test_batch.w, test_batch.ids))
    feed_dict = self.construct_feed_dict(padded_test_batch, support)
    # Get scores
    pred, scores = self.sess.run([self.pred_op, self.scores_op], feed_dict=feed_dict)
    pred, scores = self.sess.run(
        [self.pred_op, self.scores_op], feed_dict=feed_dict)
    y_pred_batch = np.round(pred)
    # Remove padded elements
    y_pred_batch = y_pred_batch[:n_samples]
@@ -289,12 +305,13 @@ class SupportGraphClassifier(Model):
  def predict_proba_on_batch(self, support, test_batch):
    """Make predictions on batch of data."""
    n_samples = len(test_batch)
    padded_test_batch = NumpyDataset(*pad_batch(
        self.test_batch_size, test_batch.X, test_batch.y, test_batch.w,
        test_batch.ids))
    padded_test_batch = NumpyDataset(*pad_batch(self.test_batch_size,
                                                test_batch.X, test_batch.y,
                                                test_batch.w, test_batch.ids))
    feed_dict = self.construct_feed_dict(padded_test_batch, support)
    # Get scores
    pred, scores = self.sess.run([self.pred_op, self.scores_op], feed_dict=feed_dict)
    pred, scores = self.sess.run(
        [self.pred_op, self.scores_op], feed_dict=feed_dict)
    # pred corresponds to prob(example == 1) 
    y_pred_batch = np.zeros((n_samples, 2))
    # Remove padded elements
@@ -303,8 +320,13 @@ class SupportGraphClassifier(Model):
    y_pred_batch[:, 0] = 1 - pred
    return y_pred_batch

  def evaluate(self, dataset, metric, n_pos,
               n_neg, n_trials=1000, exclude_support=True):
  def evaluate(self,
               dataset,
               metric,
               n_pos,
               n_neg,
               n_trials=1000,
               exclude_support=True):
    """Evaluate performance on dataset according to metrics


@@ -351,8 +373,8 @@ class SupportGraphClassifier(Model):
        print("Keeping support datapoints for eval.")
        task_dataset = get_task_dataset(dataset, task)
      y_pred = self.predict_proba(support, task_dataset)
      task_scores[task].append(metric.compute_metric(
          task_dataset.y, y_pred, task_dataset.w))
      task_scores[task].append(
          metric.compute_metric(task_dataset.y, y_pred, task_dataset.w))

    # Join information for all tasks.
    mean_task_scores = {}
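
Finally, a hedged usage sketch (not part of this commit) tying the pieces above together; SupportGraphClassifier is assumed importable from its module, support_model, train_dataset, and test_dataset are assumed to be constructed elsewhere, and the Metric wrapper is assumed to come from deepchem.metrics.

import deepchem as dc

support_clf = SupportGraphClassifier(
    support_model,  # graph model instance (assumed built elsewhere)
    test_batch_size=10,
    support_batch_size=10,
    learning_rate=1e-3,
    similarity="cosine")
# Episodic training: each episode draws a (support, test) pair from some task.
support_clf.fit(train_dataset, n_episodes_per_epoch=1000, nb_epochs=1,
                n_pos=1, n_neg=9)
# Evaluation repeatedly samples supports and scores predictions on the rest.
metric = dc.metrics.Metric(dc.metrics.roc_auc_score)
scores = support_clf.evaluate(test_dataset, metric, n_pos=1, n_neg=9,
                              n_trials=100)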