Commit b5b7abde authored by leswing
Browse files

YAPF

parent c09ca92e
Loading
Loading
Loading
Loading
+28 −24
Original line number Diff line number Diff line
@@ -1035,6 +1035,7 @@ class DiskDataset(Dataset):


class Databag(object):

  def __init__(self):
    self.datasets = dict()

@@ -1049,7 +1050,10 @@ class Databag(object):
    else:
      epochs = 1
    for epoch in range(epochs):
      iterators = [self.datasets[x].iterbatches(deterministic=True, **kwargs) for x in key_order]
      iterators = [
          self.datasets[x].iterbatches(deterministic=True, **kwargs)
          for x in key_order
      ]
      for tup in six.moves.zip(*iterators):
        m_d = {key_order[i]: tup[i][0] for i in range(len(key_order))}
        yield m_d
+58 −35
Original line number Diff line number Diff line
@@ -7,6 +7,7 @@ from deepchem.nn import model_ops, initializations


class Layer(object):

  def __init__(self, in_layers=None, **kwargs):
    if "name" not in kwargs:
      self.name = "%s%s" % (self.__class__.__name__, self._random_name())
@@ -50,6 +51,7 @@ class Layer(object):


class Conv1DLayer(Layer):

  def __init__(self, width, out_channels, **kwargs):
    self.width = width
    self.out_channels = out_channels
@@ -74,6 +76,7 @@ class Conv1DLayer(Layer):


class Dense(Layer):

  def __init__(self, out_channels, activation_fn=None, **kwargs):
    self.out_channels = out_channels
    self.out_tensor = None
@@ -101,6 +104,7 @@ class Dense(Layer):


class Flatten(Layer):

  def __init__(self, **kwargs):
    super(Flatten, self).__init__(**kwargs)

@@ -118,6 +122,7 @@ class Flatten(Layer):


class Reshape(Layer):

  def __init__(self, shape, **kwargs):
    self.shape = shape
    super(Reshape, self).__init__(**kwargs)
@@ -128,6 +133,7 @@ class Reshape(Layer):


class CombineMeanStd(Layer):

  def __init__(self, **kwargs):
    super(CombineMeanStd, self).__init__(**kwargs)

@@ -142,6 +148,7 @@ class CombineMeanStd(Layer):


class Repeat(Layer):

  def __init__(self, n_times, **kwargs):
    self.n_times = n_times
    super(Repeat, self).__init__(**kwargs)
@@ -156,6 +163,7 @@ class Repeat(Layer):


class GRU(Layer):

  def __init__(self, n_hidden, out_channels, batch_size, **kwargs):
    self.n_hidden = n_hidden
    self.out_channels = out_channels
@@ -178,6 +186,7 @@ class GRU(Layer):


class TimeSeriesDense(Layer):

  def __init__(self, out_channels, **kwargs):
    self.out_channels = out_channels
    super(TimeSeriesDense, self).__init__(**kwargs)
@@ -192,6 +201,7 @@ class TimeSeriesDense(Layer):


class Input(Layer):

  def __init__(self, shape, dtype=tf.float32, **kwargs):
    self.shape = shape
    self.dtype = dtype
@@ -208,31 +218,32 @@ class Input(Layer):

  def create_pre_q(self, batch_size):
    """Build a companion Input whose leading dimension is set to batch_size.

    Parameters
    ----------
    batch_size:
      Value used as the first entry of the new layer's shape, replacing the
      first entry of this layer's shape.

    Returns
    -------
    Input
      A new Input named "<self.name>_pre_q" with this layer's dtype and the
      shape (batch_size,) + self.shape[1:].
    """
    # self.shape has to be a tuple: concatenating a tuple with a list raises
    # TypeError.
    q_shape = (batch_size,) + self.shape[1:]
    return Input(shape=q_shape, name="%s_pre_q" % self.name, dtype=self.dtype)

  def get_pre_q_name(self):
    """Return this layer's name with the "_pre_q" suffix appended.

    The result has the same form as the name create_pre_q assigns to the
    layer it builds.
    """
    pre_q_name = "%s_pre_q" % self.name
    return pre_q_name


class Feature(Input):
  """Input subclass that adds no behavior of its own.

  NOTE(review): presumably exists so feature inputs can be told apart from
  Label and Weights inputs by type -- confirm against TensorGraph.
  """

  def __init__(self, **kwargs):
    # Forward every keyword argument unchanged to Input.__init__.
    super(Feature, self).__init__(**kwargs)


class Label(Input):
  """Input subclass that adds no behavior of its own.

  NOTE(review): presumably exists so label inputs can be told apart from
  Feature and Weights inputs by type -- confirm against TensorGraph.
  """

  def __init__(self, **kwargs):
    # Forward every keyword argument unchanged to Input.__init__.
    super(Label, self).__init__(**kwargs)


class Weights(Input):
  """Input subclass that adds no behavior of its own.

  NOTE(review): presumably exists so weight inputs can be told apart from
  Feature and Label inputs by type -- confirm against TensorGraph.
  """

  def __init__(self, **kwargs):
    # Forward every keyword argument unchanged to Input.__init__.
    super(Weights, self).__init__(**kwargs)


class LossLayer(Layer):

  def __init__(self, **kwargs):
    super(LossLayer, self).__init__(**kwargs)

@@ -244,6 +255,7 @@ class LossLayer(Layer):


class SoftMax(Layer):

  def __init__(self, **kwargs):
    super(SoftMax, self).__init__(**kwargs)

@@ -256,6 +268,7 @@ class SoftMax(Layer):


class Concat(Layer):

  def __init__(self, **kwargs):
    super(Concat, self).__init__(**kwargs)

@@ -270,6 +283,7 @@ class Concat(Layer):


class SoftMaxCrossEntropy(Layer):

  def __init__(self, **kwargs):
    super(SoftMaxCrossEntropy, self).__init__(**kwargs)

@@ -284,6 +298,7 @@ class SoftMaxCrossEntropy(Layer):


class ReduceMean(Layer):

  def _create_tensor(self):
    if len(self.in_layers) > 1:
      out_tensors = [x.out_tensor for x in self.in_layers]
@@ -296,6 +311,7 @@ class ReduceMean(Layer):


class ReduceSquareDifference(Layer):

  def __init__(self, **kwargs):
    super(ReduceSquareDifference, self).__init__(**kwargs)

@@ -307,6 +323,7 @@ class ReduceSquareDifference(Layer):


class Conv2d(Layer):

  def __init__(self, num_outputs, kernel_size=5, **kwargs):
    self.num_outputs = num_outputs
    self.kernel_size = kernel_size
@@ -325,6 +342,7 @@ class Conv2d(Layer):


class MaxPool(Layer):

  def __init__(self,
               ksize=[1, 2, 2, 1],
               strides=[1, 2, 2, 1],
@@ -378,6 +396,7 @@ class InputFifoQueue(Layer):


class GraphConvLayer(Layer):

  def __init__(self,
               out_channel,
               min_deg=0,
@@ -488,6 +507,7 @@ class GraphConvLayer(Layer):


class GraphPoolLayer(Layer):

  def __init__(self, min_degree=0, max_degree=10, **kwargs):
    self.min_degree = min_degree
    self.max_degree = max_degree
@@ -533,6 +553,7 @@ class GraphPoolLayer(Layer):


class GraphGather(Layer):

  def __init__(self, batch_size, activation_fn=None, **kwargs):
    self.batch_size = batch_size
    self.activation_fn = activation_fn
@@ -575,6 +596,7 @@ class GraphGather(Layer):


class BatchNormLayer(Layer):

  def _create_tensor(self):
    parent_tensor = self.in_layers[0].out_tensor
    self.out_tensor = tf.layers.batch_normalization(parent_tensor)
@@ -582,6 +604,7 @@ class BatchNormLayer(Layer):


class WeightedError(Layer):

  def _create_tensor(self):
    entropy, weights = self.in_layers[0], self.in_layers[1]
    self.out_tensor = tf.reduce_sum(entropy.out_tensor * weights.out_tensor)
+50 −24
Original line number Diff line number Diff line
@@ -17,6 +17,7 @@ from deepchem.utils.evaluate import GeneratorEvaluator


class TensorGraph(Model):

  def __init__(self,
               tensorboard=False,
               tensorboard_log_frequency=100,
@@ -90,14 +91,15 @@ class TensorGraph(Model):
          nb_epoch=10,
          max_checkpoints_to_keep=5,
          checkpoint_interval=1000):
    return self.fit_generator(self.default_generator(dataset, epochs=nb_epoch),
                              max_checkpoints_to_keep,
                              checkpoint_interval)
    return self.fit_generator(
        self.default_generator(dataset, epochs=nb_epoch),
        max_checkpoints_to_keep, checkpoint_interval)

  def fit_generator(self,
                    feed_dict_generator,
                    max_checkpoints_to_keep=5,
                    checkpoint_interval=1000):

    def create_feed_dict():
      if self.use_queue:
        while True:
@@ -119,7 +121,8 @@ class TensorGraph(Model):
        if self.use_queue:
          enqueue_thread = threading.Thread(
              target=_enqueue_batch,
            args=(self, feed_dict_generator, self._get_tf("Graph"), sess, coord))
              args=(self, feed_dict_generator, self._get_tf("Graph"), sess,
                    coord))
          enqueue_thread.start()
        output_tensors = [x.out_tensor for x in self.outputs]
        fetches = output_tensors + [train_op, self.loss.out_tensor]
@@ -132,16 +135,19 @@ class TensorGraph(Model):
            self.global_step += 1
            n_samples += 1
            if self.tensorboard and n_samples % self.tensorboard_log_frequency == 0:
              summary = sess.run(self._get_tf("summary_op"), feed_dict=feed_dict)
              summary = sess.run(
                  self._get_tf("summary_op"), feed_dict=feed_dict)
              self._log_tensorboard(summary)
          except OutOfRangeError:
            break
          if self.global_step % checkpoint_interval == checkpoint_interval - 1:
            saver.save(sess, self.save_file, global_step=self.global_step)
            avg_loss = float(avg_loss) / n_batches
            print('Ending global_step %d: Average loss %g' % (self.global_step, avg_loss))
            print('Ending global_step %d: Average loss %g' %
                  (self.global_step, avg_loss))
        avg_loss = float(avg_loss) / n_batches
        print('Ending global_step %d: Average loss %g' % (self.global_step, avg_loss))
        print('Ending global_step %d: Average loss %g' %
              (self.global_step, avg_loss))
        saver.save(sess, self.save_file, global_step=self.global_step)
        self.last_checkpoint = saver.last_checkpoints[-1]
      ############################################################## TIMING
@@ -169,7 +175,11 @@ class TensorGraph(Model):
    dataset = NumpyDataset(X, y)
    return self.fit(dataset, nb_epoch=1)

  def default_generator(self, dataset, epochs=1, predict=False, pad_batches=True):
  def default_generator(self,
                        dataset,
                        epochs=1,
                        predict=False,
                        pad_batches=True):
    if len(self.features) > 1:
      raise ValueError("More than one Feature, must use generator")
    if len(self.labels) > 1:
@@ -177,7 +187,8 @@ class TensorGraph(Model):
    if len(self.task_weights) > 1:
      raise ValueError("More than one Weights, must use generator")
    for epoch in range(epochs):
      for (X_b, y_b, w_b, ids_b) in dataset.iterbatches(batch_size=self.batch_size,
      for (X_b, y_b, w_b, ids_b) in dataset.iterbatches(
          batch_size=self.batch_size,
          deterministic=True,
          pad_batches=pad_batches):
        feed_dict = dict()
@@ -220,7 +231,10 @@ class TensorGraph(Model):
      out_tensors = [x.out_tensor for x in self.outputs]
      results = []
      for feed_dict in generator:
        feed_dict = {self.layers[k.name].out_tensor: v for k, v in six.iteritems(feed_dict)}
        feed_dict = {
            self.layers[k.name].out_tensor: v
            for k, v in six.iteritems(feed_dict)
        }
        result = np.array(session.run(out_tensors, feed_dict=feed_dict))
        if len(result.shape) == 3:
          result = np.transpose(result, axes=[1, 0, 2])
@@ -366,12 +380,24 @@ class TensorGraph(Model):
      self.built = True
    self.tensor_objects = tensor_objects

  def evaluate_generator(self, dataset, metrics, transformers=[],
                         labels=None, outputs=None, weights=[], per_task_metrics=False):
  def evaluate_generator(self,
                         dataset,
                         metrics,
                         transformers=[],
                         labels=None,
                         outputs=None,
                         weights=[],
                         per_task_metrics=False):

    if labels is None:
      raise ValueError
    evaluator = GeneratorEvaluator(self, dataset, transformers, labels=labels, outputs=outputs, weights=weights)
    evaluator = GeneratorEvaluator(
        self,
        dataset,
        transformers,
        labels=labels,
        outputs=outputs,
        weights=weights)
    if not per_task_metrics:
      scores = evaluator.compute_model_performance(metrics)
      return scores
+17 −12
Original line number Diff line number Diff line
@@ -6,7 +6,7 @@ import os

from data import NumpyDataset

from data.datasets import Databag
from deepchem.data.datasets import Databag
from deepchem.models.tensorgraph.layers import Input, Dense, LossLayer, Flatten, ReduceSquareDifference
from deepchem.models.tensorgraph.layers import Layer, Input, Reshape, Flatten, Feature, Conv2d, MaxPool, Label
from deepchem.models.tensorgraph.layers import Dense, SoftMaxCrossEntropy, ReduceMean, SoftMax
@@ -74,7 +74,9 @@ class TestTensorGraph(unittest.TestCase):
      tg.add_output(output)
    tg.set_loss(total_loss)

    tg.fit_generator(databag.iterbatches(epochs=100, batch_size=tg.batch_size, pad_batches=True))
    tg.fit_generator(
        databag.iterbatches(
            epochs=100, batch_size=tg.batch_size, pad_batches=True))
    prediction = tg.predict_proba_on_generator(databag.iterbatches())
    for i in range(2):
      y_real = ys[i].X
@@ -131,7 +133,9 @@ class TestTensorGraph(unittest.TestCase):
      tg.add_output(output)
    tg.set_loss(total_loss)

    tg.fit_generator(databag.iterbatches(epochs=200, batch_size=tg.batch_size, pad_batches=True))
    tg.fit_generator(
        databag.iterbatches(
            epochs=200, batch_size=tg.batch_size, pad_batches=True))
    prediction = tg.predict_proba_on_generator(databag.iterbatches())
    for i in range(2):
      y_real = ys[i].X
@@ -142,7 +146,8 @@ class TestTensorGraph(unittest.TestCase):
    from tensorflow.examples.tutorials.mnist import input_data
    mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
    train = dc.data.NumpyDataset(mnist.train.images, mnist.train.labels)
    valid = dc.data.NumpyDataset(mnist.validation.images, mnist.validation.labels)
    valid = dc.data.NumpyDataset(mnist.validation.images,
                                 mnist.validation.labels)

    # Images are square 28x28 (batch, height, width, channel)
    feature = Feature(shape=(None, 784), name="Feature")
@@ -155,7 +160,8 @@ class TestTensorGraph(unittest.TestCase):
    maxpool_2 = MaxPool(in_layers=[conv2d_2])
    flatten = Flatten(in_layers=[maxpool_2])

    dense1 = Dense(out_channels=1024, activation_fn=tf.nn.relu, in_layers=[flatten])
    dense1 = Dense(
        out_channels=1024, activation_fn=tf.nn.relu, in_layers=[flatten])
    dense2 = Dense(out_channels=10, in_layers=[dense1])
    label = Label(shape=(None, 10), name="Label")
    smce = SoftMaxCrossEntropy(in_layers=[label, dense2])
@@ -163,9 +169,7 @@ class TestTensorGraph(unittest.TestCase):
    output = SoftMax(in_layers=[dense2])

    tg = dc.models.TensorGraph(
      model_dir='/tmp/mnist',
      batch_size=1000,
      use_queue=True)
        model_dir='/tmp/mnist', batch_size=1000, use_queue=True)
    tg.add_output(output)
    tg.set_loss(loss)
    tg.fit(train, nb_epoch=2)
@@ -211,7 +215,8 @@ class TestTensorGraph(unittest.TestCase):
    label = Label(shape=(None, 2))
    smce = SoftMaxCrossEntropy(in_layers=[label, dense])
    loss = ReduceMean(in_layers=[smce])
    tg = dc.models.TensorGraph(tensorboard=True,
    tg = dc.models.TensorGraph(
        tensorboard=True,
        tensorboard_log_frequency=1,
        learning_rate=0.1,
        model_dir='/tmp/tensorgraph')
+16 −12
Original line number Diff line number Diff line
@@ -134,8 +134,14 @@ class Evaluator(object):
class GeneratorEvaluator(object):
  """Class that evaluates a model on a given dataset."""

  def __init__(self, model, generator, transformers,
               labels, outputs=None, weights=list(), verbose=False):
  def __init__(self,
               model,
               generator,
               transformers,
               labels,
               outputs=None,
               weights=list(),
               verbose=False):
    self.model = model
    self.generator = generator
    self.output_transformers = [
@@ -151,9 +157,7 @@ class GeneratorEvaluator(object):
      raise ValueError("Must have same number of labels and outputs")
    self.verbose = verbose

  def compute_model_performance(self,
                                metrics,
                                per_task_metrics=False):
  def compute_model_performance(self, metrics, per_task_metrics=False):
    """
    Computes statistics of model on test data and saves results to csv.

Loading