Commit 285cb6a3 authored by leswing's avatar leswing
Browse files

Added Documentation

parent b5b7abde
Loading
Loading
Loading
Loading
+31 −4
Original line number Diff line number Diff line
@@ -161,6 +161,16 @@ class Dataset(object):
                  epoch=0,
                  deterministic=False,
                  pad_batches=False):
    """
    
    Parameters
    ----------
   

    Returns
    -------

    """
    """Get an object that iterates over minibatches from the dataset.

    Each minibatch is returned as a tuple of four numpy arrays: (X, y, w, ids).
@@ -1035,6 +1045,9 @@ class DiskDataset(Dataset):


class Databag(object):
  """
  A utility class to iterate through multiple datasets together.
  """

  def __init__(self):
    self.datasets = dict()
@@ -1043,17 +1056,31 @@ class Databag(object):
    self.datasets[key] = dataset

  def iterbatches(self, **kwargs):
    """
    Loop through all internal datasets in the same order
    Parameters
    ----------
    batch_size: int
      Number of samples from each dataset to return
    epoch: int
      Number of times to loop through the datasets
    pad_batches: boolean
      Should all batches==batch_size

    Returns
    -------
    Generator which yields a dictionary {key: dataset.X[batch]}

    """
    key_order = [x for x in self.datasets.keys()]
    if "epochs" in kwargs:
      epochs = kwargs['epochs']
      del kwargs['epochs']
    else:
      epochs = 1
    kwargs['deterministic'] = True
    for epoch in range(epochs):
      iterators = [
          self.datasets[x].iterbatches(deterministic=True, **kwargs)
          for x in key_order
      ]
      iterators = [self.datasets[x].iterbatches(**kwargs) for x in key_order]
      for tup in six.moves.zip(*iterators):
        m_d = {key_order[i]: tup[i][0] for i in range(len(key_order))}
        yield m_d
+3 −4
Original line number Diff line number Diff line
@@ -4,8 +4,7 @@ import deepchem as dc
import tensorflow as tf
import os

from data import NumpyDataset

from deepchem.data import NumpyDataset
from deepchem.data.datasets import Databag
from deepchem.models.tensorgraph.layers import Input, Dense, LossLayer, Flatten, ReduceSquareDifference
from deepchem.models.tensorgraph.layers import Layer, Input, Reshape, Flatten, Feature, Conv2d, MaxPool, Label
@@ -57,7 +56,7 @@ class TestTensorGraph(unittest.TestCase):

    outputs = []
    entropies = []
    for i in xrange(2):
    for i in range(2):
      label = Label(shape=(None, 2))
      dense = Dense(out_channels=2, in_layers=[features])
      output = SoftMax(in_layers=[dense])
@@ -117,7 +116,7 @@ class TestTensorGraph(unittest.TestCase):

    outputs = []
    losses = []
    for i in xrange(2):
    for i in range(2):
      label = Label(shape=(None, 1))
      dense = Dense(out_channels=1, in_layers=[features])
      loss = ReduceSquareDifference(in_layers=[dense, label])
+24 −3
Original line number Diff line number Diff line
@@ -132,7 +132,11 @@ class Evaluator(object):


class GeneratorEvaluator(object):
  """Class that evaluates a model on a given dataset."""
  """
  Partner class to Evaluator.
  Instead of operating over datasets this class operates over Generator.
  Evaluate a Metric over a model and Generator.
  """

  def __init__(self,
               model,
@@ -140,8 +144,25 @@ class GeneratorEvaluator(object):
               transformers,
               labels,
               outputs=None,
               weights=list(),
               verbose=False):
               weights=list()):
    """
    Parameters
    ----------
    model: Model
      Model to evaluate
    generator: Generator
      Generator which yields {layer: numpyArray} to feed into model
    transformers:
      Transformers to "undo" when applied to the model's outputs
    labels: list of Layer
      layers which are keys in the generator to compare to outputs
    outputs: list of Layer
      if None will use the outputs of the model
    weights: np.array
      Must be of the shape (n_samples, n_tasks)
      if weights[sample][task] is 0 that sample will not be used
      for computing the task metric
    """
    self.model = model
    self.generator = generator
    self.output_transformers = [