Commit bddbc31a authored by Peter Eastman's avatar Peter Eastman
Browse files

Began converting fully connected models to tensorgraph

parent 691e4a91
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -25,4 +25,4 @@ from deepchem.models.tensorflow_models.lr import TensorflowLogisticRegression
from deepchem.models.tensorflow_models.progressive_multitask import ProgressiveMultitaskRegressor
from deepchem.models.tensorflow_models.progressive_joint import ProgressiveJointRegressor
from deepchem.models.tensorflow_models.IRV import TensorflowMultiTaskIRVClassifier
from deepchem.models.tensorgraph.tensor_graph import TensorGraph, MultiTaskTensorGraph
 No newline at end of file
from deepchem.models.tensorgraph.tensor_graph import TensorGraph
 No newline at end of file
+113 −0
Original line number Diff line number Diff line
@@ -19,6 +19,119 @@ from deepchem.models.tensorflow_models import TensorflowRegressor
from deepchem.metrics import to_one_hot


from deepchem.models.tensorgraph.tensor_graph import TensorGraph
from deepchem.models.tensorgraph.layers import Feature, Label, Weights, WeightedError, Dense, Reshape, SoftMaxCrossEntropy, L2LossLayer

class TensorflowMultiTaskClassifier2(TensorGraph):
  """Fully connected multitask classifier built on TensorGraph.

  Stacks ReLU dense layers over the input features, then a final dense
  layer reshaped to (batch, n_tasks, n_classes) with a per-task softmax
  cross entropy loss, weighted per task via WeightedError.
  """

  def __init__(self,
               n_tasks,
               n_features,
               layer_sizes=(1000,),
               n_classes=2,
               **kwargs):
    """Construct the model graph.

    Parameters
    ----------
    n_tasks: int
      number of prediction tasks
    n_features: int
      number of input features per sample
    layer_sizes: sequence of int
      width of each hidden dense layer, in order.  (An immutable tuple is
      used as the default to avoid the shared-mutable-default pitfall.)
    n_classes: int
      number of classes per task
    """
    super().__init__(mode='classification', **kwargs)
    self.n_tasks = n_tasks
    self.n_features = n_features
    self.n_classes = n_classes

    # Add the input features.
    mol_features = Feature(shape=(None, n_features))
    prev_layer = mol_features

    # Add the hidden dense layers.
    for size in layer_sizes:
      prev_layer = Dense(
          in_layers=[prev_layer], out_channels=size, activation_fn=tf.nn.relu)

    # Output layer: one logit per (task, class), reshaped so axis 1 is the
    # task and axis 2 is the class.
    output = Reshape(
        shape=(-1, n_tasks, n_classes),
        in_layers=[
            Dense(in_layers=[prev_layer], out_channels=n_tasks * n_classes)
        ])
    self.add_output(output)

    # Per-task softmax cross entropy, weighted by the per-task weights.
    labels = Label(shape=(None, n_tasks, n_classes))
    weights = Weights(shape=(None, n_tasks))
    loss = Reshape(
        shape=(-1, n_tasks),
        in_layers=[SoftMaxCrossEntropy(in_layers=[labels, output])])
    weighted_loss = WeightedError(in_layers=[loss, weights])
    self.set_loss(weighted_loss)

  def default_generator(self,
                        dataset,
                        epochs=1,
                        predict=False,
                        pad_batches=True):
    """Yield feed dicts mapping the graph's inputs to batch arrays.

    Labels are one-hot encoded to shape (batch, n_tasks, n_classes).
    When predict is True, labels and weights are omitted from the feed.
    """
    for epoch in range(epochs):
      for (X_b, y_b, w_b, ids_b) in dataset.iterbatches(
          batch_size=self.batch_size,
          deterministic=True,
          pad_batches=pad_batches):
        feed_dict = dict()
        if y_b is not None and not predict:
          feed_dict[self.labels[0]] = to_one_hot(
              y_b.flatten(), self.n_classes).reshape(-1, self.n_tasks,
                                                     self.n_classes)
        if X_b is not None:
          feed_dict[self.features[0]] = X_b
        if w_b is not None and not predict:
          feed_dict[self.task_weights[0]] = w_b
        yield feed_dict




class TensorflowMultiTaskRegressor2(TensorGraph):
  """Fully connected multitask regressor built on TensorGraph.

  Stacks ReLU dense layers over the input features, then a final dense
  layer reshaped to (batch, n_tasks, 1) with a per-task L2 loss, weighted
  per task via WeightedError.
  """

  def __init__(self,
               n_tasks,
               n_features,
               layer_sizes=(1000,),
               **kwargs):
    """Construct the model graph.

    Parameters
    ----------
    n_tasks: int
      number of prediction tasks
    n_features: int
      number of input features per sample
    layer_sizes: sequence of int
      width of each hidden dense layer, in order.  (An immutable tuple is
      used as the default to avoid the shared-mutable-default pitfall.)
    """
    super().__init__(mode='regression', **kwargs)
    self.n_tasks = n_tasks
    self.n_features = n_features

    # Add the input features.
    mol_features = Feature(shape=(None, n_features))
    prev_layer = mol_features

    # Add the hidden dense layers.
    for size in layer_sizes:
      prev_layer = Dense(
          in_layers=[prev_layer], out_channels=size, activation_fn=tf.nn.relu)

    # Output layer: one value per task, reshaped so axis 1 is the task.
    output = Reshape(
        shape=(-1, n_tasks, 1),
        in_layers=[Dense(in_layers=[prev_layer], out_channels=n_tasks)])
    self.add_output(output)

    # Per-task L2 loss, weighted by the per-task weights.
    labels = Label(shape=(None, n_tasks, 1))
    weights = Weights(shape=(None, n_tasks))
    loss = Reshape(
        shape=(-1, n_tasks),
        in_layers=[L2LossLayer(in_layers=[labels, output])])
    weighted_loss = WeightedError(in_layers=[loss, weights])
    self.set_loss(weighted_loss)

  def default_generator(self,
                        dataset,
                        epochs=1,
                        predict=False,
                        pad_batches=True):
    """Yield feed dicts mapping the graph's inputs to batch arrays.

    Labels are reshaped to (batch, n_tasks, 1).  When predict is True,
    labels and weights are omitted from the feed.
    """
    for epoch in range(epochs):
      for (X_b, y_b, w_b, ids_b) in dataset.iterbatches(
          batch_size=self.batch_size,
          deterministic=True,
          pad_batches=pad_batches):
        feed_dict = dict()
        if y_b is not None and not predict:
          feed_dict[self.labels[0]] = y_b.reshape(-1, self.n_tasks, 1)
        if X_b is not None:
          feed_dict[self.features[0]] = X_b
        if w_b is not None and not predict:
          feed_dict[self.task_weights[0]] = w_b
        yield feed_dict




class TensorflowMultiTaskClassifier(TensorflowClassifier):
  """Implements an icml model as configured in a model_config.proto."""

+12 −35
Original line number Diff line number Diff line
@@ -13,6 +13,7 @@ from deepchem.data import NumpyDataset
from deepchem.metrics import to_one_hot, from_one_hot
from deepchem.models.models import Model
from deepchem.models.tensorgraph.layers import InputFifoQueue, Label, Feature, Weights
from deepchem.trans import undo_transforms
from deepchem.utils.evaluate import GeneratorEvaluator


@@ -214,7 +215,7 @@ class TensorGraph(Model):
          feed_dict[self.task_weights[0]] = w_b
        yield feed_dict

  def predict_on_generator(self, generator):
  def predict_on_generator(self, generator, transformers=[]):
    """Generates output predictions for the input samples,
      processing the samples in a batched way.

@@ -226,12 +227,12 @@ class TensorGraph(Model):
    # Returns
        A Numpy array of predictions.
    """
    retval = self.predict_proba_on_generator(generator)
    retval = self.predict_proba_on_generator(generator, transformers)
    if self.mode == 'classification':
      retval = np.expand_dims(from_one_hot(retval, axis=2), axis=1)
    return retval

  def predict_proba_on_generator(self, generator):
  def predict_proba_on_generator(self, generator, transformers=[]):
    """
    Returns:
      y_pred: numpy ndarray of shape (n_samples, n_classes*n_tasks)
@@ -252,10 +253,12 @@ class TensorGraph(Model):
          result = np.array(sess.run(out_tensors, feed_dict=feed_dict))
          if len(result.shape) == 3:
            result = np.transpose(result, axes=[1, 0, 2])
          result = result.squeeze(axis=0)
          result = undo_transforms(result, transformers)
          results.append(result)
        return np.concatenate(results, axis=0)

  def predict_on_batch(self, X, sess=None):
  def predict_on_batch(self, X, sess=None, transformers=[]):
    """Generates output predictions for the input samples,
      processing the samples in a batched way.

@@ -269,12 +272,12 @@ class TensorGraph(Model):
    """
    dataset = NumpyDataset(X=X, y=None)
    generator = self.default_generator(dataset, predict=True, pad_batches=False)
    return self.predict_on_generator(generator)
    return self.predict_on_generator(generator, transformers)

  def predict_proba_on_batch(self, X, sess=None):
  def predict_proba_on_batch(self, X, sess=None, transformers=[]):
    dataset = NumpyDataset(X=X, y=None)
    generator = self.default_generator(dataset, predict=True, pad_batches=False)
    return self.predict_proba_on_generator(generator)
    return self.predict_proba_on_generator(generator, transformers)

  def predict(self, dataset, transformers=[], batch_size=None):
    """
@@ -283,10 +286,8 @@ class TensorGraph(Model):
    Returns:
      y_pred: numpy ndarray of shape (n_samples,)
    """
    if len(transformers) > 0:
      raise ValueError("Tensorgraph does not support transformers")
    generator = self.default_generator(dataset, predict=True, pad_batches=False)
    return self.predict_on_generator(generator)
    return self.predict_on_generator(generator, transformers)

  def predict_proba(self, dataset, transformers=[], batch_size=None):
    """
@@ -295,10 +296,8 @@ class TensorGraph(Model):
    Returns:
      y_pred: numpy ndarray of shape (n_samples, n_classes*n_tasks)
    """
    if len(transformers) > 0:
      raise ValueError("Tensorgraph does not support transformers")
    generator = self.default_generator(dataset, predict=True, pad_batches=False)
    return self.predict_proba_on_generator(generator)
    return self.predict_proba_on_generator(generator, transformers)

  def topsort(self):
    return nx.topological_sort(self.nxgraph)
@@ -506,25 +505,3 @@ def _enqueue_batch(tg, generator, graph, sess, coord):
    sess.run(tg.input_queue.close_op)
    coord.num_samples = num_samples
    coord.request_stop()


class MultiTaskTensorGraph(TensorGraph):
  """
  Class created for legacy sake.

  Assumes y is a matrix of booleans (one column per task) representing
  classification labels; each column is one-hot encoded independently.
  """

  def __init__(self, **kwargs):
    # No extra state; exists only to provide _construct_feed_dict below.
    super(MultiTaskTensorGraph, self).__init__(**kwargs)

  def _construct_feed_dict(self, X_b, y_b, w_b, ids_b):
    """Build a feed dict for one batch.

    One-hot encodes each task's label column into the matching Label
    tensor; features and per-task weights are fed through unchanged.
    ids_b is accepted for interface compatibility but not used.
    """
    feed_dict = dict()
    if y_b is not None:
      # One Label layer per task; column index matches label index.
      for index, label in enumerate(self.labels):
        feed_dict[label.out_tensor] = to_one_hot(y_b[:, index])
    if self.task_weights is not None and w_b is not None:
      feed_dict[self.task_weights.out_tensor] = w_b
    if self.features is not None:
      feed_dict[self.features[0].out_tensor] = X_b
    return feed_dict