Commit 6959cf0f authored by Bharath Ramsundar, committed by GitHub

Merge pull request #522 from peastman/fc

Convert fully connected models to tensorgraph
Parents: ed16e6e9, 933a4f1c
+5 −2
@@ -18,11 +18,14 @@ from deepchem.models.sequential import Sequential
from deepchem.models.tensorflow_models.fcnet import TensorflowMultiTaskRegressor
from deepchem.models.tensorflow_models.fcnet import TensorflowMultiTaskClassifier
from deepchem.models.tensorflow_models.fcnet import TensorflowMultiTaskFitTransformRegressor
+from deepchem.models.tensorflow_models.fcnet import TensorGraphMultiTaskRegressor
+from deepchem.models.tensorflow_models.fcnet import TensorGraphMultiTaskClassifier
+from deepchem.models.tensorflow_models.fcnet import TensorGraphMultiTaskFitTransformRegressor
from deepchem.models.tensorflow_models.robust_multitask import RobustMultitaskRegressor
from deepchem.models.tensorflow_models.robust_multitask import RobustMultitaskClassifier
from deepchem.models.tensorflow_models.lr import TensorflowLogisticRegression
from deepchem.models.tensorflow_models.progressive_multitask import ProgressiveMultitaskRegressor
from deepchem.models.tensorflow_models.progressive_joint import ProgressiveJointRegressor
from deepchem.models.tensorflow_models.IRV import TensorflowMultiTaskIRVClassifier
-from deepchem.models.tensorgraph.tensor_graph import TensorGraph, MultiTaskTensorGraph
+from deepchem.models.tensorgraph.tensor_graph import TensorGraph
from deepchem.models.tensorgraph.models.graph_models import WeaveTensorGraph, DTNNTensorGraph, DAGTensorGraph
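
After this change, deepchem.models exposes the new TensorGraph variants of the fully connected models alongside the original Tensorflow* classes. A minimal migration sketch (hypothetical data and sizes; it assumes the standard dc.data.NumpyDataset and TensorGraph.fit APIs):

import numpy as np
import deepchem as dc

X = np.random.rand(20, 16)               # 20 samples, 16 features
y = np.random.randint(2, size=(20, 1))   # one binary classification task
dataset = dc.data.NumpyDataset(X, y)

# Old-style model (still importable after this PR):
#   model = dc.models.TensorflowMultiTaskClassifier(n_tasks=1, n_features=16)
# New TensorGraph-based equivalent added by this PR:
model = dc.models.TensorGraphMultiTaskClassifier(n_tasks=1, n_features=16)
model.fit(dataset, nb_epoch=10)
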
+320 −0
@@ -18,6 +18,326 @@ from deepchem.models.tensorflow_models import TensorflowClassifier
from deepchem.models.tensorflow_models import TensorflowRegressor
from deepchem.metrics import to_one_hot

from deepchem.models.tensorgraph.tensor_graph import TensorGraph, TFWrapper
from deepchem.models.tensorgraph.layers import Feature, Label, Weights, WeightedError, Dense, Dropout, WeightDecay, Reshape, SoftMaxCrossEntropy, L2Loss


class TensorGraphMultiTaskClassifier(TensorGraph):

  def __init__(self,
               n_tasks,
               n_features,
               layer_sizes=[1000],
               weight_init_stddevs=[0.02],
               bias_init_consts=[1.0],
               weight_decay_penalty=0.0,
               weight_decay_penalty_type="l2",
               dropouts=[0.5],
               n_classes=2,
               **kwargs):
    """Create a TensorGraphMultiTaskClassifier.

    In addition to the following arguments, this class also accepts all the keyword arguments
    from TensorGraph.

    Parameters
    ----------
    n_tasks: int
      number of tasks
    n_features: int
      number of features
    layer_sizes: list
      the size of each dense layer in the network.  The length of this list determines the number of layers.
    weight_init_stddevs: list
      the standard deviation of the distribution to use for weight initialization of each layer.  The length
      of this list should equal len(layer_sizes).
    bias_init_consts: list
      the value to initialize the biases in each layer to.  The length of this list should equal len(layer_sizes).
    weight_decay_penalty: float
      the magnitude of the weight decay penalty to use
    weight_decay_penalty_type: str
      the type of penalty to use for weight decay, either 'l1' or 'l2'
    dropouts: list
      the dropout probability to use for each layer.  The length of this list should equal len(layer_sizes).
    n_classes: int
      the number of classes
    """
    super().__init__(mode='classification', **kwargs)
    self.n_tasks = n_tasks
    self.n_features = n_features
    self.n_classes = n_classes

    # Add the input features.

    mol_features = Feature(shape=(None, n_features))
    prev_layer = mol_features

    # Add the dense layers

    for size, weight_stddev, bias_const, dropout in zip(
        layer_sizes, weight_init_stddevs, bias_init_consts, dropouts):
      layer = Dense(
          in_layers=[prev_layer],
          out_channels=size,
          activation_fn=tf.nn.relu,
          weights_initializer=TFWrapper(
              tf.truncated_normal_initializer, stddev=weight_stddev),
          biases_initializer=TFWrapper(
              tf.constant_initializer, value=bias_const))
      if dropout > 0.0:
        layer = Dropout(dropout, in_layers=[layer])
      prev_layer = layer

    # Compute the loss function for each label.

    output = Reshape(
        shape=(-1, n_tasks, n_classes),
        in_layers=[
            Dense(in_layers=[prev_layer], out_channels=n_tasks * n_classes)
        ])
    self.add_output(output)
    labels = Label(shape=(None, n_tasks, n_classes))
    weights = Weights(shape=(None, n_tasks))
    loss = Reshape(
        shape=(-1, n_tasks),
        in_layers=[SoftMaxCrossEntropy(in_layers=[labels, output])])
    weighted_loss = WeightedError(in_layers=[loss, weights])
    if weight_decay_penalty != 0.0:
      weighted_loss = WeightDecay(
          weight_decay_penalty,
          weight_decay_penalty_type,
          in_layers=[weighted_loss])
    self.set_loss(weighted_loss)

  def default_generator(self,
                        dataset,
                        epochs=1,
                        predict=False,
                        pad_batches=True):
    for epoch in range(epochs):
      for (X_b, y_b, w_b, ids_b) in dataset.iterbatches(
          batch_size=self.batch_size,
          deterministic=True,
          pad_batches=pad_batches):
        feed_dict = dict()
        if y_b is not None and not predict:
          feed_dict[self.labels[0]] = to_one_hot(
              y_b.flatten(), self.n_classes).reshape(-1, self.n_tasks,
                                                     self.n_classes)
        if X_b is not None:
          feed_dict[self.features[0]] = X_b
        if w_b is not None and not predict:
          feed_dict[self.task_weights[0]] = w_b
        yield feed_dict

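Every per-layer option above is a parallel list that gets zipped with layer_sizes, so a deeper network means extending all of the lists together; note that zip truncates silently, so a too-short list drops trailing layers instead of raising an error. A hedged sketch of a two-hidden-layer classifier (hypothetical sizes, same API assumptions as the example above):

import numpy as np
import deepchem as dc

model = dc.models.TensorGraphMultiTaskClassifier(
    n_tasks=2,
    n_features=100,
    layer_sizes=[1000, 500],           # two hidden layers
    weight_init_stddevs=[0.02, 0.02],  # one entry per hidden layer
    bias_init_consts=[1.0, 1.0],
    dropouts=[0.5, 0.5],
    weight_decay_penalty=1e-4,
    weight_decay_penalty_type="l2",
    n_classes=2)

X = np.random.rand(50, 100)
y = np.random.randint(2, size=(50, 2))  # labels for two tasks
model.fit(dc.data.NumpyDataset(X, y))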

class TensorGraphMultiTaskRegressor(TensorGraph):

  def __init__(self,
               n_tasks,
               n_features,
               layer_sizes=[1000],
               weight_init_stddevs=[0.02, 0.02],
               bias_init_consts=[1.0, 1.0],
               weight_decay_penalty=0.0,
               weight_decay_penalty_type="l2",
               dropouts=[0.5],
               **kwargs):
    """Create a TensorGraphMultiTaskRegressor.

    In addition to the following arguments, this class also accepts all the keyword arguments
    from TensorGraph.

    Parameters
    ----------
    n_tasks: int
      number of tasks
    n_features: int
      number of features
    layer_sizes: list
      the size of each dense layer in the network.  The length of this list determines the number of layers.
    weight_init_stddevs: list
      the standard deviation of the distribution to use for weight initialization of each layer.  The length
      of this list should equal len(layer_sizes)+1.  The final element corresponds to the output layer.
    bias_init_consts: list
      the value to initialize the biases in each layer to.  The length of this list should equal len(layer_sizes)+1.
      The final element corresponds to the output layer.
    weight_decay_penalty: float
      the magnitude of the weight decay penalty to use
    weight_decay_penalty_type: str
      the type of penalty to use for weight decay, either 'l1' or 'l2'
    dropouts: list
      the dropout probability to use for each layer.  The length of this list should equal len(layer_sizes).
    """
    super().__init__(mode='regression', **kwargs)
    self.n_tasks = n_tasks
    self.n_features = n_features

    # Add the input features.

    mol_features = Feature(shape=(None, n_features))
    prev_layer = mol_features

    # Add the dense layers

    for size, weight_stddev, bias_const, dropout in zip(
        layer_sizes, weight_init_stddevs, bias_init_consts, dropouts):
      layer = Dense(
          in_layers=[prev_layer],
          out_channels=size,
          activation_fn=tf.nn.relu,
          weights_initializer=TFWrapper(
              tf.truncated_normal_initializer, stddev=weight_stddev),
          biases_initializer=TFWrapper(
              tf.constant_initializer, value=bias_const))
      if dropout > 0.0:
        layer = Dropout(dropout, in_layers=[layer])
      prev_layer = layer

    # Compute the loss function for each label.

    output = Reshape(
        shape=(-1, n_tasks, 1),
        in_layers=[
            Dense(
                in_layers=[prev_layer],
                out_channels=n_tasks,
                weights_initializer=TFWrapper(
                    tf.truncated_normal_initializer,
                    stddev=weight_init_stddevs[-1]),
                biases_initializer=TFWrapper(
                    tf.constant_initializer, value=bias_init_consts[-1]))
        ])
    self.add_output(output)
    labels = Label(shape=(None, n_tasks, 1))
    weights = Weights(shape=(None, n_tasks))
    loss = Reshape(
        shape=(-1, n_tasks), in_layers=[L2Loss(in_layers=[labels, output])])
    weighted_loss = WeightedError(in_layers=[loss, weights])
    if weight_decay_penalty != 0.0:
      weighted_loss = WeightDecay(
          weight_decay_penalty,
          weight_decay_penalty_type,
          in_layers=[weighted_loss])
    self.set_loss(weighted_loss)

  def default_generator(self,
                        dataset,
                        epochs=1,
                        predict=False,
                        pad_batches=True):
    for epoch in range(epochs):
      for (X_b, y_b, w_b, ids_b) in dataset.iterbatches(
          batch_size=self.batch_size,
          deterministic=True,
          pad_batches=pad_batches):
        feed_dict = dict()
        if y_b is not None and not predict:
          feed_dict[self.labels[0]] = y_b.reshape(-1, self.n_tasks, 1)
        if X_b is not None:
          feed_dict[self.features[0]] = X_b
        if w_b is not None and not predict:
          feed_dict[self.task_weights[0]] = w_b
        yield feed_dict

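Unlike the classifier, the regressor also applies the custom initializers to its output layer, which is why weight_init_stddevs and bias_init_consts each take len(layer_sizes)+1 entries (and why their defaults above have two elements). A sketch under the same assumptions as the examples above:

import numpy as np
import deepchem as dc

model = dc.models.TensorGraphMultiTaskRegressor(
    n_tasks=3,
    n_features=50,
    layer_sizes=[1000],                # one hidden layer...
    weight_init_stddevs=[0.02, 0.02],  # ...plus one entry for the output layer
    bias_init_consts=[1.0, 1.0],
    dropouts=[0.5])

X = np.random.rand(40, 50)
y = np.random.rand(40, 3)  # continuous labels for three tasks
model.fit(dc.data.NumpyDataset(X, y))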

class TensorGraphMultiTaskFitTransformRegressor(TensorGraphMultiTaskRegressor):
  """Implements a TensorGraphMultiTaskRegressor that performs on-the-fly transformation during fit/predict.

  Example:

  >>> import numpy as np
  >>> import deepchem as dc
  >>> n_samples = 10
  >>> n_features = 3
  >>> n_tasks = 1
  >>> ids = np.arange(n_samples)
  >>> X = np.random.rand(n_samples, n_features, n_features)
  >>> y = np.zeros((n_samples, n_tasks))
  >>> w = np.ones((n_samples, n_tasks))
  >>> dataset = dc.data.NumpyDataset(X, y, w, ids)
  >>> fit_transformers = [dc.trans.CoulombFitTransformer(dataset)]
  >>> model = dc.models.TensorGraphMultiTaskFitTransformRegressor(n_tasks, [n_features, n_features],
  ...     dropouts=[0.], learning_rate=0.003, weight_init_stddevs=[np.sqrt(6)/np.sqrt(1000)],
  ...     batch_size=n_samples, fit_transformers=fit_transformers, n_evals=1)
  n_features after fit_transform: 12
  """

  def __init__(self,
               n_tasks,
               n_features,
               fit_transformers=[],
               n_evals=1,
               batch_size=50,
               **kwargs):
    """Create a TensorGraphMultiTaskFitTransformRegressor.

    In addition to the following arguments, this class also accepts all the keyword arguments
    from TensorGraphMultiTaskRegressor.

    Parameters
    ----------
    n_tasks: int
      number of tasks
    n_features: list or int
      number of features
    fit_transformers: list
      List of dc.trans.FitTransformer objects
    n_evals: int
      Number of evaluations per example at predict time
    """
    self.fit_transformers = fit_transformers
    self.n_evals = n_evals

    # Run fit transformers on dummy dataset to determine n_features after transformation

    if isinstance(n_features, list):
      X_b = np.ones([batch_size] + n_features)
    elif isinstance(n_features, int):
      X_b = np.ones([batch_size, n_features])
    else:
      raise ValueError("n_features should be list or int")
    for transformer in fit_transformers:
      X_b = transformer.X_transform(X_b)
    n_features = X_b.shape[1]
    print("n_features after fit_transform: %d" % int(n_features))
    super().__init__(n_tasks, n_features, batch_size=batch_size, **kwargs)

  def default_generator(self,
                        dataset,
                        epochs=1,
                        predict=False,
                        pad_batches=True):
    for epoch in range(epochs):
      for (X_b, y_b, w_b, ids_b) in dataset.iterbatches(
          batch_size=self.batch_size,
          deterministic=True,
          pad_batches=pad_batches):
        feed_dict = dict()
        if y_b is not None and not predict:
          feed_dict[self.labels[0]] = y_b.reshape(-1, self.n_tasks, 1)
        if X_b is not None:
          if not predict:
            for transformer in self.fit_transformers:
              X_b = transformer.X_transform(X_b)
          feed_dict[self.features[0]] = X_b
        if w_b is not None and not predict:
          feed_dict[self.task_weights[0]] = w_b
        yield feed_dict

  def predict_proba_on_generator(self, generator, transformers=[]):

    def transform_generator():
      for feed_dict in generator:
        X = feed_dict[self.features[0]]
        for i in range(self.n_evals):
          X_t = X
          for transformer in self.fit_transformers:
            X_t = transformer.X_transform(X_t)
          feed_dict[self.features[0]] = X_t
          yield feed_dict

    return super().predict_proba_on_generator(transform_generator(),
                                              transformers)


class TensorflowMultiTaskClassifier(TensorflowClassifier):
  """Implements an icml model as configured in a model_config.proto."""
+1 −1
-from deepchem.models.tensorgraph.tensor_graph import TensorGraph
+from deepchem.models.tensorgraph.tensor_graph import TensorGraph, TFWrapper
 from deepchem.models.tensorgraph import models
\ No newline at end of file
+9 −9
@@ -25,7 +25,7 @@ class Combine_AP(Layer):
  def __init__(self, **kwargs):
    super(Combine_AP, self).__init__(**kwargs)

-  def create_tensor(self, in_layers=None):
+  def create_tensor(self, in_layers=None, **kwargs):
    if in_layers is None:
      in_layers = self.in_layers
    in_layers = convert_to_layers(in_layers)
@@ -39,7 +39,7 @@ class Separate_AP(Layer):
  def __init__(self, **kwargs):
    super(Separate_AP, self).__init__(**kwargs)

-  def create_tensor(self, in_layers=None):
+  def create_tensor(self, in_layers=None, **kwargs):
    if in_layers is None:
      in_layers = self.in_layers
    in_layers = convert_to_layers(in_layers)
@@ -147,7 +147,7 @@ class WeaveLayer(Layer):
      self.trainable_weights.extend(
          [self.W_AP, self.b_AP, self.W_PP, self.b_PP, self.W_P, self.b_P])

-  def create_tensor(self, in_layers=None):
+  def create_tensor(self, in_layers=None, **kwargs):
    """ description and explanation refer to deepchem.nn.WeaveLayer
    parent layers: [atom_features, pair_features], pair_split, atom_to_pair
    """
@@ -241,7 +241,7 @@ class WeaveGather(Layer):
    else:
      self.trainable_weights = None

-  def create_tensor(self, in_layers=None):
+  def create_tensor(self, in_layers=None, **kwargs):
    """ description and explanation refer to deepchem.nn.WeaveGather
    parent layers: atom_features, atom_split
    """
@@ -312,7 +312,7 @@ class DTNNEmbedding(Layer):
        [self.periodic_table_length, self.n_embedding])
    self.trainable_weights = [self.embedding_list]

-  def create_tensor(self, in_layers=None):
+  def create_tensor(self, in_layers=None, **kwargs):
    """description and explanation refer to deepchem.nn.DTNNEmbedding
    parent layers: atom_number
    """
@@ -375,7 +375,7 @@ class DTNNStep(Layer):
        self.W_cf, self.W_df, self.W_fc, self.b_cf, self.b_df
    ]

-  def create_tensor(self, in_layers=None):
+  def create_tensor(self, in_layers=None, **kwargs):
    """description and explanation refer to deepchem.nn.DTNNStep
    parent layers: atom_features, distance, distance_membership_i, distance_membership_j
    """
@@ -461,7 +461,7 @@ class DTNNGather(Layer):

    self.trainable_weights = self.W_list + self.b_list

-  def create_tensor(self, in_layers=None):
+  def create_tensor(self, in_layers=None, **kwargs):
    """description and explanation refer to deepchem.nn.DTNNGather
    parent layers: atom_features, atom_membership
    """
@@ -548,7 +548,7 @@ class DAGLayer(Layer):

    self.trainable_weights = self.W_list + self.b_list

-  def create_tensor(self, in_layers=None):
+  def create_tensor(self, in_layers=None, **kwargs):
    """description and explanation refer to deepchem.nn.DAGLayer
    parent layers: atom_features, parents, calculation_orders, calculation_masks, n_atoms
    """
@@ -686,7 +686,7 @@ class DAGGather(Layer):

    self.trainable_weights = self.W_list + self.b_list

-  def create_tensor(self, in_layers=None):
+  def create_tensor(self, in_layers=None, **kwargs):
    """description and explanation refer to deepchem.nn.DAGGather
    parent layers: atom_features, membership
    """
+82 −34

File changed; preview size limit exceeded, so the diff is collapsed.