Unverified commit 36828502, authored by Bharath Ramsundar and committed by GitHub
Browse files

Merge pull request #1686 from peastman/residual

Support residual networks
parents 9d0fc555 e0fcfa90
Loading
Loading
Loading
Loading
+63 −7
Original line number Diff line number Diff line
@@ -16,12 +16,25 @@ from deepchem.models import KerasModel
from deepchem.models.layers import SwitchedDropout
from deepchem.utils.save import log
from deepchem.metrics import to_one_hot, from_one_hot
from tensorflow.keras.layers import Input, Dense, Reshape, Softmax, Dropout, Activation
from tensorflow.keras.layers import Input, Dense, Reshape, Softmax, Dropout, Activation, Lambda

logger = logging.getLogger(__name__)


class MultitaskClassifier(KerasModel):
  """A fully connected network for multitask classification.

  This class provides lots of options for customizing aspects of the model: the
  number and widths of layers, the activation functions, regularization methods,
  etc.

  It optionally can compose the model from pre-activation residual blocks, as
  described in https://arxiv.org/abs/1603.05027, rather than a simple stack of
  dense layers.  This often leads to easier training, especially when using a
  large number of layers.  Note that residual blocks can only be used when
  successive layers have the same width.  Wherever the layer width changes, a
  simple dense layer will be used even if residual=True.
  """

  def __init__(self,
               n_tasks,
@@ -34,6 +47,7 @@ class MultitaskClassifier(KerasModel):
               dropouts=0.5,
               activation_fns=tf.nn.relu,
               n_classes=2,
               residual=False,
               **kwargs):
    """Create a MultitaskClassifier.

@@ -73,6 +87,9 @@ class MultitaskClassifier(KerasModel):
      same value is used for every layer.
    n_classes: int
      the number of classes
    residual: bool
      if True, the model will be composed of pre-activation residual blocks instead
      of a simple stack of dense layers.
    """
    self.n_tasks = n_tasks
    self.n_features = n_features
@@ -98,22 +115,33 @@ class MultitaskClassifier(KerasModel):

    mol_features = Input(shape=(n_features,))
    prev_layer = mol_features
    prev_size = n_features
    next_activation = None

    # Add the dense layers

    for size, weight_stddev, bias_const, dropout, activation_fn in zip(
        layer_sizes, weight_init_stddevs, bias_init_consts, dropouts,
        activation_fns):
      layer = prev_layer
      # Pre-activation ordering: the nonlinearity that belongs to the previous
      # Dense layer is applied here, immediately before the next Dense layer.
      # It must therefore be `next_activation` (saved on the previous
      # iteration), not the current iteration's `activation_fn`.
      if next_activation is not None:
        layer = Activation(next_activation)(layer)
      # The Dense layer itself stays linear; its activation is deferred to the
      # following iteration (or to the final Activation after the loop).
      layer = Dense(
          size,
          kernel_initializer=tf.truncated_normal_initializer(
              stddev=weight_stddev),
          bias_initializer=tf.constant_initializer(value=bias_const),
          kernel_regularizer=regularizer)(layer)
      if dropout > 0.0:
        layer = Dropout(rate=dropout)(layer)
      if residual and prev_size == size:
        # Skip connection: only possible when the block's input and output
        # widths match, otherwise fall back to a plain dense layer.
        prev_layer = Lambda(lambda x: x[0] + x[1])([prev_layer, layer])
      else:
        prev_layer = layer
      prev_size = size
      next_activation = activation_fn
    # Apply the activation still pending from the last Dense layer.
    if next_activation is not None:
      prev_layer = Activation(next_activation)(prev_layer)
    self.neural_fingerprint = prev_layer
    logits = Reshape((n_tasks,
                      n_classes))(Dense(n_tasks * n_classes)(prev_layer))
@@ -143,6 +171,19 @@ class MultitaskClassifier(KerasModel):


class MultitaskRegressor(KerasModel):
  """A fully connected network for multitask regression.

  This class provides lots of options for customizing aspects of the model: the
  number and widths of layers, the activation functions, regularization methods,
  etc.

  It optionally can compose the model from pre-activation residual blocks, as
  described in https://arxiv.org/abs/1603.05027, rather than a simple stack of
  dense layers.  This often leads to easier training, especially when using a
  large number of layers.  Note that residual blocks can only be used when
  successive layers have the same width.  Wherever the layer width changes, a
  simple dense layer will be used even if residual=True.
  """

  def __init__(self,
               n_tasks,
@@ -155,6 +196,7 @@ class MultitaskRegressor(KerasModel):
               dropouts=0.5,
               activation_fns=tf.nn.relu,
               uncertainty=False,
               residual=False,
               **kwargs):
    """Create a MultitaskRegressor.

@@ -191,6 +233,9 @@ class MultitaskRegressor(KerasModel):
    uncertainty: bool
      if True, include extra outputs and loss terms to enable the uncertainty
      in outputs to be predicted
    residual: bool
      if True, the model will be composed of pre-activation residual blocks instead
      of a simple stack of dense layers.
    """
    self.n_tasks = n_tasks
    self.n_features = n_features
@@ -220,22 +265,33 @@ class MultitaskRegressor(KerasModel):
    mol_features = Input(shape=(n_features,))
    dropout_switch = Input(shape=tuple())
    prev_layer = mol_features
    prev_size = n_features
    next_activation = None

    # Add the dense layers

    for size, weight_stddev, bias_const, dropout, activation_fn in zip(
        layer_sizes, weight_init_stddevs, bias_init_consts, dropouts,
        activation_fns):
      layer = prev_layer
      # Pre-activation ordering: the nonlinearity that belongs to the previous
      # Dense layer is applied here, immediately before the next Dense layer.
      # It must therefore be `next_activation` (saved on the previous
      # iteration), not the current iteration's `activation_fn`.
      if next_activation is not None:
        layer = Activation(next_activation)(layer)
      # The Dense layer itself stays linear; its activation is deferred to the
      # following iteration (or to the final Activation after the loop).
      layer = Dense(
          size,
          kernel_initializer=tf.truncated_normal_initializer(
              stddev=weight_stddev),
          bias_initializer=tf.constant_initializer(value=bias_const),
          kernel_regularizer=regularizer)(layer)
      if dropout > 0.0:
        # SwitchedDropout takes the dropout switch input alongside the layer.
        layer = SwitchedDropout(rate=dropout)([layer, dropout_switch])
      if residual and prev_size == size:
        # Skip connection: only possible when the block's input and output
        # widths match, otherwise fall back to a plain dense layer.
        prev_layer = Lambda(lambda x: x[0] + x[1])([prev_layer, layer])
      else:
        prev_layer = layer
      prev_size = size
      next_activation = activation_fn
    # Apply the activation still pending from the last Dense layer.
    if next_activation is not None:
      prev_layer = Activation(next_activation)(prev_layer)
    self.neural_fingerprint = prev_layer
    output = Reshape((n_tasks, 1))(Dense(
        n_tasks,
+66 −7
Original line number Diff line number Diff line
@@ -177,6 +177,37 @@ class TestOverfit(test_util.TensorFlowTestCase):
    scores = model.evaluate(dataset, [classification_metric])
    assert scores[classification_metric.name] > .9

  def test_residual_classification_overfit(self):
    """Test that a residual network can overfit simple classification datasets.

    Builds a tiny random dataset, trains a MultitaskClassifier made of ten
    width-20 layers with residual=True and no dropout, and checks that the
    training-set accuracy exceeds 0.9.
    """
    n_samples = 10
    n_features = 5
    n_tasks = 1
    n_classes = 2

    # Generate dummy dataset (seeded so the test is reproducible)
    np.random.seed(123)
    ids = np.arange(n_samples)
    X = np.random.rand(n_samples, n_features)
    # Labels are drawn from the same number of classes the model is built with.
    y = np.random.randint(n_classes, size=(n_samples, n_tasks))
    w = np.ones((n_samples, n_tasks))
    dataset = dc.data.NumpyDataset(X, y, w, ids)

    classification_metric = dc.metrics.Metric(dc.metrics.accuracy_score)
    model = dc.models.MultitaskClassifier(
        n_tasks,
        n_features,
        layer_sizes=[20] * 10,
        dropouts=0.0,
        batch_size=n_samples,
        n_classes=n_classes,
        residual=True)

    # Fit the model
    model.fit(dataset, nb_epoch=500)

    # Eval model on train
    scores = model.evaluate(dataset, [classification_metric])
    assert scores[classification_metric.name] > .9

  def test_fittransform_regression_overfit(self):
    """Test that TensorGraph FitTransform models can overfit simple regression datasets."""
    n_samples = 10
@@ -451,14 +482,42 @@ class TestOverfit(test_util.TensorFlowTestCase):
    """Test TensorGraph multitask overfits tiny data."""
    n_tasks = 10
    n_samples = 10
    n_features = 3
    n_features = 10
    n_classes = 2

    # Generate dummy dataset
    np.random.seed(123)
    ids = np.arange(n_samples)
    X = np.random.rand(n_samples, n_features)
    y = np.zeros((n_samples, n_tasks))
    y = np.random.rand(n_samples, n_tasks)
    w = np.ones((n_samples, n_tasks))

    dataset = dc.data.NumpyDataset(X, y, w, ids)

    regression_metric = dc.metrics.Metric(
        dc.metrics.mean_squared_error, task_averager=np.mean, mode="regression")
    model = dc.models.MultitaskRegressor(
        n_tasks, n_features, dropouts=0.0, batch_size=n_samples)

    # Fit trained model
    model.fit(dataset, nb_epoch=1000)

    # Eval model on train
    scores = model.evaluate(dataset, [regression_metric])
    assert scores[regression_metric.name] < .02

  def test_residual_regression_overfit(self):
    """Test that a residual multitask network can overfit tiny data."""
    n_tasks = 10
    n_samples = 10
    n_features = 10
    n_classes = 2

    # Generate dummy dataset
    np.random.seed(123)
    ids = np.arange(n_samples)
    X = np.random.rand(n_samples, n_features)
    y = np.random.rand(n_samples, n_tasks)
    w = np.ones((n_samples, n_tasks))

    dataset = dc.data.NumpyDataset(X, y, w, ids)
@@ -468,17 +527,17 @@ class TestOverfit(test_util.TensorFlowTestCase):
    model = dc.models.MultitaskRegressor(
        n_tasks,
        n_features,
        dropouts=[0.],
        weight_init_stddevs=[.1],
        layer_sizes=[20] * 10,
        dropouts=0.0,
        batch_size=n_samples,
        optimizer=Adam(learning_rate=0.0003, beta1=0.9, beta2=0.999))
        residual=True)

    # Fit trained model
    model.fit(dataset, nb_epoch=50)
    model.fit(dataset, nb_epoch=1000)

    # Eval model on train
    scores = model.evaluate(dataset, [regression_metric])
    assert scores[regression_metric.name] < .1
    assert scores[regression_metric.name] < .02

  def test_tf_robust_multitask_regression_overfit(self):
    """Test tf robust multitask overfits tiny data."""