Unverified Commit da74fdaa authored by Bharath Ramsundar's avatar Bharath Ramsundar Committed by GitHub
Browse files

Merge pull request #1313 from rbharath/tfbinding

Multilayer SequenceDNN
parents 52c2350c ad190826
Loading
Loading
Loading
Loading
+14 −8
Original line number Diff line number Diff line
@@ -5,6 +5,7 @@ repository. The SequenceDNN class is useful for prediction
tasks working with genomic data.
"""
import tensorflow as tf
import collections
from deepchem.models import Sequential
from deepchem.models.tensorgraph import layers
from deepchem.data import NumpyDataset
@@ -14,9 +15,6 @@ class SequenceDNN(Sequential):
  """
  Sequence DNN models.

  # TODO(rbharath): This model only supports one-conv layer. Extend
  # so that conv layers of greater depth can be implemented.

  Parameters
  ----------
  seq_length : int
@@ -27,7 +25,7 @@ class SequenceDNN(Sequential):
      number of tasks. Default: 1.
  num_filters : list[int] | tuple[int]
      number of convolutional filters in each layer. Default: (15,).
  conv_width : list[int] | tuple[int]
  kernel_size: list[int] | tuple[int]
      width of each layer's convolutional filters. Default: (15,).
  pool_width : int
      width of max pooling after the last layer. Default: 35.
@@ -44,17 +42,25 @@ class SequenceDNN(Sequential):
               loss,
               use_RNN=False,
               num_tasks=1,
               num_filters=15,
               kernel_size=15,
               num_filters=(15,),
               kernel_size=(15,),
               pool_width=35,
               L1=0,
               dropout=0.0,
               verbose=True,
               **kwargs):
    super(SequenceDNN, self).__init__(loss, **kwargs)
    if not isinstance(num_filters, collections.Sequence) or not isinstance(
        kernel_size, collections.Sequence):
      raise ValueError("num_filters and kernel_size must be lists")
    if not len(num_filters) == len(kernel_size):
      raise ValueError("num_filters and kernel_size must be of same length")
    n_layers = len(num_filters)
    self.num_tasks = num_tasks
    self.verbose = verbose
    self.add(layers.Conv2D(num_filters, kernel_size=kernel_size))
    for layer in range(n_layers):
      self.add(
          layers.Conv2D(num_filters[layer], kernel_size=kernel_size[layer]))
      self.add(layers.Dropout(dropout))
    self.add(layers.Flatten())
    self.add(layers.Dense(self.num_tasks, activation_fn=tf.nn.relu))
+11 −2
Original line number Diff line number Diff line
@@ -19,7 +19,7 @@ class TestSequenceDNN(unittest.TestCase):
    #  #    False: num_sequences / num_negatives
    #  #} if not multitask else None,
    dataset = dc.data.NumpyDataset(X, y)
    model = dc.models.SequenceDNN(50, "binary_crossentropy", num_filters=1)
    model = dc.models.SequenceDNN(50, "binary_crossentropy", num_filters=[1])
    model.fit(dataset, nb_epoch=1)

  def test_seq_dnn_multifilter_train(self):
@@ -32,5 +32,14 @@ class TestSequenceDNN(unittest.TestCase):
    #  #    False: num_sequences / num_negatives
    #  #} if not multitask else None,
    dataset = dc.data.NumpyDataset(X, y)
    model = dc.models.SequenceDNN(50, "binary_crossentropy", num_filters=15)
    model = dc.models.SequenceDNN(50, "binary_crossentropy", num_filters=[15])
    model.fit(dataset, nb_epoch=1)

  def test_seq_dnn_multilayer_train(self):
    """Smoke-test that a SequenceDNN with two conv layers can fit random data."""
    # Ten random (1, 4, 50) inputs with binary labels in {0, 1}.
    features = np.random.rand(10, 1, 4, 50)
    labels = np.random.randint(0, 2, size=(10, 1))
    # Two convolutional layers: num_filters and kernel_size lists of equal length.
    net = dc.models.SequenceDNN(
        50, "binary_crossentropy", num_filters=[1, 1], kernel_size=[15, 15])
    net.fit(dc.data.NumpyDataset(features, labels), nb_epoch=1)