Commit 38ff87d3 authored by peastman's avatar peastman
Browse files

in_silico_mutagenesis() works with arbitrary models

parent 83ab8c3c
Loading
Loading
Loading
Loading
+8 −7
Original line number Diff line number Diff line
@@ -102,8 +102,8 @@ def in_silico_mutagenesis(model, X):

  Parameters
  ----------
  model: TensorGraph
    Currently only SequenceDNN will work, but other models may be added.
  model: Model
    This can be any model that accepts inputs of the required shape.
  X: ndarray
    Shape (N_sequences, N_letters, sequence_length, 1)

@@ -111,10 +111,11 @@ def in_silico_mutagenesis(model, X):
  -------
  (num_tasks, N_sequences, N_letters, sequence_length, 1) ISM score array.
  """
  #Shape (N_sequences, N_letters, sequence_length, 1, num_tasks)
  mutagenesis_scores = np.empty(X.shape + (model.num_tasks,), dtype=np.float32)
  # Shape (N_sequences, num_tasks)
  wild_type_predictions = model.predict(NumpyDataset(X))
  num_tasks = wild_type_predictions.shape[1]
  #Shape (N_sequences, N_letters, sequence_length, 1, num_tasks)
  mutagenesis_scores = np.empty(X.shape + (num_tasks,), dtype=np.float32)
  # Shape (N_sequences, num_tasks, 1, 1, 1)
  wild_type_predictions = wild_type_predictions[:, np.newaxis, np.newaxis,
                                                np.newaxis]
@@ -141,7 +142,7 @@ def in_silico_mutagenesis(model, X):
    # make mutant predictions
    mutated_predictions = model.predict(NumpyDataset(mutated_sequences))
    mutated_predictions = mutated_predictions.reshape(sequence.shape +
                                                      (model.num_tasks,))
                                                      (num_tasks,))
    mutagenesis_scores[
        sequence_index] = wild_type_prediction - mutated_predictions
  rolled_scores = np.rollaxis(mutagenesis_scores, -1)
+14 −5
Original line number Diff line number Diff line
@@ -9,6 +9,7 @@ import os

import numpy as np
import deepchem as dc
import tensorflow as tf

LETTERS = "ACGT"

@@ -46,6 +47,16 @@ class TestGenomicMetrics(unittest.TestCase):
    pssm_scores = get_pssm_scores(sequences, pssm)
    self.assertEqual(pssm_scores.shape, (3, 5))

  def create_model_for_mutagenesis(self):
    """Build a small convolutional KerasModel for the mutagenesis tests.

    The network is two single-filter, 15x15, 'same'-padded ReLU convolutions
    followed by a flatten and a single-unit dense output layer.

    The output layer uses a sigmoid rather than a ReLU: the model is trained
    with a binary cross-entropy loss, which is defined on probabilities, and
    a ReLU output is unbounded above and can be exactly zero for every input,
    which would make all mutagenesis scores zero.

    Returns
    -------
    A dc.models.KerasModel wrapping the Keras network, with
    dc.models.losses.BinaryCrossEntropy as its loss.
    """
    keras_model = tf.keras.Sequential([
        tf.keras.layers.Conv2D(1, 15, activation='relu', padding='same'),
        tf.keras.layers.Conv2D(1, 15, activation='relu', padding='same'),
        tf.keras.layers.Flatten(),
        # Sigmoid keeps the single output in (0, 1) so that it is a valid
        # probability for the binary cross-entropy loss.
        tf.keras.layers.Dense(1, activation='sigmoid')
    ])
    return dc.models.KerasModel(keras_model,
                                dc.models.losses.BinaryCrossEntropy())

  def test_in_silico_mutagenesis_shape(self):
    """Test in-silico mutagenesis returns correct shape."""
    # Construct and train a small Keras model
@@ -59,8 +70,7 @@ class TestGenomicMetrics(unittest.TestCase):
    #y = np.random.randint(0, 2, size=(10, 1))
    #dataset = dc.data.NumpyDataset(X, y)
    dataset = dc.data.NumpyDataset(sequences, labels)
    model = dc.models.SequenceDNN(
        5, "binary_crossentropy", num_filters=[1, 1], kernel_size=[15, 15])
    model = self.create_model_for_mutagenesis()
    model.fit(dataset, nb_epoch=1)

    # Call in-silico mutagenesis
@@ -80,8 +90,7 @@ class TestGenomicMetrics(unittest.TestCase):
    #y = np.random.randint(0, 2, size=(10, 1))
    #dataset = dc.data.NumpyDataset(X, y)
    dataset = dc.data.NumpyDataset(sequences, labels)
    model = dc.models.SequenceDNN(
        5, "binary_crossentropy", num_filters=[1, 1], kernel_size=[15, 15])
    model = self.create_model_for_mutagenesis()
    model.fit(dataset, nb_epoch=1)

    # Call in-silico mutagenesis