Commit 49ec44f1 authored by Bharath Ramsundar
Browse files

updates

parents 3692624f c521c1c9
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -4,7 +4,7 @@ Imports all submodules
from __future__ import division
from __future__ import unicode_literals

__version__ = '2.0.0'
__version__ = '2.1.0'

import deepchem.data
import deepchem.feat
+43 −0
Original line number Diff line number Diff line
"""Evaluation Metrics for Genomics Datasets."""

import numpy as np
from deepchem.data import NumpyDataset
from deepchem.utils.genomics import loaded_motifs
from scipy.signal import correlate2d

@@ -91,3 +92,45 @@ def get_pssm_scores(encoded_sequences, pssm):
  # take max of fwd and reverse scores at each position
  scores = np.maximum(fwd_scores, rc_scores)
  return scores


def in_silico_mutagenesis(model, X):
  """Computes in-silico-mutagenesis (ISM) scores.

  For every sequence in `X`, each (letter, position) pair is substituted in
  turn: all letters at that position are set to 0 and the chosen letter is
  set to 1. The score of a substitution is the wild-type prediction minus the
  prediction for the mutated sequence, so the substitution that reproduces
  the wild-type letter scores 0 for one-hot input.

  Parameters
  ----------
  model: object
    Model exposing `num_tasks` and `predict(dataset)`, e.g. a SequenceDNN.
    NOTE(review): `predict` is assumed to return one row of `num_tasks`
    values per input sequence -- confirm against the model class.
  X: np.ndarray
    Encoded sequences of shape (N_sequences, 1, N_letters, sequence_length).
    The indexing below requires exactly this rank, with a singleton second
    axis.

  Returns
  -------
  np.ndarray
    float32 array of ISM scores with shape
    (num_tasks, N_sequences, 1, N_letters, sequence_length).
  """
  num_tasks = model.num_tasks
  sequence_shape = X.shape[1:]
  num_letters, sequence_length = sequence_shape[-2], sequence_shape[-1]
  # One mutant per entry of a single encoded sequence.
  num_mutants = int(np.prod(sequence_shape))

  # The index arrays depend only on the per-sequence shape, so build them
  # once instead of on every loop iteration. Mutant i substitutes letter
  # `letter_index[i]` at position `position_index[i]`.
  mutant_index = np.arange(num_mutants)
  position_index = np.tile(np.arange(sequence_length), num_letters)
  letter_index = np.repeat(np.arange(num_letters), sequence_length)

  mutagenesis_scores = np.empty(X.shape + (num_tasks,), dtype=np.float32)
  wild_type_predictions = model.predict(NumpyDataset(X))
  # Insert singleton axes so each wild-type prediction broadcasts against the
  # (1, N_letters, sequence_length, num_tasks) block of mutant predictions.
  wild_type_predictions = wild_type_predictions[:, np.newaxis, np.newaxis,
                                                np.newaxis]
  for sequence_index, (
      sequence,
      wild_type_prediction) in enumerate(zip(X, wild_type_predictions)):
    mutated_sequences = np.repeat(sequence[np.newaxis], num_mutants, axis=0)
    # remove wild-type: clear every letter at the mutated position
    mutated_sequences[mutant_index, :, :, position_index] = 0
    # add mutant: switch on the substituted letter at that position
    mutated_sequences[mutant_index, :, letter_index, position_index] = 1
    # make mutant predictions
    mutated_predictions = model.predict(NumpyDataset(mutated_sequences))
    # Mutants are ordered letter-major, which matches a C-order reshape to
    # (1, N_letters, sequence_length, num_tasks).
    mutated_predictions = mutated_predictions.reshape(sequence_shape +
                                                      (num_tasks,))
    mutagenesis_scores[
        sequence_index] = wild_type_prediction - mutated_predictions
  # Move the task axis to the front.
  return np.moveaxis(mutagenesis_scores, -1, 0)
+19 −0
Original line number Diff line number Diff line
@@ -14,6 +14,7 @@ LETTERS = "ACGT"

from deepchem.metrics.genomic_metrics import get_motif_scores
from deepchem.metrics.genomic_metrics import get_pssm_scores
from deepchem.metrics.genomic_metrics import in_silico_mutagenesis


class TestGenomicMetrics(unittest.TestCase):
@@ -44,3 +45,21 @@ class TestGenomicMetrics(unittest.TestCase):

    pssm_scores = get_pssm_scores(sequences, pssm)
    self.assertEqual(pssm_scores.shape, (3, 5))

  def test_in_silico_mutagenesis(self):
    """Test in-silico mutagenesis returns correct shape."""
    # Construct and train a small SequenceDNN model. The inputs are random
    # values rather than one-hot sequences; only the output shape is checked.
    X = np.random.rand(10, 1, 4, 50)
    y = np.random.randint(0, 2, size=(10, 1))
    dataset = dc.data.NumpyDataset(X, y)
    model = dc.models.SequenceDNN(
        50, "binary_crossentropy", num_filters=[1, 1], kernel_size=[15, 15])
    model.fit(dataset, nb_epoch=1)

    # Call in-silico mutagenesis
    mutagenesis_scores = in_silico_mutagenesis(model, X)
    # Expect one score per task for every entry of X, with the task axis
    # first: (num_tasks, N_sequences, 1, N_letters, sequence_length).
    self.assertEqual(mutagenesis_scores.shape, (model.num_tasks,) + X.shape)


+4 −4
Original line number Diff line number Diff line
[metadata]
name = deepchem
author = Bharath Ramsundar, Evan Feinberg, Pande Lab Stanford
summary = Deep-learning models for drug discovery and quantum chemistry
home-page = https://github.com/pandegroup/deepchem
author = DeepChem contributors 
summary = Deep-learning models for drug discovery, quantum chemistry, and the life sciences.
home-page = https://github.com/deepchem/deepchem
license = MIT
version = 2.0.1
version = 2.1.0
classifier =
    Development Status :: 4 - Beta
    Environment :: Console