Commit 8b823d99 authored by Bharath Ramsundar's avatar Bharath Ramsundar
Browse files

Changes

parent fd364637
Loading
Loading
Loading
Loading
+34 −9
Original line number Diff line number Diff line
@@ -6,6 +6,7 @@ import numpy as np
import os
import tempfile
from subprocess import call
from deepchem.data import NumpyDataset

logger = logging.getLogger(__name__)

@@ -36,6 +37,10 @@ class Docker(object):
    scoring_model: `Model`
      Should make predictions on molecular complex.
    """
    if ((featurizer is not None and scoring_model is None) or
        (featurizer is None and scoring_model is not None)):
      raise ValueError(
          "featurizer/scoring_model must both be set or must both be None.")
    self.base_dir = tempfile.mkdtemp()
    self.pose_generator = pose_generator
    self.featurizer = featurizer
@@ -48,18 +53,23 @@ class Docker(object):
           exhaustiveness=10,
           num_modes=9,
           num_pockets=None,
           out_dir=None):
    """Docks using Vina and RF.
           out_dir=None,
           use_pose_generator_scores=False):
    """Generic docking function.

    This docking function uses this object's featurizer, pose
    generator, and scoring model to make docking predictions. This
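    function is written in a generic style so that it can be used with
    any pose generator, featurizer, and scoring model that expose the
    methods it calls.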

    Parameters
    ----------
    molecular_complex: Object
      Some representation of a molecular complex.
    exhaustiveness: int, optional (default 10)
      Tells Autodock Vina how exhaustive it should be with pose
      Tells pose generator how exhaustive it should be with pose
      generation.
    num_modes: int, optional (default 9)
      Tells Autodock Vina how many binding modes it should generate at
      Tells pose generator how many binding modes it should generate at
      each invocation.
    num_pockets: int, optional (default None)
      If specified, `self.pocket_finder` must be set. Will only
@@ -71,7 +81,17 @@ class Docker(object):
      If `True`, ask pose generator to generate scores. This cannot be
      `True` if `self.featurizer` and `self.scoring_model` are set
      since those will be used to generate scores in that case. 

    Returns
    -------
    A generator. If `use_pose_generator_scores==True` or
    `self.scoring_model` is set, then will yield tuples
    `(posed_complex, score)`. Else will yield `posed_complex`.
    """
    if self.scoring_model is not None and use_pose_generator_scores:
      raise ValueError(
          "Cannot set use_pose_generator_scores=True when self.scoring_model is set (since both generator scores for complexes)."
      )
    outputs = self.pose_generator.generate_poses(
        molecular_complex,
        centroid=centroid,
@@ -85,12 +105,17 @@ class Docker(object):
      complexes, scores = outputs
    else:
      complexes = outputs
    # We know use_pose_generator_scores == False in this case
    if self.scoring_model is not None:
      for posed_complex in complexes:
      if self.featurizer is not None:
        # TODO: How to handle the failure here?
        features, _ = self.featurizer.featurize_complexes([molecular_complex])
        dataset = NumpyDataset(X=features)
        score = self.model.predict(dataset)
        yield (score, posed_complex)
        score = self.scoring_model.predict(dataset)
        yield (posed_complex, score)
    elif use_pose_generator_scores:
      for posed_complex, score in zip(complexes, scores):
        yield (posed_complex, score)
    else:
      for posed_complex in complexes:
        yield posed_complex
+19 −6
Original line number Diff line number Diff line
"""
Generates protein-ligand docked poses using Autodock Vina.
Generates protein-ligand docked poses.
"""
import platform
import deepchem
@@ -37,7 +37,8 @@ class PoseGenerator(object):
                     exhaustiveness=10,
                     num_modes=9,
                     num_pockets=None,
                     out_dir=None):
                     out_dir=None,
                     generate_scores=False):
    """Generates a list of low energy poses for molecular complex

    Parameters
@@ -50,10 +51,10 @@ class PoseGenerator(object):
      Of shape `(3,)` holding the size of the box to dock. If not
      specified is set to size of molecular complex plus 5 angstroms.
    exhaustiveness: int, optional (default 10)
      Tells Autodock Vina how exhaustive it should be with pose
      Tells pose generator how exhaustive it should be with pose
      generation.
    num_modes: int, optional (default 9)
      Tells Autodock Vina how many binding modes it should generate at
      Tells pose generator how many binding modes it should generate at
      each invocation.
    num_pockets: int, optional (default None)
      If specified, `self.pocket_finder` must be set. Will only
@@ -61,6 +62,10 @@ class PoseGenerator(object):
      `self.pocket_finder`.
    out_dir: str, optional
      If specified, write generated poses to this directory.
    generate_scores: bool, optional (default False)
      If `True`, the pose generator will return scores for complexes.
      This is used typically when invoking external docking programs
      that compute scores. 

    Returns
    -------
@@ -137,7 +142,8 @@ class VinaPoseGenerator(PoseGenerator):
                     exhaustiveness=10,
                     num_modes=9,
                     num_pockets=None,
                     out_dir=None):
                     out_dir=None,
                     generate_scores=False):
    """Generates the docked complex and outputs files for docked complex.

    TODO: How can this work on Windows? We need to install a .msi file and invoke it correctly from Python for this to work.
@@ -163,6 +169,10 @@ class VinaPoseGenerator(PoseGenerator):
      `self.pocket_finder`.
    out_dir: str, optional
      If specified, write generated poses to this directory.
    generate_scores: bool, optional (default False)
      If `True`, the pose generator will return scores for complexes.
      This is used typically when invoking external docking programs
      that compute scores. 

    Returns
    -------
@@ -274,4 +284,7 @@ class VinaPoseGenerator(PoseGenerator):
      docked_complexes += [(protein_mol[1], ligand) for ligand in ligands]
      all_scores += scores

    if generate_scores:
      return docked_complexes, all_scores
    else:
      return docked_complexes
+17 −1
Original line number Diff line number Diff line
@@ -73,12 +73,16 @@ def vina_repulsion(d):
  -------
  A `(N, M)` array with repulsion terms.
  """
  return np.where(d >= 0, d**2, np.zeros_like(d))
  return np.where(d < 0, d**2, np.zeros_like(d))


def vina_hydrophobic(d):
  """Computes Autodock Vina's hydrophobic interaction term.

  Here, d is the set of surface distances as defined in:

  Jain, Ajay N. "Scoring noncovalent protein-ligand interactions: a continuous differentiable function tuned to compute binding affinities." Journal of computer-aided molecular design 10.5 (1996): 427-440.

  Parameters
  ----------
  d: np.ndarray
@@ -97,6 +101,10 @@ def vina_hydrophobic(d):
def vina_hbond(d):
  """Computes Autodock Vina's hydrogen bond interaction term.

  Here, d is the set of surface distances as defined in:

  Jain, Ajay N. "Scoring noncovalent protein-ligand interactions: a continuous differentiable function tuned to compute binding affinities." Journal of computer-aided molecular design 10.5 (1996): 427-440.

  Parameters
  ----------
  d: np.ndarray
@@ -116,6 +124,10 @@ def vina_hbond(d):
def vina_gaussian_first(d):
  """Computes Autodock Vina's first Gaussian interaction term.

  Here, d is the set of surface distances as defined in:

  Jain, Ajay N. "Scoring noncovalent protein-ligand interactions: a continuous differentiable function tuned to compute binding affinities." Journal of computer-aided molecular design 10.5 (1996): 427-440.

  Parameters
  ----------
  d: np.ndarray
@@ -132,6 +144,10 @@ def vina_gaussian_first(d):
def vina_gaussian_second(d):
  """Computes Autodock Vina's second Gaussian interaction term.

  Here, d is the set of surface distances as defined in:

  Jain, Ajay N. "Scoring noncovalent protein-ligand interactions: a continuous differentiable function tuned to compute binding affinities." Journal of computer-aided molecular design 10.5 (1996): 427-440.

  Parameters
  ----------
  d: np.ndarray
+49 −1
Original line number Diff line number Diff line
@@ -6,8 +6,12 @@ import sys
import unittest
import pytest
import logging
import numpy as np
import deepchem as dc
from deepchem.dock.binding_pocket import ConvexHullPocketFinder
from deepchem.feat import ComplexFeaturizer
from deepchem.models import Model
from deepchem.dock.pose_generation import PoseGenerator


class TestDocking(unittest.TestCase):
@@ -41,6 +45,24 @@ class TestDocking(unittest.TestCase):
    # Check only one output since num_modes==1
    assert len(list(docked_outputs)) == 1

  @pytest.mark.slow
  def test_docker_pose_generator_scores(self):
    """Check that Docker passes through scores from the pose generator."""
    # No featurizer/scoring model is supplied, so Docker itself does no
    # scoring; any score in the output comes from the pose generator.
    pose_generator = dc.dock.VinaPoseGenerator()
    docker = dc.dock.Docker(pose_generator)
    complex_files = (self.protein_file, self.ligand_file)
    results = list(
        docker.dock(
            complex_files,
            exhaustiveness=1,
            num_modes=1,
            out_dir="/tmp",
            use_pose_generator_scores=True))

    # num_modes==1, so exactly one output is expected, and it must be a
    # 2-element (posed_complex, score) entry.
    assert len(results) == 1
    assert len(results[0]) == 2

  @pytest.mark.slow
  def test_docker_specified_pocket(self):
    """Test that Docker can dock into spec. pocket."""
@@ -51,7 +73,7 @@ class TestDocking(unittest.TestCase):
    docked_outputs = docker.dock(
        (self.protein_file, self.ligand_file),
        centroid=(10, 10, 10),
        box_dims=(1, 1, 1),
        box_dims=(10, 10, 10),
        exhaustiveness=1,
        num_modes=1,
        out_dir="/tmp")
@@ -76,3 +98,29 @@ class TestDocking(unittest.TestCase):

    # Check returned files exist
    assert len(list(docked_outputs)) == 1

  @pytest.mark.slow
  def test_scoring_model_and_featurizer(self):
    """Test that scoring model and featurizer are invoked correctly.

    Stub implementations of the featurizer, model, and pose generator
    are used so that no external docking program is needed: the pose
    generator returns a single `None` pose, the featurizer returns an
    all-zero feature matrix, and the model predicts all zeros.
    """

    class DummyFeaturizer(ComplexFeaturizer):

      def featurize_complexes(self, complexes, *args, **kwargs):
        # Second return value mirrors the (features, failures) pair that
        # Docker.dock unpacks.
        return np.zeros((len(complexes), 5)), None

    class DummyModel(Model):

      def predict(self, dataset, *args, **kwargs):
        return np.zeros(len(dataset))

    class DummyPoseGenerator(PoseGenerator):

      def generate_poses(self, *args, **kwargs):
        return [None]

    featurizer = DummyFeaturizer()
    scoring_model = DummyModel()
    pose_generator = DummyPoseGenerator()
    docker = dc.dock.Docker(pose_generator, featurizer, scoring_model)
    outputs = list(docker.dock(None))

    # One pose in, one (posed_complex, score) pair out.
    assert len(outputs) == 1
    posed_complex, score = outputs[0]
    assert posed_complex is None
    # Compare arrays explicitly instead of relying on the truthiness of
    # an elementwise `==` buried inside a tuple comparison.
    np.testing.assert_array_equal(score, np.zeros(1))
+36 −5
Original line number Diff line number Diff line
@@ -28,8 +28,8 @@ class TestPoseGeneration(unittest.TestCase):
    vpg = dc.dock.VinaPoseGenerator(pocket_finder=pocket_finder)

  @pytest.mark.slow
  def test_vina_poses(self):
    """Test that VinaPoseGenerator creates pose files.
  def test_vina_poses_and_scores(self):
    """Test that VinaPoseGenerator generates poses and scores

    This test takes some time to run, about a minute and a half on
    development laptop.
@@ -45,7 +45,8 @@ class TestPoseGeneration(unittest.TestCase):
        (protein_file, ligand_file),
        exhaustiveness=1,
        num_modes=1,
        out_dir="/tmp")
        out_dir="/tmp",
        generate_scores=True)

    assert len(poses) == 1
    assert len(scores) == 1
@@ -55,6 +56,34 @@ class TestPoseGeneration(unittest.TestCase):
    assert isinstance(ligand, Chem.Mol)

  @pytest.mark.slow
  def test_vina_poses_no_scores(self):
    """Test that VinaPoseGenerator returns poses alone when scores are off.

    This test takes some time to run, about a minute and a half on
    development laptop.
    """
    # The run is long, so enable INFO logging to show progress.
    logging.basicConfig(level=logging.INFO)
    test_dir = os.path.dirname(os.path.realpath(__file__))
    complex_files = (os.path.join(test_dir, "1jld_protein.pdb"),
                     os.path.join(test_dir, "1jld_ligand.sdf"))

    generator = dc.dock.VinaPoseGenerator(pocket_finder=None)
    # With generate_scores=False the return value is the pose list itself,
    # not a (poses, scores) pair.
    poses = generator.generate_poses(
        complex_files,
        exhaustiveness=1,
        num_modes=1,
        out_dir="/tmp",
        generate_scores=False)

    assert len(poses) == 1
    protein, ligand = poses[0]
    from rdkit import Chem
    for mol in (protein, ligand):
      assert isinstance(mol, Chem.Mol)

  @pytest.mark.slow
  def test_vina_pose_specified_centroid(self):
    """Test that VinaPoseGenerator creates pose files with specified centroid/box dims.

@@ -76,7 +105,8 @@ class TestPoseGeneration(unittest.TestCase):
        box_dims=box_dims,
        exhaustiveness=1,
        num_modes=1,
        out_dir="/tmp")
        out_dir="/tmp",
        generate_scores=True)

    assert len(poses) == 1
    assert len(scores) == 1
@@ -106,7 +136,8 @@ class TestPoseGeneration(unittest.TestCase):
        exhaustiveness=1,
        num_modes=1,
        num_pockets=2,
        out_dir="/tmp")
        out_dir="/tmp",
        generate_scores=True)

    assert len(poses) == 2
    assert len(scores) == 2
Loading