Commit 774156f9 authored by nd-02110114's avatar nd-02110114
Browse files

♻️ add typing

parent e42d3c73
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -33,7 +33,7 @@ install:
  - bash scripts/install_deepchem_conda.sh deepchem
  - conda activate deepchem
  - python setup.py install
  - pip install coveralls mypy yapf==0.22.0
  - pip install coveralls mypy flake8 yapf==0.22.0

script:
  - bash devtools/run_yapf.sh
+13 −7
Original line number Diff line number Diff line
@@ -3,6 +3,9 @@ Computes putative binding pockets on protein.
"""
import logging
import numpy as np
from typing import Any, Optional, Tuple

from deepchem.models import Model
from deepchem.utils import rdkit_util
from deepchem.utils import coordinate_box_utils as box_utils
from deepchem.utils.fragment_util import get_contact_atom_indices
@@ -10,7 +13,10 @@ from deepchem.utils.fragment_util import get_contact_atom_indices
logger = logging.getLogger(__name__)


def extract_active_site(protein_file, ligand_file, cutoff=4):
def extract_active_site(protein_file: str,
                        ligand_file: str,
                        cutoff: float = 4.0
                       ) -> Tuple[box_utils.CoordinateBox, np.ndarray]:
  """Extracts a box for the active site.

  Parameters
@@ -19,7 +25,7 @@ def extract_active_site(protein_file, ligand_file, cutoff=4):
    Location of protein PDB
  ligand_file: str
    Location of ligand input file
  cutoff: int, optional
  cutoff: float, optional (default 4.0)
    The distance in angstroms from the protein pocket to
    consider for featurization.

@@ -61,7 +67,7 @@ class BindingPocketFinder(object):
  technique to be used.
  """

  def find_pockets(self, molecule):
  def find_pockets(self, molecule: Any):
    """Finds potential binding pockets in proteins.

    Parameters
@@ -78,21 +84,21 @@ class ConvexHullPocketFinder(BindingPocketFinder):
  Based on https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4112621/pdf/1472-6807-14-18.pdf
  """

  def __init__(self, scoring_model=None, pad=5):
  def __init__(self, scoring_model: Optional[Model] = None, pad: int = 5):
    """Initialize the pocket finder.

    Parameters
    ----------
    scoring_model: `dc.models.Model`, optional
      If specified, use this model to prune pockets.
    pad: float, optional
    pad: int, optional (default 5)
      The number of angstroms to pad around a binding pocket's atoms
      to get a binding pocket box.
    """
    self.scoring_model = scoring_model
    self.pad = pad

  def find_all_pockets(self, protein_file):
  def find_all_pockets(self, protein_file: str):
    """Find list of binding pockets on protein.

    Parameters
@@ -103,7 +109,7 @@ class ConvexHullPocketFinder(BindingPocketFinder):
    coords, _ = rdkit_util.load_molecule(protein_file)
    return box_utils.get_face_boxes(coords, self.pad)

  def find_pockets(self, macromolecule_file):
  def find_pockets(self, macromolecule_file: str):
    """Find list of suitable binding pockets on protein.

    This function computes putative binding pockets on this protein.
+25 −12
Original line number Diff line number Diff line
@@ -3,7 +3,12 @@ Docks Molecular Complexes
"""
import logging
import tempfile
from typing import Any, Optional, cast

from deepchem.models import Model
from deepchem.feat import ComplexFeaturizer
from deepchem.data import NumpyDataset
from deepchem.dock import PoseGenerator

logger = logging.getLogger(__name__)

@@ -22,16 +27,19 @@ class Docker(object):
  generation and scoring classes that are provided to this class.
  """

  def __init__(self, pose_generator, featurizer=None, scoring_model=None):
  def __init__(self,
               pose_generator: PoseGenerator,
               featurizer: Optional[ComplexFeaturizer] = None,
               scoring_model: Optional[Model] = None):
    """Builds model.

    Parameters
    ----------
    pose_generator: `PoseGenerator`
      The pose generator to use for this model
    featurizer: `ComplexFeaturizer`
    featurizer: `ComplexFeaturizer`, optional (default None)
      Featurizer associated with `scoring_model`
    scoring_model: `Model`
    scoring_model: `Model`, optional (default None)
      Should make predictions on molecular complex.
    """
    if ((featurizer is not None and scoring_model is None) or
@@ -44,14 +52,14 @@ class Docker(object):
    self.scoring_model = scoring_model

  def dock(self,
           molecular_complex,
           centroid=None,
           box_dims=None,
           exhaustiveness=10,
           num_modes=9,
           num_pockets=None,
           out_dir=None,
           use_pose_generator_scores=False):
           molecular_complex: Any,
           centroid: Optional[int] = None,
           box_dims: Optional[int] = None,
           exhaustiveness: int = 10,
           num_modes: int = 9,
           num_pockets: Optional[int] = None,
           out_dir: Optional[str] = None,
           use_pose_generator_scores: bool = False):
    """Generic docking function.

    This docking function uses this object's featurizer, pose
@@ -89,6 +97,7 @@ class Docker(object):
      raise ValueError(
          "Cannot set use_pose_generator_scores=True when self.scoring_model is set (since both generator scores for complexes)."
      )

    outputs = self.pose_generator.generate_poses(
        molecular_complex,
        centroid=centroid,
@@ -102,11 +111,15 @@ class Docker(object):
      complexes, scores = outputs
    else:
      complexes = outputs

    # We know use_pose_generator_scores == False in this case
    if self.scoring_model is not None:
      for posed_complex in complexes:
        # NOTE: this cast is a workaround for the type checker; it has no effect at runtime.
        self.featurizer = cast(ComplexFeaturizer, self.featurizer)
        # TODO: How to handle the failure here?
        features, _ = self.featurizer.featurize([molecular_complex])
        features, _ = self.featurizer.featurize(  # type: ignore
            [molecular_complex])
        dataset = NumpyDataset(X=features)
        score = self.scoring_model.predict(dataset)
        yield (posed_complex, score)
+35 −26
Original line number Diff line number Diff line
@@ -6,8 +6,12 @@ import logging
import os
import tempfile
import tarfile
import numpy as np
from subprocess import call
from subprocess import check_output
from typing import Optional, Tuple

from deepchem.dock.binding_pocket import BindingPocketFinder
from deepchem.utils import rdkit_util
from deepchem.utils import mol_xyz_util
from deepchem.utils import geometry_utils
@@ -31,23 +35,24 @@ class PoseGenerator(object):
  """

  def generate_poses(self,
                     molecular_complex,
                     centroid=None,
                     box_dims=None,
                     exhaustiveness=10,
                     num_modes=9,
                     num_pockets=None,
                     out_dir=None,
                     generate_scores=False):
                     molecular_complex: Tuple[str, str],
                     centroid: Optional[np.ndarray] = None,
                     box_dims: Optional[np.ndarray] = None,
                     exhaustiveness: int = 10,
                     num_modes: int = 9,
                     num_pockets: Optional[int] = None,
                     out_dir: Optional[str] = None,
                     generate_scores: bool = False):
    """Generates a list of low energy poses for molecular complex

    Parameters
    ----------
    molecular_complexes: list
      A representation of a molecular complex.
    centroid: np.ndarray, optional
    molecular_complex: Tuple[str, str]
      A representation of a molecular complex. This is a tuple of
      (protein_file, ligand_file).
    centroid: np.ndarray, optional (default None)
      The centroid to dock against. Is computed if not specified.
    box_dims: np.ndarray, optional
    box_dims: np.ndarray, optional (default None)
      Of shape `(3,)` holding the size of the box to dock. If not
      specified is set to size of molecular complex plus 5 angstroms.
    exhaustiveness: int, optional (default 10)
@@ -60,7 +65,7 @@ class PoseGenerator(object):
      If specified, `self.pocket_finder` must be set. Will only
      generate poses for the first `num_pockets` returned by
      `self.pocket_finder`.
    out_dir: str, optional
    out_dir: str, optional (default None)
      If specified, write generated poses to this directory.
    generate_scores: bool, optional (default False)
      If `True`, the pose generator will return scores for complexes.
@@ -88,7 +93,9 @@ class VinaPoseGenerator(PoseGenerator):
  This class requires RDKit to be installed.
  """

  def __init__(self, sixty_four_bits=True, pocket_finder=None):
  def __init__(self,
               sixty_four_bits: bool = True,
               pocket_finder: Optional[BindingPocketFinder] = None):
    """Initializes Vina Pose Generator

    Parameters
@@ -143,22 +150,23 @@ class VinaPoseGenerator(PoseGenerator):
      os.remove(downloaded_file)

  def generate_poses(self,
                     molecular_complex,
                     centroid=None,
                     box_dims=None,
                     exhaustiveness=10,
                     num_modes=9,
                     num_pockets=None,
                     out_dir=None,
                     generate_scores=False):
                     molecular_complex: Tuple[str, str],
                     centroid: Optional[np.ndarray] = None,
                     box_dims: Optional[np.ndarray] = None,
                     exhaustiveness: int = 10,
                     num_modes: int = 9,
                     num_pockets: Optional[int] = None,
                     out_dir: Optional[str] = None,
                     generate_scores: bool = False):
    """Generates the docked complex and outputs files for docked complex.

    TODO: How can this work on Windows? We need to install a .msi file and invoke it correctly from Python for this to work.

    Parameters
    ----------
    molecular_complexes: list
      A representation of a molecular complex.
    molecular_complex: Tuple[str, str]
      A representation of a molecular complex. This is a tuple of
      (protein_file, ligand_file).
    centroid: np.ndarray, optional
      The centroid to dock against. Is computed if not specified.
    box_dims: np.ndarray, optional
@@ -290,8 +298,9 @@ class VinaPoseGenerator(PoseGenerator):
      else:
        # I'm not sure why specifying the args as a list fails on other platforms,
        # but for some reason it only works if I pass it as a string.
        args = "%s --config %s --log %s --out %s" % (self.vina_cmd, conf_file,
                                                     log_file, out_pdbqt)
        args = "%s --config %s --log %s --out %s" % (  # type: ignore
            self.vina_cmd, conf_file, log_file, out_pdbqt)
      # FIXME: We should use `subprocess.run` instead of `call`
      call(args, shell=True)
      ligands, scores = vina_utils.load_docked_ligands(out_pdbqt)
      docked_complexes += [(protein_mol[1], ligand) for ligand in ligands]
+48 −28
Original line number Diff line number Diff line
@@ -4,7 +4,7 @@ Utilities to score protein-ligand poses using DeepChem.
import numpy as np


def pairwise_distances(coords1, coords2):
def pairwise_distances(coords1: np.ndarray, coords2: np.ndarray) -> np.ndarray:
  """Returns matrix of pairwise Euclidean distances.

  Parameters
@@ -16,12 +16,13 @@ def pairwise_distances(coords1, coords2):

  Returns
  -------
  np.ndarray
    A `(N,M)` array with pairwise distances.
  """
  return np.sum((coords1[None, :] - coords2[:, None])**2, -1)**0.5


def cutoff_filter(d, x, cutoff=8.0):
def cutoff_filter(d: np.ndarray, x: np.ndarray, cutoff=8.0) -> np.ndarray:
  """Applies a cutoff filter on pairwise distances

  Parameters
@@ -35,13 +36,13 @@ def cutoff_filter(d, x, cutoff=8.0):

  Returns
  -------
  A `(N,M)` array with values where distance is too large thresholded
  to 0.
  np.ndarray
    A `(N,M)` array with values where distance is too large thresholded to 0.
  """
  return np.where(d < cutoff, x, np.zeros_like(x))


def vina_nonlinearity(c, w, Nrot):
def vina_nonlinearity(c: np.ndarray, w: float, Nrot: int) -> np.ndarray:
  """Computes non-linearity used in Vina.

  Parameters
@@ -55,13 +56,14 @@ def vina_nonlinearity(c, w, Nrot):

  Returns
  -------
  np.ndarray
    A `(N, M)` array with activations under a nonlinearity.
  """
  out_tensor = c / (1 + w * Nrot)
  return out_tensor


def vina_repulsion(d):
def vina_repulsion(d: np.ndarray) -> np.ndarray:
  """Computes Autodock Vina's repulsion interaction term.

  Parameters
@@ -71,17 +73,16 @@ def vina_repulsion(d):

  Returns
  -------
  np.ndarray
    A `(N, M)` array with repulsion terms.
  """
  return np.where(d < 0, d**2, np.zeros_like(d))


def vina_hydrophobic(d):
def vina_hydrophobic(d: np.ndarray) -> np.ndarray:
  """Computes Autodock Vina's hydrophobic interaction term.

  Here, d is the set of surface distances as defined in:

  Jain, Ajay N. "Scoring noncovalent protein-ligand interactions: a continuous differentiable function tuned to compute binding affinities." Journal of computer-aided molecular design 10.5 (1996): 427-440.
  Here, d is the set of surface distances as defined in [1]_

  Parameters
  ----------
@@ -90,20 +91,24 @@ def vina_hydrophobic(d):

  Returns
  -------
  A `(N, M)` array of hydrophoboic interactions in a piecewise linear
  curve.
  np.ndarray
    A `(N, M)` array of hydrophobic interactions in a piecewise linear curve.

  References
  ----------
  .. [1] Jain, Ajay N. "Scoring noncovalent protein-ligand interactions:
     a continuous differentiable function tuned to compute binding affinities."
     Journal of computer-aided molecular design 10.5 (1996): 427-440.
  """
  out_tensor = np.where(d < 0.5, np.ones_like(d),
                        np.where(d < 1.5, 1.5 - d, np.zeros_like(d)))
  return out_tensor


def vina_hbond(d):
def vina_hbond(d: np.ndarray) -> np.ndarray:
  """Computes Autodock Vina's hydrogen bond interaction term.

  Here, d is the set of surface distances as defined in:

  Jain, Ajay N. "Scoring noncovalent protein-ligand interactions: a continuous differentiable function tuned to compute binding affinities." Journal of computer-aided molecular design 10.5 (1996): 427-440.
  Here, d is the set of surface distances as defined in [1]_

  Parameters
  ----------
@@ -112,8 +117,14 @@ def vina_hbond(d):

  Returns
  -------
  A `(N, M)` array of hydrophoboic interactions in a piecewise linear
  curve.
  np.ndarray
    A `(N, M)` array of hydrogen bond interactions in a piecewise linear curve.

  References
  ----------
  .. [1] Jain, Ajay N. "Scoring noncovalent protein-ligand interactions:
     a continuous differentiable function tuned to compute binding affinities."
     Journal of computer-aided molecular design 10.5 (1996): 427-440.
  """
  out_tensor = np.where(
      d < -0.7, np.ones_like(d),
@@ -121,7 +132,7 @@ def vina_hbond(d):
  return out_tensor


def vina_gaussian_first(d):
def vina_gaussian_first(d: np.ndarray) -> np.ndarray:
  """Computes Autodock Vina's first Gaussian interaction term.

  Here, d is the set of surface distances as defined in [1]_
@@ -133,6 +144,7 @@ def vina_gaussian_first(d):

  Returns
  -------
  np.ndarray
    A `(N, M)` array of gaussian interaction terms.

  References
@@ -145,7 +157,7 @@ def vina_gaussian_first(d):
  return out_tensor


def vina_gaussian_second(d):
def vina_gaussian_second(d: np.ndarray) -> np.ndarray:
  """Computes Autodock Vina's second Gaussian interaction term.

  Here, d is the set of surface distances as defined in [1]_
@@ -157,6 +169,7 @@ def vina_gaussian_second(d):

  Returns
  -------
  np.ndarray
    A `(N, M)` array of gaussian interaction terms.

  References
@@ -169,7 +182,7 @@ def vina_gaussian_second(d):
  return out_tensor


def weighted_linear_sum(w, x):
def weighted_linear_sum(w: np.ndarray, x: np.ndarray) -> np.ndarray:
  """Computes weighted linear sum.

  Parameters
@@ -178,11 +191,17 @@ def weighted_linear_sum(w, x):
    Of shape `(N,)`
  x: np.ndarray
    Of shape `(N,)`

  Returns
  -------
  np.ndarray
    A scalar value
  """
  return np.sum(np.dot(w, x))


def vina_energy_term(coords1, coords2, weights, wrot, Nrot):
def vina_energy_term(coords1: np.ndarray, coords2: np.ndarray,
                     weights: np.ndarray, wrot: float, Nrot: int) -> np.ndarray:
  """Computes the Vina Energy function for two molecular conformations

  Parameters
@@ -200,7 +219,8 @@ def vina_energy_term(coords1, coords2, weights, wrot, Nrot):

  Returns
  -------
  Scalar with energy
  np.ndarray
    A scalar value with free energy
  """
  # TODO(rbharath): The autodock vina source computes surface distances which take into account the van der Waals radius of each atom type.
  dists = pairwise_distances(coords1, coords2)
Loading