Unverified commit d997acf3, authored by Bharath Ramsundar and committed by GitHub
Browse files

Merge pull request #2031 from nd-02110114/improve-mor-type

Improve the type annotations of some utility functions
parents ce379457 6621447b
Loading
Loading
Loading
Loading
+2 −2
Original line number Diff line number Diff line
@@ -17,7 +17,7 @@ from typing import List, Optional, Dict, Tuple, Any, Sequence, Union
from deepchem.utils.typing import OneOrMany
from deepchem.utils.save import load_csv_files, load_json_files
from deepchem.utils.save import load_sdf_files
from deepchem.utils.genomics import encode_fasta_sequence
from deepchem.utils.genomics_utils import encode_bio_sequence
from deepchem.feat import UserDefinedFeaturizer, Featurizer
from deepchem.data import Dataset, DiskDataset, NumpyDataset, ImageDataset
import zipfile
@@ -725,7 +725,7 @@ class FASTALoader(DataLoader):

    def shard_generator():
      for input_file in input_files:
        X = encode_fasta_sequence(input_file)
        X = encode_bio_sequence(input_file)
        ids = np.ones(len(X))
        # (X, y, w, ids)
        yield X, None, None, ids
+2 −2
Original line number Diff line number Diff line
@@ -9,7 +9,7 @@ from deepchem.models import Model
from deepchem.utils.rdkit_util import load_molecule
from deepchem.utils.coordinate_box_utils \
  import CoordinateBox, get_face_boxes, merge_overlapping_boxes
from deepchem.utils.fragment_util import get_contact_atom_indices
from deepchem.utils.fragment_utils import get_contact_atom_indices

logger = logging.getLogger(__name__)

@@ -90,7 +90,7 @@ class ConvexHullPocketFinder(BindingPocketFinder):

    Parameters
    ----------
    scoring_model: `dc.models.Model`, optional
    scoring_model: Model, optional (default None)
      If specified, use this model to prune pockets.
    pad: float, optional (default 5.0)
      The number of angstroms to pad around a binding pocket's atoms
+15 −10
Original line number Diff line number Diff line
@@ -3,15 +3,17 @@ Docks Molecular Complexes
"""
import logging
import tempfile
from typing import cast, Optional, Tuple
from typing import cast, Generator, Optional, Tuple, Union
import numpy as np

from deepchem.utils.typing import RDKitMol
from deepchem.models import Model
from deepchem.feat import ComplexFeaturizer
from deepchem.data import NumpyDataset
from deepchem.dock import PoseGenerator

logger = logging.getLogger(__name__)
POSED_COMPLEX = Tuple[RDKitMol, RDKitMol]


class Docker(object):
@@ -36,11 +38,11 @@ class Docker(object):

    Parameters
    ----------
    pose_generator: `PoseGenerator`
    pose_generator: PoseGenerator
      The pose generator to use for this model
    featurizer: `ComplexFeaturizer`, optional (default None)
    featurizer: ComplexFeaturizer, optional (default None)
      Featurizer associated with `scoring_model`
    scoring_model: `Model`, optional (default None)
    scoring_model: Model, optional (default None)
      Should make predictions on molecular complex.
    """
    if ((featurizer is not None and scoring_model is None) or
@@ -60,7 +62,9 @@ class Docker(object):
           num_modes: int = 9,
           num_pockets: Optional[int] = None,
           out_dir: Optional[str] = None,
           use_pose_generator_scores: bool = False):
           use_pose_generator_scores: bool = False
          ) -> Union[Generator[POSED_COMPLEX, None, None], Generator[Tuple[
              POSED_COMPLEX, float], None, None]]:
    """Generic docking function.

    This docking function uses this object's featurizer, pose
@@ -69,13 +73,13 @@ class Docker(object):

    Parameters
    ----------
    molecular_complex: Tuple[str]
    molecular_complex: Tuple[str, str]
      A representation of a molecular complex. This tuple is
      (protein_file, ligand_file).
    centroid: np.ndarray, optional (default None)
      The centroid to dock against. Is computed if not specified.
    box_dims: np.ndarray, optional (default None)
      Of shape `(3,)` holding the size of the box to dock. If not
      A numpy array of shape `(3,)` holding the size of the box to dock. If not
      specified is set to size of molecular complex plus 5 angstroms.
    exhaustiveness: int, optional (default 10)
      Tells pose generator how exhaustive it should be with pose
@@ -96,6 +100,7 @@ class Docker(object):

    Returns
    -------
    Generator[Tuple[`posed_complex`, `score`]] or Generator[`posed_complex`]
      A generator. If `use_pose_generator_scores==True` or
      `self.scoring_model` is set, then will yield tuples
      `(posed_complex, score)`. Else will yield `posed_complex`.
+18 −14
Original line number Diff line number Diff line
@@ -9,16 +9,17 @@ import tarfile
import numpy as np
from subprocess import call
from subprocess import check_output
from typing import Optional, Tuple
from typing import List, Optional, Tuple, Union

from deepchem.dock.binding_pocket import BindingPocketFinder
from deepchem.utils import download_url, get_data_dir
from deepchem.utils.mol_xyz_util import get_molecule_range
from deepchem.utils.geometry_utils import compute_centroid
from deepchem.utils.typing import RDKitMol
from deepchem.utils.geometry_utils import compute_centroid, compute_protein_range
from deepchem.utils.rdkit_util import load_molecule, write_molecule
from deepchem.utils.vina_utils import load_docked_ligands, write_vina_conf

logger = logging.getLogger(__name__)
DOCKED_POSES = List[Tuple[RDKitMol, RDKitMol]]


class PoseGenerator(object):
@@ -52,7 +53,7 @@ class PoseGenerator(object):
    centroid: np.ndarray, optional (default None)
      The centroid to dock against. Is computed if not specified.
    box_dims: np.ndarray, optional (default None)
      Of shape `(3,)` holding the size of the box to dock. If not
      A numpy array of shape `(3,)` holding the size of the box to dock. If not
      specified is set to size of molecular complex plus 5 angstroms.
    exhaustiveness: int, optional (default 10)
      Tells pose generator how exhaustive it should be with pose
@@ -102,7 +103,7 @@ class VinaPoseGenerator(PoseGenerator):
    sixty_four_bits: bool, optional (default True)
      Specifies whether this is a 64-bit machine. Needed to download
      the correct executable.
    pocket_finder: object, optional (default None)
    pocket_finder: BindingPocketFinder, optional (default None)
      If specified should be an instance of
      `dc.dock.BindingPocketFinder`.
    """
@@ -156,20 +157,22 @@ class VinaPoseGenerator(PoseGenerator):
                     num_modes: int = 9,
                     num_pockets: Optional[int] = None,
                     out_dir: Optional[str] = None,
                     generate_scores: bool = False):
                     generate_scores: bool = False
                    ) -> Union[Tuple[DOCKED_POSES, List[float]], DOCKED_POSES]:
    """Generates the docked complex and outputs files for docked complex.

    TODO: How can this work on Windows? We need to install a .msi file and invoke it correctly from Python for this to work.
    TODO: How can this work on Windows? We need to install a .msi file and
    invoke it correctly from Python for this to work.

    Parameters
    ----------
    molecular_complexes: Tuple[str]
    molecular_complexes: Tuple[str, str]
      A representation of a molecular complex. This tuple is
      (protein_file, ligand_file).
    centroid: np.ndarray, optional
      The centroid to dock against. Is computed if not specified.
    box_dims: np.ndarray, optional
      Of shape `(3,)` holding the size of the box to dock. If not
      A numpy array of shape `(3,)` holding the size of the box to dock. If not
      specified is set to size of molecular complex plus 5 angstroms.
    exhaustiveness: int, optional (default 10)
      Tells Autodock Vina how exhaustive it should be with pose
@@ -190,10 +193,11 @@ class VinaPoseGenerator(PoseGenerator):

    Returns
    -------
    Tuple of `(docked_poses, scores)`. `docked_poses` is a list of
    docked molecular complexes. Each entry in this list contains a
    `(protein_mol, ligand_mol)` pair of RDKit molecules. `scores` is a
    list of binding free energies predicted by Vina.
    Tuple[`docked_poses`, `scores`] or `docked_poses`
      Tuple of `(docked_poses, scores)` or `docked_poses`. `docked_poses`
      is a list of docked molecular complexes. Each entry in this list
      contains a `(protein_mol, ligand_mol)` pair of RDKit molecules.
      `scores` is a list of binding free energies predicted by Vina.

    Raises
    ------
@@ -232,7 +236,7 @@ class VinaPoseGenerator(PoseGenerator):
      if self.pocket_finder is None:
        logger.info("Pockets not specified. Will use whole protein to dock")
        protein_centroid = compute_centroid(protein_mol[0])
        protein_range = get_molecule_range(protein_mol[0])
        protein_range = compute_protein_range(protein_mol[0])
        box_dims = protein_range + 5.0
        centroids, dimensions = [protein_centroid], [box_dims]
      else:
+12 −12
Original line number Diff line number Diff line
@@ -10,9 +10,9 @@ def pairwise_distances(coords1: np.ndarray, coords2: np.ndarray) -> np.ndarray:
  Parameters
  ----------
  coords1: np.ndarray
    Of shape `(N, 3)`
    A numpy array of shape `(N, 3)`
  coords2: np.ndarray
    Of shape `(M, 3)`
    A numpy array of shape `(M, 3)`

  Returns
  -------
@@ -28,7 +28,7 @@ def cutoff_filter(d: np.ndarray, x: np.ndarray, cutoff=8.0) -> np.ndarray:
  Parameters
  ----------
  d: np.ndarray
    Pairwise distances matrix. Of shape `(N, M)`
    Pairwise distances matrix. A numpy array of shape `(N, M)`
  x: np.ndarray
    Matrix of shape `(N, M)`
  cutoff: float, optional (default 8)
@@ -48,7 +48,7 @@ def vina_nonlinearity(c: np.ndarray, w: float, Nrot: int) -> np.ndarray:
  Parameters
  ----------
  c: np.ndarray
    Of shape `(N, M)`
    A numpy array of shape `(N, M)`
  w: float
    Weighting term
  Nrot: int
@@ -69,7 +69,7 @@ def vina_repulsion(d: np.ndarray) -> np.ndarray:
  Parameters
  ----------
  d: np.ndarray
    Of shape `(N, M)`.
    A numpy array of shape `(N, M)`.

  Returns
  -------
@@ -87,7 +87,7 @@ def vina_hydrophobic(d: np.ndarray) -> np.ndarray:
  Parameters
  ----------
  d: np.ndarray
    Of shape `(N, M)`.
    A numpy array of shape `(N, M)`.

  Returns
  -------
@@ -113,7 +113,7 @@ def vina_hbond(d: np.ndarray) -> np.ndarray:
  Parameters
  ----------
  d: np.ndarray
    Of shape `(N, M)`.
    A numpy array of shape `(N, M)`.

  Returns
  -------
@@ -140,7 +140,7 @@ def vina_gaussian_first(d: np.ndarray) -> np.ndarray:
  Parameters
  ----------
  d: np.ndarray
    Of shape `(N, M)`.
    A numpy array of shape `(N, M)`.

  Returns
  -------
@@ -165,7 +165,7 @@ def vina_gaussian_second(d: np.ndarray) -> np.ndarray:
  Parameters
  ----------
  d: np.ndarray
    Of shape `(N, M)`.
    A numpy array of shape `(N, M)`.

  Returns
  -------
@@ -188,9 +188,9 @@ def weighted_linear_sum(w: np.ndarray, x: np.ndarray) -> np.ndarray:
  Parameters
  ----------
  w: np.ndarray
    Of shape `(N,)`
    A numpy array of shape `(N,)`
  x: np.ndarray
    Of shape `(N,)`
    A numpy array of shape `(N,)`

  Returns
  -------
@@ -211,7 +211,7 @@ def vina_energy_term(coords1: np.ndarray, coords2: np.ndarray,
  coords2: np.ndarray
    Molecular coordinates of shape `(M, 3)`
  weights: np.ndarray
    Of shape `(5,)`
    A numpy array of shape `(5,)`
  wrot: float
    The scaling factor for nonlinearity
  Nrot: int
Loading