:recycle: add typing (774156f9) · Commits · 钟慕尧 / deepchem

.travis.yml

+1 −1

Original line number	Diff line number	Diff line
		@@ -33,7 +33,7 @@ install:
		- bash scripts/install_deepchem_conda.sh deepchem
		- conda activate deepchem
		- python setup.py install
		- pip install coveralls mypy yapf==0.22.0
		- pip install coveralls mypy flake8 yapf==0.22.0

		script:
		- bash devtools/run_yapf.sh

deepchem/dock/binding_pocket.py

+13 −7

Original line number	Diff line number	Diff line
		@@ -3,6 +3,9 @@ Computes putative binding pockets on protein.
		"""
		import logging
		import numpy as np
		from typing import Any, Optional, Tuple

		from deepchem.models import Model
		from deepchem.utils import rdkit_util
		from deepchem.utils import coordinate_box_utils as box_utils
		from deepchem.utils.fragment_util import get_contact_atom_indices
		@@ -10,7 +13,10 @@ from deepchem.utils.fragment_util import get_contact_atom_indices
		logger = logging.getLogger(__name__)


		def extract_active_site(protein_file, ligand_file, cutoff=4):
		def extract_active_site(protein_file: str,
		ligand_file: str,
		cutoff: float = 4.0
		) -> Tuple[box_utils.CoordinateBox, np.ndarray]:
		"""Extracts a box for the active site.

		Parameters
		@@ -19,7 +25,7 @@ def extract_active_site(protein_file, ligand_file, cutoff=4):
		Location of protein PDB
		ligand_file: str
		Location of ligand input file
		cutoff: int, optional
		cutoff: float, optional (default 4.0)
		The distance in angstroms from the protein pocket to
		consider for featurization.

		@@ -61,7 +67,7 @@ class BindingPocketFinder(object):
		technique to be used.
		"""

		def find_pockets(self, molecule):
		def find_pockets(self, molecule: Any):
		"""Finds potential binding pockets in proteins.

		Parameters
		@@ -78,21 +84,21 @@ class ConvexHullPocketFinder(BindingPocketFinder):
		Based on https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4112621/pdf/1472-6807-14-18.pdf
		"""

		def __init__(self, scoring_model=None, pad=5):
		def __init__(self, scoring_model: Optional[Model] = None, pad: int = 5):
		"""Initialize the pocket finder.

		Parameters
		----------
		scoring_model: `dc.models.Model`, optional
		If specified, use this model to prune pockets.
		pad: float, optional
		pad: int, optional (default 5)
		The number of angstroms to pad around a binding pocket's atoms
		to get a binding pocket box.
		"""
		self.scoring_model = scoring_model
		self.pad = pad

		def find_all_pockets(self, protein_file):
		def find_all_pockets(self, protein_file: str):
		"""Find list of binding pockets on protein.

		Parameters
		@@ -103,7 +109,7 @@ class ConvexHullPocketFinder(BindingPocketFinder):
		coords, _ = rdkit_util.load_molecule(protein_file)
		return box_utils.get_face_boxes(coords, self.pad)

		def find_pockets(self, macromolecule_file):
		def find_pockets(self, macromolecule_file: str):
		"""Find list of suitable binding pockets on protein.

		This function computes putative binding pockets on this protein.

deepchem/dock/docking.py

+25 −12

Original line number	Diff line number	Diff line
		@@ -3,7 +3,12 @@ Docks Molecular Complexes
		"""
		import logging
		import tempfile
		from typing import Any, Optional, cast

		from deepchem.models import Model
		from deepchem.feat import ComplexFeaturizer
		from deepchem.data import NumpyDataset
		from deepchem.dock import PoseGenerator

		logger = logging.getLogger(__name__)

		@@ -22,16 +27,19 @@ class Docker(object):
		generation and scoring classes that are provided to this class.
		"""

		def __init__(self, pose_generator, featurizer=None, scoring_model=None):
		def __init__(self,
		pose_generator: PoseGenerator,
		featurizer: Optional[ComplexFeaturizer] = None,
		scoring_model: Optional[Model] = None):
		"""Builds model.

		Parameters
		----------
		pose_generator: `PoseGenerator`
		The pose generator to use for this model
		featurizer: `ComplexFeaturizer`
		featurizer: `ComplexFeaturizer`, optional (default None)
		Featurizer associated with `scoring_model`
		scoring_model: `Model`
		scoring_model: `Model`, optional (default None)
		Should make predictions on molecular complex.
		"""
		if ((featurizer is not None and scoring_model is None) or
		@@ -44,14 +52,14 @@ class Docker(object):
		self.scoring_model = scoring_model

		def dock(self,
		molecular_complex,
		centroid=None,
		box_dims=None,
		exhaustiveness=10,
		num_modes=9,
		num_pockets=None,
		out_dir=None,
		use_pose_generator_scores=False):
		molecular_complex: Any,
		centroid: Optional[int] = None,
		box_dims: Optional[int] = None,
		exhaustiveness: int = 10,
		num_modes: int = 9,
		num_pockets: Optional[int] = None,
		out_dir: Optional[str] = None,
		use_pose_generator_scores: bool = False):
		"""Generic docking function.

		This docking function uses this object's featurizer, pose
		@@ -89,6 +97,7 @@ class Docker(object):
		raise ValueError(
		"Cannot set use_pose_generator_scores=True when self.scoring_model is set (since both generator scores for complexes)."
		)

		outputs = self.pose_generator.generate_poses(
		molecular_complex,
		centroid=centroid,
		@@ -102,11 +111,15 @@ class Docker(object):
		complexes, scores = outputs
		else:
		complexes = outputs

		# We know use_pose_generator_scores == False in this case
		if self.scoring_model is not None:
		for posed_complex in complexes:
		# NOTE: this cast is a workaround; it has no effect at runtime.
		self.featurizer = cast(ComplexFeaturizer, self.featurizer)
		# TODO: How to handle the failure here?
		features, _ = self.featurizer.featurize([molecular_complex])
		features, _ = self.featurizer.featurize( # type: ignore
		[molecular_complex])
		dataset = NumpyDataset(X=features)
		score = self.scoring_model.predict(dataset)
		yield (posed_complex, score)

deepchem/dock/pose_generation.py

+35 −26

Original line number	Diff line number	Diff line
		@@ -6,8 +6,12 @@ import logging
		import os
		import tempfile
		import tarfile
		import numpy as np
		from subprocess import call
		from subprocess import check_output
		from typing import Optional, Tuple

		from deepchem.dock.binding_pocket import BindingPocketFinder
		from deepchem.utils import rdkit_util
		from deepchem.utils import mol_xyz_util
		from deepchem.utils import geometry_utils
		@@ -31,23 +35,24 @@ class PoseGenerator(object):
		"""

		def generate_poses(self,
		molecular_complex,
		centroid=None,
		box_dims=None,
		exhaustiveness=10,
		num_modes=9,
		num_pockets=None,
		out_dir=None,
		generate_scores=False):
		molecular_complex: Tuple[str, str],
		centroid: Optional[np.ndarray] = None,
		box_dims: Optional[np.ndarray] = None,
		exhaustiveness: int = 10,
		num_modes: int = 9,
		num_pockets: Optional[int] = None,
		out_dir: Optional[str] = None,
		generate_scores: bool = False):
		"""Generates a list of low energy poses for molecular complex

		Parameters
		----------
		molecular_complexes: list
		A representation of a molecular complex.
		centroid: np.ndarray, optional
		molecular_complex: Tuple[str, str]
		A representation of a molecular complex. This is a tuple of
		(protein_file, ligand_file).
		centroid: np.ndarray, optional (default None)
		The centroid to dock against. Is computed if not specified.
		box_dims: np.ndarray, optional
		box_dims: np.ndarray, optional (default None)
		Of shape `(3,)` holding the size of the box to dock. If not
		specified is set to size of molecular complex plus 5 angstroms.
		exhaustiveness: int, optional (default 10)
		@@ -60,7 +65,7 @@ class PoseGenerator(object):
		If specified, `self.pocket_finder` must be set. Will only
		generate poses for the first `num_pockets` returned by
		`self.pocket_finder`.
		out_dir: str, optional
		out_dir: str, optional (default None)
		If specified, write generated poses to this directory.
		generate_scores: bool, optional (default False)
		If `True`, the pose generator will return scores for complexes.
		@@ -88,7 +93,9 @@ class VinaPoseGenerator(PoseGenerator):
		This class requires RDKit to be installed.
		"""

		def __init__(self, sixty_four_bits=True, pocket_finder=None):
		def __init__(self,
		sixty_four_bits: bool = True,
		pocket_finder: Optional[BindingPocketFinder] = None):
		"""Initializes Vina Pose Generator

		Parameters
		@@ -143,22 +150,23 @@ class VinaPoseGenerator(PoseGenerator):
		os.remove(downloaded_file)

		def generate_poses(self,
		molecular_complex,
		centroid=None,
		box_dims=None,
		exhaustiveness=10,
		num_modes=9,
		num_pockets=None,
		out_dir=None,
		generate_scores=False):
		molecular_complex: Tuple[str, str],
		centroid: Optional[np.ndarray] = None,
		box_dims: Optional[np.ndarray] = None,
		exhaustiveness: int = 10,
		num_modes: int = 9,
		num_pockets: Optional[int] = None,
		out_dir: Optional[str] = None,
		generate_scores: bool = False):
		"""Generates the docked complex and outputs files for docked complex.

		TODO: How can this work on Windows? We need to install a .msi file and invoke it correctly from Python for this to work.

		Parameters
		----------
		molecular_complexes: list
		A representation of a molecular complex.
		molecular_complex: Tuple[str, str]
		A representation of a molecular complex. This is a tuple of
		(protein_file, ligand_file).
		centroid: np.ndarray, optional
		The centroid to dock against. Is computed if not specified.
		box_dims: np.ndarray, optional
		@@ -290,8 +298,9 @@ class VinaPoseGenerator(PoseGenerator):
		else:
		# I'm not sure why specifying the args as a list fails on other platforms,
		# but for some reason it only works if I pass it as a string.
		args = "%s --config %s --log %s --out %s" % (self.vina_cmd, conf_file,
		log_file, out_pdbqt)
		args = "%s --config %s --log %s --out %s" % ( # type: ignore
		self.vina_cmd, conf_file, log_file, out_pdbqt)
		# FIXME: We should use `subprocess.run` instead of `call`
		call(args, shell=True)
		ligands, scores = vina_utils.load_docked_ligands(out_pdbqt)
		docked_complexes += [(protein_mol[1], ligand) for ligand in ligands]

deepchem/dock/pose_scoring.py

+48 −28

Original line number	Diff line number	Diff line
		@@ -4,7 +4,7 @@ Utilities to score protein-ligand poses using DeepChem.
		import numpy as np


		def pairwise_distances(coords1, coords2):
		def pairwise_distances(coords1: np.ndarray, coords2: np.ndarray) -> np.ndarray:
		"""Returns matrix of pairwise Euclidean distances.

		Parameters
		@@ -16,12 +16,13 @@ def pairwise_distances(coords1, coords2):

		Returns
		-------
		np.ndarray
		A `(N,M)` array with pairwise distances.
		"""
		return np.sum((coords1[None, :] - coords2[:, None])**2, -1)**0.5


		def cutoff_filter(d, x, cutoff=8.0):
		def cutoff_filter(d: np.ndarray, x: np.ndarray, cutoff=8.0) -> np.ndarray:
		"""Applies a cutoff filter on pairwise distances

		Parameters
		@@ -35,13 +36,13 @@ def cutoff_filter(d, x, cutoff=8.0):

		Returns
		-------
		A `(N,M)` array with values where distance is too large thresholded
		to 0.
		np.ndarray
		A `(N,M)` array with values where distance is too large thresholded to 0.
		"""
		return np.where(d < cutoff, x, np.zeros_like(x))


		def vina_nonlinearity(c, w, Nrot):
		def vina_nonlinearity(c: np.ndarray, w: float, Nrot: int) -> np.ndarray:
		"""Computes non-linearity used in Vina.

		Parameters
		@@ -55,13 +56,14 @@ def vina_nonlinearity(c, w, Nrot):

		Returns
		-------
		np.ndarray
		A `(N, M)` array with activations under a nonlinearity.
		"""
		out_tensor = c / (1 + w * Nrot)
		return out_tensor


		def vina_repulsion(d):
		def vina_repulsion(d: np.ndarray) -> np.ndarray:
		"""Computes Autodock Vina's repulsion interaction term.

		Parameters
		@@ -71,17 +73,16 @@ def vina_repulsion(d):

		Returns
		-------
		np.ndarray
		A `(N, M)` array with repulsion terms.
		"""
		return np.where(d < 0, d**2, np.zeros_like(d))


		def vina_hydrophobic(d):
		def vina_hydrophobic(d: np.ndarray) -> np.ndarray:
		"""Computes Autodock Vina's hydrophobic interaction term.

		Here, d is the set of surface distances as defined in:

		Jain, Ajay N. "Scoring noncovalent protein-ligand interactions: a continuous differentiable function tuned to compute binding affinities." Journal of computer-aided molecular design 10.5 (1996): 427-440.
		Here, d is the set of surface distances as defined in [1]_

		Parameters
		----------
		@@ -90,20 +91,24 @@ def vina_hydrophobic(d):

		Returns
		-------
		A `(N, M)` array of hydrophoboic interactions in a piecewise linear
		curve.
		np.ndarray
		A `(N, M)` array of hydrophobic interactions in a piecewise linear curve.

		References
		----------
		.. [1] Jain, Ajay N. "Scoring noncovalent protein-ligand interactions:
		a continuous differentiable function tuned to compute binding affinities."
		Journal of computer-aided molecular design 10.5 (1996): 427-440.
		"""
		out_tensor = np.where(d < 0.5, np.ones_like(d),
		np.where(d < 1.5, 1.5 - d, np.zeros_like(d)))
		return out_tensor


		def vina_hbond(d):
		def vina_hbond(d: np.ndarray) -> np.ndarray:
		"""Computes Autodock Vina's hydrogen bond interaction term.

		Here, d is the set of surface distances as defined in:

		Jain, Ajay N. "Scoring noncovalent protein-ligand interactions: a continuous differentiable function tuned to compute binding affinities." Journal of computer-aided molecular design 10.5 (1996): 427-440.
		Here, d is the set of surface distances as defined in [1]_

		Parameters
		----------
		@@ -112,8 +117,14 @@ def vina_hbond(d):

		Returns
		-------
		A `(N, M)` array of hydrophoboic interactions in a piecewise linear
		curve.
		np.ndarray
		A `(N, M)` array of hydrogen bond interactions in a piecewise linear curve.

		References
		----------
		.. [1] Jain, Ajay N. "Scoring noncovalent protein-ligand interactions:
		a continuous differentiable function tuned to compute binding affinities."
		Journal of computer-aided molecular design 10.5 (1996): 427-440.
		"""
		out_tensor = np.where(
		d < -0.7, np.ones_like(d),
		@@ -121,7 +132,7 @@ def vina_hbond(d):
		return out_tensor


		def vina_gaussian_first(d):
		def vina_gaussian_first(d: np.ndarray) -> np.ndarray:
		"""Computes Autodock Vina's first Gaussian interaction term.

		Here, d is the set of surface distances as defined in [1]_
		@@ -133,6 +144,7 @@ def vina_gaussian_first(d):

		Returns
		-------
		np.ndarray
		A `(N, M)` array of gaussian interaction terms.

		References
		@@ -145,7 +157,7 @@ def vina_gaussian_first(d):
		return out_tensor


		def vina_gaussian_second(d):
		def vina_gaussian_second(d: np.ndarray) -> np.ndarray:
		"""Computes Autodock Vina's second Gaussian interaction term.

		Here, d is the set of surface distances as defined in [1]_
		@@ -157,6 +169,7 @@ def vina_gaussian_second(d):

		Returns
		-------
		np.ndarray
		A `(N, M)` array of gaussian interaction terms.

		References
		@@ -169,7 +182,7 @@ def vina_gaussian_second(d):
		return out_tensor


		def weighted_linear_sum(w, x):
		def weighted_linear_sum(w: np.ndarray, x: np.ndarray) -> np.ndarray:
		"""Computes weighted linear sum.

		Parameters
		@@ -178,11 +191,17 @@ def weighted_linear_sum(w, x):
		Of shape `(N,)`
		x: np.ndarray
		Of shape `(N,)`

		Returns
		-------
		np.ndarray
		A scalar value
		"""
		return np.sum(np.dot(w, x))


		def vina_energy_term(coords1, coords2, weights, wrot, Nrot):
		def vina_energy_term(coords1: np.ndarray, coords2: np.ndarray,
		weights: np.ndarray, wrot: float, Nrot: int) -> np.ndarray:
		"""Computes the Vina Energy function for two molecular conformations

		Parameters
		@@ -200,7 +219,8 @@ def vina_energy_term(coords1, coords2, weights, wrot, Nrot):

		Returns
		-------
		Scalar with energy
		np.ndarray
		A scalar value with free energy
		"""
		# TODO(rbharath): The autodock vina source computes surface distances which take into account the van der Waals radius of each atom type.
		dists = pairwise_distances(coords1, coords2)

Admin message