Merge pull request #2031 from nd-02110114/improve-mor-type (d997acf3) · Commits · 钟慕尧 / deepchem

deepchem/data/data_loader.py

+2 −2

Original line number	Diff line number	Diff line
		@@ -17,7 +17,7 @@ from typing import List, Optional, Dict, Tuple, Any, Sequence, Union
		from deepchem.utils.typing import OneOrMany
		from deepchem.utils.save import load_csv_files, load_json_files
		from deepchem.utils.save import load_sdf_files
		from deepchem.utils.genomics import encode_fasta_sequence
		from deepchem.utils.genomics_utils import encode_bio_sequence
		from deepchem.feat import UserDefinedFeaturizer, Featurizer
		from deepchem.data import Dataset, DiskDataset, NumpyDataset, ImageDataset
		import zipfile
		@@ -725,7 +725,7 @@ class FASTALoader(DataLoader):

		def shard_generator():
		for input_file in input_files:
		X = encode_fasta_sequence(input_file)
		X = encode_bio_sequence(input_file)
		ids = np.ones(len(X))
		# (X, y, w, ids)
		yield X, None, None, ids

deepchem/dock/binding_pocket.py

+2 −2

Original line number	Diff line number	Diff line
		@@ -9,7 +9,7 @@ from deepchem.models import Model
		from deepchem.utils.rdkit_util import load_molecule
		from deepchem.utils.coordinate_box_utils \
		import CoordinateBox, get_face_boxes, merge_overlapping_boxes
		from deepchem.utils.fragment_util import get_contact_atom_indices
		from deepchem.utils.fragment_utils import get_contact_atom_indices

		logger = logging.getLogger(__name__)

		@@ -90,7 +90,7 @@ class ConvexHullPocketFinder(BindingPocketFinder):

		Parameters
		----------
		scoring_model: `dc.models.Model`, optional
		scoring_model: Model, optional (default None)
		If specified, use this model to prune pockets.
		pad: float, optional (default 5.0)
		The number of angstroms to pad around a binding pocket's atoms

deepchem/dock/docking.py

+15 −10

Original line number	Diff line number	Diff line
		@@ -3,15 +3,17 @@ Docks Molecular Complexes
		"""
		import logging
		import tempfile
		from typing import cast, Optional, Tuple
		from typing import cast, Generator, Optional, Tuple, Union
		import numpy as np

		from deepchem.utils.typing import RDKitMol
		from deepchem.models import Model
		from deepchem.feat import ComplexFeaturizer
		from deepchem.data import NumpyDataset
		from deepchem.dock import PoseGenerator

		logger = logging.getLogger(__name__)
		POSED_COMPLEX = Tuple[RDKitMol, RDKitMol]


		class Docker(object):
		@@ -36,11 +38,11 @@ class Docker(object):

		Parameters
		----------
		pose_generator: `PoseGenerator`
		pose_generator: PoseGenerator
		The pose generator to use for this model
		featurizer: `ComplexFeaturizer`, optional (default None)
		featurizer: ComplexFeaturizer, optional (default None)
		Featurizer associated with `scoring_model`
		scoring_model: `Model`, optional (default None)
		scoring_model: Model, optional (default None)
		Should make predictions on molecular complex.
		"""
		if ((featurizer is not None and scoring_model is None) or
		@@ -60,7 +62,9 @@ class Docker(object):
		num_modes: int = 9,
		num_pockets: Optional[int] = None,
		out_dir: Optional[str] = None,
		use_pose_generator_scores: bool = False):
		use_pose_generator_scores: bool = False
		) -> Union[Generator[POSED_COMPLEX, None, None], Generator[Tuple[
		POSED_COMPLEX, float], None, None]]:
		"""Generic docking function.

		This docking function uses this object's featurizer, pose
		@@ -69,13 +73,13 @@ class Docker(object):

		Parameters
		----------
		molecular_complex: Tuple[str]
		molecular_complex: Tuple[str, str]
		A representation of a molecular complex. This tuple is
		(protein_file, ligand_file).
		centroid: np.ndarray, optional (default None)
		The centroid to dock against. It is computed if not specified.
		box_dims: np.ndarray, optional (default None)
		Of shape `(3,)` holding the size of the box to dock. If not
		A numpy array of shape `(3,)` holding the size of the box to dock. If not
		specified, it is set to the size of the molecular complex plus 5 angstroms.
		exhaustiveness: int, optional (default 10)
		Tells pose generator how exhaustive it should be with pose
		@@ -96,6 +100,7 @@ class Docker(object):

		Returns
		-------
		Generator[Tuple[`posed_complex`, `score`]] or Generator[`posed_complex`]
		A generator. If `use_pose_generator_scores==True` or
		`self.scoring_model` is set, it yields tuples
		`(posed_complex, score)`. Otherwise it yields `posed_complex`.

deepchem/dock/pose_generation.py

+18 −14

Original line number	Diff line number	Diff line
		@@ -9,16 +9,17 @@ import tarfile
		import numpy as np
		from subprocess import call
		from subprocess import check_output
		from typing import Optional, Tuple
		from typing import List, Optional, Tuple, Union

		from deepchem.dock.binding_pocket import BindingPocketFinder
		from deepchem.utils import download_url, get_data_dir
		from deepchem.utils.mol_xyz_util import get_molecule_range
		from deepchem.utils.geometry_utils import compute_centroid
		from deepchem.utils.typing import RDKitMol
		from deepchem.utils.geometry_utils import compute_centroid, compute_protein_range
		from deepchem.utils.rdkit_util import load_molecule, write_molecule
		from deepchem.utils.vina_utils import load_docked_ligands, write_vina_conf

		logger = logging.getLogger(__name__)
		DOCKED_POSES = List[Tuple[RDKitMol, RDKitMol]]


		class PoseGenerator(object):
		@@ -52,7 +53,7 @@ class PoseGenerator(object):
		centroid: np.ndarray, optional (default None)
		The centroid to dock against. It is computed if not specified.
		box_dims: np.ndarray, optional (default None)
		Of shape `(3,)` holding the size of the box to dock. If not
		A numpy array of shape `(3,)` holding the size of the box to dock. If not
		specified, it is set to the size of the molecular complex plus 5 angstroms.
		exhaustiveness: int, optional (default 10)
		Tells pose generator how exhaustive it should be with pose
		@@ -102,7 +103,7 @@ class VinaPoseGenerator(PoseGenerator):
		sixty_four_bits: bool, optional (default True)
		Specifies whether this is a 64-bit machine. Needed to download
		the correct executable.
		pocket_finder: object, optional (default None)
		pocket_finder: BindingPocketFinder, optional (default None)
		If specified, should be an instance of
		`dc.dock.BindingPocketFinder`.
		"""
		@@ -156,20 +157,22 @@ class VinaPoseGenerator(PoseGenerator):
		num_modes: int = 9,
		num_pockets: Optional[int] = None,
		out_dir: Optional[str] = None,
		generate_scores: bool = False):
		generate_scores: bool = False
		) -> Union[Tuple[DOCKED_POSES, List[float]], DOCKED_POSES]:
		"""Generates the docked complex and outputs files for docked complex.

		TODO: How can this work on Windows? We need to install a .msi file and invoke it correctly from Python for this to work.
		TODO: How can this work on Windows? We need to install a .msi file and
		invoke it correctly from Python for this to work.

		Parameters
		----------
		molecular_complexes: Tuple[str]
		molecular_complexes: Tuple[str, str]
		A representation of a molecular complex. This tuple is
		(protein_file, ligand_file).
		centroid: np.ndarray, optional
		The centroid to dock against. It is computed if not specified.
		box_dims: np.ndarray, optional
		Of shape `(3,)` holding the size of the box to dock. If not
		A numpy array of shape `(3,)` holding the size of the box to dock. If not
		specified, it is set to the size of the molecular complex plus 5 angstroms.
		exhaustiveness: int, optional (default 10)
		Tells Autodock Vina how exhaustive it should be with pose
		@@ -190,10 +193,11 @@ class VinaPoseGenerator(PoseGenerator):

		Returns
		-------
		Tuple of `(docked_poses, scores)`. `docked_poses` is a list of
		docked molecular complexes. Each entry in this list contains a
		`(protein_mol, ligand_mol)` pair of RDKit molecules. `scores` is a
		list of binding free energies predicted by Vina.
		Tuple[`docked_poses`, `scores`] or `docked_poses`
		Tuple of `(docked_poses, scores)` or `docked_poses`. `docked_poses`
		is a list of docked molecular complexes. Each entry in this list
		contains a `(protein_mol, ligand_mol)` pair of RDKit molecules.
		`scores` is a list of binding free energies predicted by Vina.

		Raises
		------
		@@ -232,7 +236,7 @@ class VinaPoseGenerator(PoseGenerator):
		if self.pocket_finder is None:
		logger.info("Pockets not specified. Will use whole protein to dock")
		protein_centroid = compute_centroid(protein_mol[0])
		protein_range = get_molecule_range(protein_mol[0])
		protein_range = compute_protein_range(protein_mol[0])
		box_dims = protein_range + 5.0
		centroids, dimensions = [protein_centroid], [box_dims]
		else:

deepchem/dock/pose_scoring.py

+12 −12

Original line number	Diff line number	Diff line
		@@ -10,9 +10,9 @@ def pairwise_distances(coords1: np.ndarray, coords2: np.ndarray) -> np.ndarray:
		Parameters
		----------
		coords1: np.ndarray
		Of shape `(N, 3)`
		A numpy array of shape `(N, 3)`
		coords2: np.ndarray
		Of shape `(M, 3)`
		A numpy array of shape `(M, 3)`

		Returns
		-------
		@@ -28,7 +28,7 @@ def cutoff_filter(d: np.ndarray, x: np.ndarray, cutoff=8.0) -> np.ndarray:
		Parameters
		----------
		d: np.ndarray
		Pairwise distances matrix. Of shape `(N, M)`
		Pairwise distances matrix. A numpy array of shape `(N, M)`
		x: np.ndarray
		Matrix of shape `(N, M)`
		cutoff: float, optional (default 8)
		@@ -48,7 +48,7 @@ def vina_nonlinearity(c: np.ndarray, w: float, Nrot: int) -> np.ndarray:
		Parameters
		----------
		c: np.ndarray
		Of shape `(N, M)`
		A numpy array of shape `(N, M)`
		w: float
		Weighting term
		Nrot: int
		@@ -69,7 +69,7 @@ def vina_repulsion(d: np.ndarray) -> np.ndarray:
		Parameters
		----------
		d: np.ndarray
		Of shape `(N, M)`.
		A numpy array of shape `(N, M)`.

		Returns
		-------
		@@ -87,7 +87,7 @@ def vina_hydrophobic(d: np.ndarray) -> np.ndarray:
		Parameters
		----------
		d: np.ndarray
		Of shape `(N, M)`.
		A numpy array of shape `(N, M)`.

		Returns
		-------
		@@ -113,7 +113,7 @@ def vina_hbond(d: np.ndarray) -> np.ndarray:
		Parameters
		----------
		d: np.ndarray
		Of shape `(N, M)`.
		A numpy array of shape `(N, M)`.

		Returns
		-------
		@@ -140,7 +140,7 @@ def vina_gaussian_first(d: np.ndarray) -> np.ndarray:
		Parameters
		----------
		d: np.ndarray
		Of shape `(N, M)`.
		A numpy array of shape `(N, M)`.

		Returns
		-------
		@@ -165,7 +165,7 @@ def vina_gaussian_second(d: np.ndarray) -> np.ndarray:
		Parameters
		----------
		d: np.ndarray
		Of shape `(N, M)`.
		A numpy array of shape `(N, M)`.

		Returns
		-------
		@@ -188,9 +188,9 @@ def weighted_linear_sum(w: np.ndarray, x: np.ndarray) -> np.ndarray:
		Parameters
		----------
		w: np.ndarray
		Of shape `(N,)`
		A numpy array of shape `(N,)`
		x: np.ndarray
		Of shape `(N,)`
		A numpy array of shape `(N,)`

		Returns
		-------
		@@ -211,7 +211,7 @@ def vina_energy_term(coords1: np.ndarray, coords2: np.ndarray,
		coords2: np.ndarray
		Molecular coordinates of shape `(M, 3)`
		weights: np.ndarray
		Of shape `(5,)`
		A numpy array of shape `(5,)`
		wrot: float
		The scaling factor for nonlinearity
		Nrot: int

Admin message