441 No Babel: Remove All Open Babel Dependencies (763f8721) · Commits · 钟慕尧 / deepchem

README.md

+8 −11

Original line number	Diff line number	Diff line
		@@ -25,7 +25,6 @@ Stanford and originally created by [Bharath Ramsundar](http://rbharath.github.io
		* [About Us](#about-us)

		## Requirements
		* [openbabel](http://openbabel.org/wiki/Main_Page)
		* [pandas](http://pandas.pydata.org/)
		* [rdkit](http://www.rdkit.org/docs/Install.html)
		* [boost](http://www.boost.org/)
		@@ -43,28 +42,26 @@ Installation from source is the only currently supported format. ```deepchem```
		### Full Anaconda distribution

		1. Download the 64-bit Python 2.7 or Python 3.5 version of Anaconda for Linux [here](https://www.continuum.io/downloads#_unix).

		Follow the [installation instructions](http://docs.continuum.io/anaconda/install#linux-install)

		2. `openbabel`
		```bash
		conda install -c omnia openbabel=2.4.0
		```

		3. `rdkit`
		2. `rdkit`
		```bash
		conda install -c rdkit rdkit
		```

		4. `joblib`
		3. `joblib`
		```bash
		conda install joblib
		```

		5. `six`
		4. `six`
		```bash
		pip install six
		```
		5. `networkx`
		```bash
		conda install -c anaconda networkx=1.11
		```

		6. `mdtraj`
		```bash

deepchem/dock/binding_pocket.py

+89 −72

Original line number	Diff line number	Diff line
		@@ -12,22 +12,24 @@ __license__ = "GPL"
		import os
		import tempfile
		import numpy as np
		import openbabel as ob
		from rdkit import Chem
		from subprocess import call
		from scipy.spatial import ConvexHull
		from deepchem.feat import hydrogenate_and_compute_partial_charges
		from deepchem.feat.atomic_coordinates import AtomicCoordinates
		from deepchem.feat.grid_featurizer import load_molecule
		from deepchem.feat.binding_pocket_features import BindingPocketFeaturizer
		from deepchem.feat.fingerprints import CircularFingerprint
		from deepchem.models.sklearn_models import SklearnModel
		from deepchem.data.datasets import NumpyDataset
		from deepchem.utils import rdkit_util


		def extract_active_site(protein_file, ligand_file, cutoff=4):
		"""Extracts a box for the active site."""
		protein_coords = load_molecule(protein_file, add_hydrogens=False)[0]
		ligand_coords = load_molecule(ligand_file, add_hydrogens=False)[0]
		protein_coords = rdkit_util.load_molecule(
		protein_file, add_hydrogens=False)[0]
		ligand_coords = rdkit_util.load_molecule(
		ligand_file, add_hydrogens=True, calc_charges=True)[0]
		num_ligand_atoms = len(ligand_coords)
		num_protein_atoms = len(protein_coords)
		pocket_inds = []
		@@ -52,8 +54,9 @@ def extract_active_site(protein_file, ligand_file, cutoff=4):
		y_max = int(np.ceil(np.amax(pocket_coords[:, 1])))
		z_min = int(np.floor(np.amin(pocket_coords[:, 2])))
		z_max = int(np.ceil(np.amax(pocket_coords[:, 2])))
		return (((x_min, x_max), (y_min, y_max), (z_min, z_max)),
		pocket_atoms, pocket_coords)
		return (((x_min, x_max), (y_min, y_max), (z_min, z_max)), pocket_atoms,
		pocket_coords)


		def compute_overlap(mapping, box1, box2):
		"""Computes overlap between the two boxes.
		@@ -65,6 +68,7 @@ def compute_overlap(mapping, box1, box2):
		atom2 = set(mapping[box2])
		return len(atom1.intersection(atom2)) / float(len(atom1))


		def get_all_boxes(coords, pad=5):
		"""Get all pocket boxes for protein coords.

		@@ -89,6 +93,7 @@ def get_all_boxes(coords, pad=5):
		boxes.append(((x_min, x_max), (y_min, y_max), (z_min, z_max)))
		return boxes


		def boxes_to_atoms(atom_coords, boxes):
		"""Maps each box to a list of atoms in that box.

		@@ -110,6 +115,7 @@ def boxes_to_atoms(atom_coords, boxes):
		mapping[box] = box_atoms
		return mapping


		def merge_boxes(box1, box2):
		"""Merges two boxes."""
		(x_min1, x_max1), (y_min1, y_max1), (z_min1, z_max1) = box1
		@@ -122,6 +128,7 @@ def merge_boxes(box1, box2):
		z_max = max(z_max1, z_max2)
		return ((x_min, x_max), (y_min, y_max), (z_min, z_max))


		def merge_overlapping_boxes(mapping, boxes, threshold=.8):
		"""Merge boxes which have an overlap greater than threshold.

		@@ -166,6 +173,7 @@ def merge_overlapping_boxes(mapping, boxes, threshold=.8):
		mapping = new_mapping
		return outputs, mapping


		class BindingPocketFinder(object):
		"""Abstract superclass for binding pocket detectors"""

		@@ -173,24 +181,28 @@ class BindingPocketFinder(object):
		"""Finds potential binding pockets in proteins."""
		raise NotImplementedError


		class ConvexHullPocketFinder(BindingPocketFinder):
		"""Implementation that uses convex hull of protein to find pockets.

		Based on https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4112621/pdf/1472-6807-14-18.pdf
		"""

		def __init__(self, pad=5):
		self.pad = pad

		def find_all_pockets(self, protein_file):
		"""Find list of binding pockets on protein."""
		# protein_coords is (N, 3) tensor
		coords = load_molecule(protein_file, add_hydrogens=False)[0]
		coords = rdkit_util.load_molecule(protein_file)[0]
		return get_all_boxes(coords, self.pad)

		def find_pockets(self, protein_file, ligand_file):
		"""Find list of suitable binding pockets on protein."""
		protein_coords = load_molecule(protein_file, add_hydrogens=False)[0]
		ligand_coords = load_molecule(ligand_file, add_hydrogens=False)[0]
		protein_coords = rdkit_util.load_molecule(
		protein_file, add_hydrogens=False, calc_charges=False)[0]
		ligand_coords = rdkit_util.load_molecule(
		ligand_file, add_hydrogens=False, calc_charges=False)[0]
		boxes = get_all_boxes(protein_coords, self.pad)
		mapping = boxes_to_atoms(protein_coords, boxes)
		pockets, pocket_atoms_map = merge_overlapping_boxes(mapping, boxes)
		@@ -203,6 +215,7 @@ class ConvexHullPocketFinder(BindingPocketFinder):
		pocket_coords.append(coords)
		return pockets, pocket_atoms_map, pocket_coords


		class RFConvexHullPocketFinder(BindingPocketFinder):
		"""Uses pre-trained RF model + ConvexHullPocketFinder to select pockets."""

		@@ -214,7 +227,9 @@ class RFConvexHullPocketFinder(BindingPocketFinder):
		self.base_dir = tempfile.mkdtemp()
		print("About to download trained model.")
		# TODO(rbharath): Shift refined to full once trained.
		call(("wget -c http://deepchem.io.s3-website-us-west-1.amazonaws.com/trained_models/pocket_random_refined_RF.tar.gz").split())
		call((
		"wget -c http://deepchem.io.s3-website-us-west-1.amazonaws.com/trained_models/pocket_random_refined_RF.tar.gz"
		).split())
		call(("tar -zxvf pocket_random_refined_RF.tar.gz").split())
		call(("mv pocket_random_refined_RF %s" % (self.base_dir)).split())
		self.model_dir = os.path.join(self.base_dir, "pocket_random_refined_RF")
		@@ -235,55 +250,57 @@ class RFConvexHullPocketFinder(BindingPocketFinder):
		examples/binding_pockets/binding_pocket_datasets.py. Find way to refactor
		to avoid code duplication.
		"""
		if not ligand_file.endswith(".sdf"):
		raise ValueError("Only .sdf ligand files can be featurized.")
		ligand_basename = os.path.basename(ligand_file).split(".")[0]
		ligand_mol2 = os.path.join(
		self.base_dir, ligand_basename + ".mol2")

		# Write mol2 file for ligand
		obConversion = ob.OBConversion()
		conv_out = obConversion.SetInAndOutFormats(str("sdf"), str("mol2"))
		ob_mol = ob.OBMol()
		obConversion.ReadFile(ob_mol, str(ligand_file))
		obConversion.WriteFile(ob_mol, str(ligand_mol2))

		# Featurize ligand
		mol = Chem.MolFromMol2File(str(ligand_mol2), removeHs=False)
		if mol is None:
		return None, None
		# Default for CircularFingerprint
		n_ligand_features = 1024
		ligand_features = self.ligand_featurizer.featurize([mol])

		# Featurize pocket
		pockets, pocket_atoms_map, pocket_coords = self.convex_finder.find_pockets(
		protein_file, ligand_file)
		n_pockets = len(pockets)
		n_pocket_features = BindingPocketFeaturizer.n_features

		features = np.zeros((n_pockets, n_pocket_features+n_ligand_features))
		pocket_features = self.pocket_featurizer.featurize(
		protein_file, pockets, pocket_atoms_map, pocket_coords)
		# Note broadcast operation
		features[:, :n_pocket_features] = pocket_features
		features[:, n_pocket_features:] = ligand_features
		dataset = NumpyDataset(X=features)
		pocket_preds = self.model.predict(dataset)
		pocket_pred_proba = np.squeeze(self.model.predict_proba(dataset))

		# Find pockets which are active
		active_pockets = []
		active_pocket_atoms_map = {}
		active_pocket_coords = []
		for pocket_ind in range(len(pockets)):
		#################################################### DEBUG
		# TODO(rbharath): For now, using a weak cutoff. Fix later.
		#if pocket_preds[pocket_ind] == 1:
		if pocket_pred_proba[pocket_ind][1] > .15:
		#################################################### DEBUG
		pocket = pockets[pocket_ind]
		active_pockets.append(pocket)
		active_pocket_atoms_map[pocket] = pocket_atoms_map[pocket]
		active_pocket_coords.append(pocket_coords[pocket_ind])
		return active_pockets, active_pocket_atoms_map, active_pocket_coords
		# if not ligand_file.endswith(".sdf"):
		# raise ValueError("Only .sdf ligand files can be featurized.")
		# ligand_basename = os.path.basename(ligand_file).split(".")[0]
		# ligand_mol2 = os.path.join(
		# self.base_dir, ligand_basename + ".mol2")
		#
		# # Write mol2 file for ligand
		# obConversion = ob.OBConversion()
		# conv_out = obConversion.SetInAndOutFormats(str("sdf"), str("mol2"))
		# ob_mol = ob.OBMol()
		# obConversion.ReadFile(ob_mol, str(ligand_file))
		# obConversion.WriteFile(ob_mol, str(ligand_mol2))
		#
		# # Featurize ligand
		# mol = Chem.MolFromMol2File(str(ligand_mol2), removeHs=False)
		# if mol is None:
		# return None, None
		# # Default for CircularFingerprint
		# n_ligand_features = 1024
		# ligand_features = self.ligand_featurizer.featurize([mol])
		#
		# # Featurize pocket
		# pockets, pocket_atoms_map, pocket_coords = self.convex_finder.find_pockets(
		# protein_file, ligand_file)
		# n_pockets = len(pockets)
		# n_pocket_features = BindingPocketFeaturizer.n_features
		#
		# features = np.zeros((n_pockets, n_pocket_features+n_ligand_features))
		# pocket_features = self.pocket_featurizer.featurize(
		# protein_file, pockets, pocket_atoms_map, pocket_coords)
		# # Note broadcast operation
		# features[:, :n_pocket_features] = pocket_features
		# features[:, n_pocket_features:] = ligand_features
		# dataset = NumpyDataset(X=features)
		# pocket_preds = self.model.predict(dataset)
		# pocket_pred_proba = np.squeeze(self.model.predict_proba(dataset))
		#
		# # Find pockets which are active
		# active_pockets = []
		# active_pocket_atoms_map = {}
		# active_pocket_coords = []
		# for pocket_ind in range(len(pockets)):
		# #################################################### DEBUG
		# # TODO(rbharath): For now, using a weak cutoff. Fix later.
		# #if pocket_preds[pocket_ind] == 1:
		# if pocket_pred_proba[pocket_ind][1] > .15:
		# #################################################### DEBUG
		# pocket = pockets[pocket_ind]
		# active_pockets.append(pocket)
		# active_pocket_atoms_map[pocket] = pocket_atoms_map[pocket]
		# active_pocket_coords.append(pocket_coords[pocket_ind])
		# return active_pockets, active_pocket_atoms_map, active_pocket_coords
		# # TODO(LESWING)
		raise ValueError("Karl Implement")

deepchem/dock/docking.py

+33 −10

Original line number	Diff line number	Diff line
		@@ -12,7 +12,6 @@ __license__ = "GPL"
		import numpy as np
		import os
		import tempfile
		from deepchem.feat import GridFeaturizer
		from deepchem.data import DiskDataset
		from deepchem.models import SklearnModel
		from deepchem.models import TensorflowMultiTaskRegressor
		@@ -21,13 +20,19 @@ from deepchem.dock.pose_generation import VinaPoseGenerator
		from sklearn.ensemble import RandomForestRegressor
		from subprocess import call


		class Docker(object):
		"""Abstract Class specifying API for Docking."""

		def dock(self, protein_file, ligand_file, centroid=None, box_dims=None,
		def dock(self,
		protein_file,
		ligand_file,
		centroid=None,
		box_dims=None,
		dry_run=False):
		raise NotImplementedError


		class VinaGridRFDocker(Docker):
		"""Vina pose-generation, RF-models on grid-featurization of complexes."""

		@@ -35,7 +40,9 @@ class VinaGridRFDocker(Docker):
		"""Builds model."""
		self.base_dir = tempfile.mkdtemp()
		print("About to download trained model.")
		call(("wget -c http://deepchem.io.s3-website-us-west-1.amazonaws.com/trained_models/random_full_RF.tar.gz").split())
		call((
		"wget -c http://deepchem.io.s3-website-us-west-1.amazonaws.com/trained_models/random_full_RF.tar.gz"
		).split())
		call(("tar -zxvf random_full_RF.tar.gz").split())
		call(("mv random_full_RF %s" % (self.base_dir)).split())
		self.model_dir = os.path.join(self.base_dir, "random_full_RF")
		@@ -48,7 +55,11 @@ class VinaGridRFDocker(Docker):
		self.pose_generator = VinaPoseGenerator(
		exhaustiveness=exhaustiveness, detect_pockets=detect_pockets)

		def dock(self, protein_file, ligand_file, centroid=None, box_dims=None,
		def dock(self,
		protein_file,
		ligand_file,
		centroid=None,
		box_dims=None,
		dry_run=False):
		"""Docks using Vina and RF."""
		protein_docked, ligand_docked = self.pose_generator.generate_poses(
		@@ -59,6 +70,7 @@ class VinaGridRFDocker(Docker):
		score = np.zeros((1,))
		return (score, (protein_docked, ligand_docked))


		class VinaGridDNNDocker(object):
		"""Vina pose-generation, DNN-models on grid-featurization of complexes."""

		@@ -66,7 +78,9 @@ class VinaGridDNNDocker(object):
		"""Builds model."""
		self.base_dir = tempfile.mkdtemp()
		print("About to download trained model.")
		call(("wget -c http://deepchem.io.s3-website-us-west-1.amazonaws.com/trained_models/random_full_DNN.tar.gz").split())
		call((
		"wget -c http://deepchem.io.s3-website-us-west-1.amazonaws.com/trained_models/random_full_DNN.tar.gz"
		).split())
		call(("tar -zxvf random_full_DNN.tar.gz").split())
		call(("mv random_full_DNN %s" % (self.base_dir)).split())
		self.model_dir = os.path.join(self.base_dir, "random_full_DNN")
		@@ -75,15 +89,24 @@ class VinaGridDNNDocker(object):
		pdbbind_tasks = ["-logKd/Ki"]
		n_features = 2052
		model = TensorflowMultiTaskRegressor(
		len(pdbbind_tasks), n_features, logdir=self.model_dir, dropouts=[.25],
		learning_rate=0.0003, weight_init_stddevs=[.1], batch_size=64)
		len(pdbbind_tasks),
		n_features,
		logdir=self.model_dir,
		dropouts=[.25],
		learning_rate=0.0003,
		weight_init_stddevs=[.1],
		batch_size=64)
		model.reload()

		self.pose_scorer = GridPoseScorer(model, feat="grid")
		self.pose_generator = VinaPoseGenerator(
		exhaustiveness=exhaustiveness, detect_pockets=detect_pockets)

		def dock(self, protein_file, ligand_file, centroid=None, box_dims=None,
		def dock(self,
		protein_file,
		ligand_file,
		centroid=None,
		box_dims=None,
		dry_run=False):
		"""Docks using Vina and DNNs."""
		protein_docked, ligand_docked = self.pose_generator.generate_poses(

deepchem/dock/pose_generation.py

+48 −39

Original line number	Diff line number	Diff line
		@@ -5,17 +5,20 @@ from __future__ import print_function
		from __future__ import division
		from __future__ import unicode_literals

		from deepchem.utils import mol_xyz_util

		__author__ = "Bharath Ramsundar"
		__copyright__ = "Copyright 2016, Stanford University"
		__license__ = "GPL"

		import numpy as np
		import os
		import pybel
		import tempfile
		from subprocess import call
		from deepchem.feat import hydrogenate_and_compute_partial_charges
		from deepchem.dock.binding_pocket import RFConvexHullPocketFinder
		from deepchem.utils import rdkit_util


		class PoseGenerator(object):
		"""Abstract superclass for all pose-generation routines."""
		@@ -24,8 +27,13 @@ class PoseGenerator(object):
		"""Generates the docked complex and outputs files for docked complex."""
		raise NotImplementedError

		def write_conf(receptor_filename, ligand_filename, centroid, box_dims,
		conf_filename, exhaustiveness=None):

		def write_conf(receptor_filename,
		ligand_filename,
		centroid,
		box_dims,
		conf_filename,
		exhaustiveness=None):
		"""Writes Vina configuration file to disk."""
		with open(conf_filename, "w") as f:
		f.write("receptor = %s\n" % receptor_filename)
		@@ -42,19 +50,6 @@ def write_conf(receptor_filename, ligand_filename, centroid, box_dims,
		if exhaustiveness is not None:
		f.write("exhaustiveness = %d\n" % exhaustiveness)

		def get_molecule_data(pybel_molecule):
		"""Uses pybel to compute centroid and range of molecule (Angstroms)."""
		atom_positions = []
		for atom in pybel_molecule:
		atom_positions.append(atom.coords)
		num_atoms = len(atom_positions)
		protein_xyz = np.asarray(atom_positions)
		protein_centroid = np.mean(protein_xyz, axis=0)
		protein_max = np.max(protein_xyz, axis=0)
		protein_min = np.min(protein_xyz, axis=0)
		protein_range = protein_max - protein_min
		return protein_centroid, protein_range


		class VinaPoseGenerator(PoseGenerator):
		"""Uses Autodock Vina to generate binding poses."""
		@@ -84,10 +79,13 @@ class VinaPoseGenerator(PoseGenerator):
		call(rm_cmd.split())
		self.vina_cmd = os.path.join(self.vina_dir, "bin/vina")


		def generate_poses(self, protein_file, ligand_file,
		centroid=None, box_dims=None,
		dry_run=False, out_dir=None):
		def generate_poses(self,
		protein_file,
		ligand_file,
		centroid=None,
		box_dims=None,
		dry_run=False,
		out_dir=None):
		"""Generates the docked complex and outputs files for docked complex."""
		if out_dir is None:
		out_dir = tempfile.mkdtemp()
		@@ -96,19 +94,23 @@ class VinaPoseGenerator(PoseGenerator):
		receptor_name = os.path.basename(protein_file).split(".")[0]
		protein_hyd = os.path.join(out_dir, "%s.pdb" % receptor_name)
		protein_pdbqt = os.path.join(out_dir, "%s.pdbqt" % receptor_name)
		hydrogenate_and_compute_partial_charges(protein_file, "pdb",
		hydrogenate_and_compute_partial_charges(
		protein_file,
		"pdb",
		hyd_output=protein_hyd,
		pdbqt_output=protein_pdbqt,
		protein=True)
		# Get protein centroid and range
		receptor_pybel = next(pybel.readfile(str("pdb"), str(protein_hyd)))
		# TODO(rbharath): Need to add some way to identify binding pocket, or this is
		# going to be extremely slow!
		if centroid is not None and box_dims is not None:
		protein_centroid = centroid
		else:
		if not self.detect_pockets:
		protein_centroid, protein_range = get_molecule_data(receptor_pybel)
		receptor_mol = rdkit_util.load_molecule(
		protein_hyd, calc_charges=False, add_hydrogens=False)
		protein_centroid = mol_xyz_util.get_molecule_centroid(receptor_mol[0])
		protein_range = mol_xyz_util.get_molecule_range(receptor_mol[0])
		box_dims = protein_range + 5.0
		else:
		print("About to find putative binding pockets")
		@@ -126,22 +128,27 @@ class VinaPoseGenerator(PoseGenerator):
		z_box = (z_max - z_min) / 2.
		box_dims = (x_box, y_box, z_box)


		# Prepare ligand
		ligand_name = os.path.basename(ligand_file).split(".")[0]
		ligand_hyd = os.path.join(out_dir, "%s.pdb" % ligand_name)
		ligand_pdbqt = os.path.join(out_dir, "%s.pdbqt" % ligand_name)

		# TODO(rbharath): Generalize this so can support mol2 files as well.
		hydrogenate_and_compute_partial_charges(ligand_file, "sdf",
		hydrogenate_and_compute_partial_charges(
		ligand_file,
		"sdf",
		hyd_output=ligand_hyd,
		pdbqt_output=ligand_pdbqt,
		protein=False)

		# Write Vina conf file
		conf_file = os.path.join(out_dir, "conf.txt")
		write_conf(protein_pdbqt, ligand_pdbqt, protein_centroid,
		box_dims, conf_file, exhaustiveness=self.exhaustiveness)
		write_conf(
		protein_pdbqt,
		ligand_pdbqt,
		protein_centroid,
		box_dims,
		conf_file,
		exhaustiveness=self.exhaustiveness)

		# Define locations of log and output files
		log_file = os.path.join(out_dir, "%s_log.txt" % ligand_name)
		@@ -149,8 +156,10 @@ class VinaPoseGenerator(PoseGenerator):
		# TODO(rbharath): Let user specify the number of poses required.
		if not dry_run:
		print("About to call Vina")
		call("%s --config %s --log %s --out %s"
		% (self.vina_cmd, conf_file, log_file, out_pdbqt), shell=True)
		call(
		"%s --config %s --log %s --out %s" %
		(self.vina_cmd, conf_file, log_file, out_pdbqt),
		shell=True)
		# TODO(rbharath): Convert the output pdbqt to a pdb file.

		# Return docked files

deepchem/dock/pose_scoring.py

+13 −6

Original line number	Diff line number	Diff line
		@@ -5,6 +5,8 @@ from __future__ import print_function
		from __future__ import division
		from __future__ import unicode_literals

		from deepchem.feat import RdkitGridFeaturizer

		__author__ = "Bharath Ramsundar"
		__copyright__ = "Copyright 2016, Stanford University"
		__license__ = "GPL"
		@@ -12,10 +14,10 @@ __license__ = "GPL"
		import numpy as np
		import os
		import tempfile
		from deepchem.feat import GridFeaturizer
		from deepchem.data import NumpyDataset
		from subprocess import call


		class PoseScorer(object):
		"""Abstract superclass for all scoring methods."""

		@@ -23,27 +25,32 @@ class PoseScorer(object):
		"""Returns a score for a protein/ligand pair."""
		raise NotImplementedError


		class GridPoseScorer(object):

		def __init__(self, model, feat="grid"):
		"""Initializes a pose-scorer."""
		self.model = model
		if feat == "grid":
		self.featurizer = GridFeaturizer(
		voxel_width=16.0, feature_types="voxel_combined",
		self.featurizer = RdkitGridFeaturizer(
		voxel_width=16.0,
		feature_types="voxel_combined",
		# TODO(rbharath, enf): Figure out why pi_stack is slow and cation_pi
		# causes segfaults.
		#voxel_feature_types=["ecfp", "splif", "hbond", "pi_stack", "cation_pi",
		#"salt_bridge"], ecfp_power=9, splif_power=9,
		voxel_feature_types=["ecfp", "splif", "hbond", "salt_bridge"],
		ecfp_power=9, splif_power=9,
		parallel=True, flatten=True)
		ecfp_power=9,
		splif_power=9,
		parallel=True,
		flatten=True)
		else:
		raise ValueError("feat not defined.")

		def score(self, protein_file, ligand_file):
		"""Returns a score for a protein/ligand pair."""
		features = self.featurizer.featurize_complexes([ligand_file], [protein_file])
		features = self.featurizer.featurize_complexes([ligand_file],
		[protein_file])
		dataset = NumpyDataset(X=features, y=None, w=None, ids=None)
		score = self.model.predict(dataset)
		return score

Admin message