Commit 00a0a416 authored by Bharath Ramsundar's avatar Bharath Ramsundar
Browse files

Binding pocket features

parent 8dc09898
Loading
Loading
Loading
Loading
+67 −14
Original line number Diff line number Diff line
"""
Featurizes proposed binding pockets.
"""
__author__ = "Bharath Ramsundar"
__copyright__ = "Copyright 2017, Stanford University"
__license__ = "MIT"

import numpy as np
from deepchem.utils.save import log
import logging
from deepchem.utils import rdkit_util
from deepchem.feat import Featurizer

logger = logging.getLogger(__name__)

class BindingPocketFeaturizer(Featurizer):
def boxes_to_atoms(coords, boxes):
  """Maps each box to a list of atoms in that box.

  Given the coordinates of a macromolecule, and a collection of boxes,
  returns a dictionary which maps boxes to the atom indices of the
  atoms in them.

  Parameters
  ----------
  coords: np.ndarray
    Of shape `(N, 3)
  boxes: list
    list of `CoordinateBox` objects.

  Returns
  -------
  dictionary mapping `CoordinateBox` objects to lists of atom coordinates
  """
  Featurizes binding pockets with information about chemical environments.
  mapping = {}
  for box_ind, box in enumerate(boxes):
    box_atoms = []
    for atom_ind in range(len(coords)):
      atom = coords[atom_ind]
      if atom in box:
        box_atoms.append(atom_ind)
    mapping[box] = box_atoms
  return mapping


class BindingPocketFeaturizer(Featurizer):
  """Featurizes binding pockets with information about chemical
  environments.

  In many applications, it's desirable to look at binding pockets on
  macromolecules which may be good targets for potential ligands or
  other molecules to interact with. A `BindingPocketFeaturizer`
  expects to be given a macromolecule, and a list of pockets to
  featurize on that macromolecule. These pockets should be of the form
  produced by a `dc.dock.BindingPocketFinder`, that is as a list of
  `dc.utils.CoordinateBox` objects.

  The base featurization in this class's featurization is currently
  very simple and counts the number of residues of each type present
  in the pocket. It's likely that you'll want to overwrite this
  implementation for more sophisticated downstream usecases. Note that
  this class's implementation will only work for proteins and not for
  other macromolecules
  """

  residues = [
@@ -25,28 +67,39 @@ class BindingPocketFeaturizer(Featurizer):

  def featurize(self,
                protein_file,
                pockets,
                pocket_atoms_map,
                pocket_coords,
                verbose=False):
                pockets):
    """
    Calculate atomic coodinates.

    Params
    ------
    protein_file: str
      Location of PDB file. Will be loaded by MDTraj
    pockets: list[CoordinateBox]
      List of `dc.utils.CoordinateBox` objects.

    Returns
    -------
    A numpy array of shale `(len(pockets), n_residues)`
    """
    import mdtraj
    protein_coords = rdkit_util.load_molecule(
        protein_file, add_hydrogens=False, calc_charges=False)[0]
    mapping = boxes_to_atoms(protein_coords, pockets)
    protein = mdtraj.load(protein_file)
    n_pockets = len(pockets)
    n_residues = len(BindingPocketFeaturizer.residues)
    res_map = dict(zip(BindingPocketFeaturizer.residues, range(n_residues)))
    all_features = np.zeros((n_pockets, n_residues))
    for pocket_num, (pocket, coords) in enumerate(zip(pockets, pocket_coords)):
      pocket_atoms = pocket_atoms_map[pocket]
    for pocket_num, pocket in enumerate(pockets):
      pocket_atoms = mapping[pocket]
      for ind, atom in enumerate(pocket_atoms):
        atom_name = str(protein.top.atom(atom))
        # atom_name is of format RESX-ATOMTYPE
        # where X is a 1 to 4 digit number
        residue = atom_name[:3]
        if residue not in res_map:
          log("Warning: Non-standard residue in PDB file", verbose)
          logger.info("Warning: Non-standard residue in PDB file")
          continue
        atomtype = atom_name.split("-")[1]
        all_features[pocket_num, res_map[residue]] += 1
+7 −8
Original line number Diff line number Diff line
"""
Test atomic coordinates and neighbor lists.
Test Binding Pocket Features. 
"""
import os
import numpy as np
@@ -7,14 +7,14 @@ import unittest
import deepchem as dc


class TestAtomicCoordinates(unittest.TestCase):
class TestBindingPocketFeatures(unittest.TestCase):
  """
  Test AtomicCoordinates.
  """

  def test_atomic_coordinates(self):
  def test_pocket_features(self):
    """
    Simple test that atomic coordinates returns ndarray of right shape.
    Simple test that pocket_features return right shapes.
    """
    current_dir = os.path.dirname(os.path.realpath(__file__))
    protein_file = os.path.join(current_dir,
@@ -23,12 +23,11 @@ class TestAtomicCoordinates(unittest.TestCase):

    finder = dc.dock.ConvexHullPocketFinder()
    pocket_featurizer = dc.feat.BindingPocketFeaturizer()
    pockets, pocket_atoms, pocket_coords = finder.find_pockets(
        protein_file, ligand_file)
    pockets = finder.find_pockets(protein_file)
    n_pockets = len(pockets)

    pocket_features = pocket_featurizer.featurize(protein_file, pockets,
                                                  pocket_atoms, pocket_coords)
    pocket_features = pocket_featurizer.featurize(protein_file,
                                                  pockets)

    assert isinstance(pocket_features, np.ndarray)
    assert pocket_features.shape[0] == n_pockets