Commit 24ac9c6d authored by Bharath Ramsundar's avatar Bharath Ramsundar
Browse files

Saving binding pocket feat

parent fc145e33
Loading
Loading
Loading
Loading
+10 −3
Original line number Diff line number Diff line
@@ -164,7 +164,7 @@ def merge_overlapping_boxes(mapping, boxes, threshold=.8):
class BindingPocketFinder(object):
  """Abstract superclass for binding pocket detectors"""

  def find_pockets(self, protein_file):
  def find_pockets(self, protein_file, ligand_file):
    """Finds potential binding pockets in proteins."""
    raise NotImplementedError

@@ -188,5 +188,12 @@ class ConvexHullPocketFinder(BindingPocketFinder):
    ligand_coords = load_molecule(ligand_file)[0]
    boxes = get_all_boxes(protein_coords, self.pad)
    mapping = boxes_to_atoms(protein_coords, boxes)
    merged_boxes, mapping = merge_overlapping_boxes(mapping, boxes)
    return merged_boxes, mapping
    pockets, pocket_atoms = merge_overlapping_boxes(mapping, boxes)
    pocket_coords = []
    for pocket in pockets:
      atoms = pocket_atoms[pocket]
      coords = np.zeros((len(atoms), 3))
      for ind, atom in enumerate(atoms):
        coords[ind] = protein_coords[atom]
      pocket_coords.append(coords)
    return pockets, pocket_atoms, pocket_coords
+2 −2
Original line number Diff line number Diff line
@@ -124,7 +124,7 @@ class TestPoseGeneration(unittest.TestCase):
    finder = dc.dock.ConvexHullPocketFinder()

    all_pockets = finder.find_all_pockets(protein_file)
    pockets = finder.find_pockets(protein_file, ligand_file)
    pockets, _, _ = finder.find_pockets(protein_file, ligand_file)

    assert len(pockets) < len(all_pockets)

@@ -138,7 +138,7 @@ class TestPoseGeneration(unittest.TestCase):
        dc.dock.binding_pocket.extract_active_site(
            protein_file, ligand_file))
    finder = dc.dock.ConvexHullPocketFinder()
    pockets, pocket_atoms = finder.find_pockets(protein_file, ligand_file)
    pockets, pocket_atoms, _ = finder.find_pockets(protein_file, ligand_file)

    # Add active site to dict
    print("active_site_box")
+1 −0
Original line number Diff line number Diff line
@@ -18,3 +18,4 @@ from deepchem.feat.basic import RDKitDescriptors
from deepchem.feat.coulomb_matrices import CoulombMatrixEig
from deepchem.feat.grid_featurizer import GridFeaturizer
from deepchem.feat.nnscore_utils import hydrogenate_and_compute_partial_charges
from deepchem.feat.binding_pocket_features import BindingPocketFeaturizer
+50 −0
Original line number Diff line number Diff line
"""
Featurizes proposed binding pockets.
"""
from __future__ import print_function
from __future__ import division
from __future__ import unicode_literals

__author__ = "Bharath Ramsundar"
__copyright__ = "Copyright 2017, Stanford University"
__license__ = "GPL"

import numpy as np
import os
import pybel
import tempfile
import mdtraj as md
from scipy.spatial import ConvexHull
from deepchem.feat import hydrogenate_and_compute_partial_charges
from deepchem.feat.atomic_coordinates import AtomicCoordinates
from deepchem.feat.grid_featurizer import load_molecule
from deepchem.feat import Featurizer

class BindingPocketFeaturizer(Featurizer):
  """
  Featurizes binding pockets with information about chemical environments.

  Each pocket is described by a vector of atom counts, one entry per
  residue code listed in `residues` (in that order).
  """

  # Residue vocabulary; the position of a code in this list is the column
  # index of its count in the feature matrix returned by `featurize`.
  residues = ["ALA", "ARG", "ASN", "ASP", "CYS", "GLN", "GLU", "GLY", "HIS",
              "ILE", "LEU", "LYS", "MET", "PHE", "PRO", "PYL", "SER", "SEC",
              "THR", "TRP", "TYR", "VAL", "ASX", "GLX"]

  def featurize(self, protein_file, pockets, pocket_atoms, pocket_coords):
    """
    Count, for every pocket, how many of its atoms fall in each residue type.

    Parameters
    ----------
    protein_file: str
      Path of the protein structure; it is loaded with `md.load`.
    pockets: list
      Pockets to featurize. Each entry is used as a key into `pocket_atoms`.
    pocket_atoms: dict
      Maps each pocket to the atom indices it contains. The indices are
      looked up in the topology of the loaded protein (presumably they use
      the same atom ordering as that topology -- TODO confirm with caller).
    pocket_coords: list
      Per-pocket coordinates. Not used by this featurizer; accepted so the
      call signature stays unchanged for existing callers.

    Returns
    -------
    all_features: np.ndarray
      Array of shape `(len(pockets), len(residues))` where entry `[i, j]` is
      the number of atoms of pocket `i` whose residue code is `residues[j]`.
      Atoms whose residue code is not in `residues` are not counted.
    """
    protein = md.load(protein_file)
    n_pockets = len(pockets)
    n_residues = len(BindingPocketFeaturizer.residues)
    res_map = dict(zip(BindingPocketFeaturizer.residues, range(n_residues)))
    all_features = np.zeros((n_pockets, n_residues))
    for pocket_num, pocket in enumerate(pockets):
      for atom in pocket_atoms[pocket]:
        atom_name = str(protein.top.atom(atom))
        # atom_name is of format RESX-ATOMTYPE
        # where X is a 1 to 4 digit number
        residue = atom_name[:3]
        if residue not in res_map:
          # Residue code outside the vocabulary (e.g. water or a hetero
          # group): there is no column for it, so skip instead of raising
          # KeyError and losing the features of every other pocket.
          continue
        all_features[pocket_num, res_map[residue]] += 1
    return all_features
+32 −0
Original line number Diff line number Diff line
"""
Test featurization of proposed binding pockets.
"""
import os
import numpy as np
import unittest
from rdkit import Chem
import deepchem as dc

class TestAtomicCoordinates(unittest.TestCase):
  """
  Test BindingPocketFeaturizer.

  NOTE: the class and method names still mention atomic coordinates; they
  are kept unchanged so existing test selection by name keeps working.
  """

  def test_atomic_coordinates(self):
    """
    Simple test that pocket featurization returns ndarray of right shape.
    """
    current_dir = os.path.dirname(os.path.realpath(__file__))
    protein_file = os.path.join(current_dir, "../../dock/tests/1jld_protein.pdb")
    ligand_file = os.path.join(current_dir, "../../dock/tests/1jld_ligand.sdf")

    finder = dc.dock.ConvexHullPocketFinder()
    pocket_featurizer = dc.feat.BindingPocketFeaturizer()
    pockets, pocket_atoms, pocket_coords = finder.find_pockets(
        protein_file, ligand_file)
    n_pockets = len(pockets)
    # One feature column is expected per residue code in the vocabulary.
    n_residues = len(dc.feat.BindingPocketFeaturizer.residues)

    pocket_features = pocket_featurizer.featurize(
        protein_file, pockets, pocket_atoms, pocket_coords)

    assert isinstance(pocket_features, np.ndarray)
    # Check both dimensions: one row per pocket, one column per residue.
    assert pocket_features.shape == (n_pockets, n_residues)