Commit aa8095e1 authored by Bharath Ramsundar's avatar Bharath Ramsundar
Browse files

voxel

parent f1665bc8
Loading
Loading
Loading
Loading
+17 −6
Original line number Diff line number Diff line
@@ -133,8 +133,10 @@ class ContactCircularVoxelizer(ComplexFeaturizer):
  in which they originated.

  Featurizes a macromolecular complex into a tensor of shape
  `(voxels_per_edge, voxels_per_edge, voxels_per_edge, size)`
  where `voxels_per_edge = int(box_width/voxel_width)`.
  `(voxels_per_edge, voxels_per_edge, voxels_per_edge, size)` where
  `voxels_per_edge = int(box_width/voxel_width)`. If `flatten` is True,
  a flattened version of this tensor, of length
  `size*voxels_per_edge**3`, is returned instead.
  """

  def __init__(self, 
@@ -142,7 +144,8 @@ class ContactCircularVoxelizer(ComplexFeaturizer):
               radius=2,
               size=8,
               box_width=16.0,
               voxel_width=1.0):
               voxel_width=1.0,
               flatten=False):
    """
    Parameters
    ----------
@@ -157,6 +160,9 @@ class ContactCircularVoxelizer(ComplexFeaturizer):
      is centered on a ligand centroid.
    voxel_width: float, optional (default 1.0)
      Size of a 3D voxel in a grid.
    flatten: bool, optional (default False)
      If True, then returns a flat feature vector rather than a voxel grid.
      This vector is built by flattening each pairwise feature grid and
      concatenating the results.
    """
    self.cutoff = cutoff
    self.radius = radius
@@ -164,6 +170,7 @@ class ContactCircularVoxelizer(ComplexFeaturizer):
    self.box_width = box_width
    self.voxel_width = voxel_width
    self.voxels_per_edge = int(self.box_width / self.voxel_width)
    self.flatten = flatten

  def _featurize_complex(self, molecular_complex):
    """
@@ -193,7 +200,6 @@ class ContactCircularVoxelizer(ComplexFeaturizer):
          sum([
              voxelize(
                  convert_atom_to_voxel,
                  self.voxels_per_edge,
                  self.box_width,
                  self.voxel_width,
                  hash_ecfp,
@@ -209,5 +215,10 @@ class ContactCircularVoxelizer(ComplexFeaturizer):
                                            ecfp_degree=self.radius))
          ])
      )
    # Features are of shape (voxels_per_edge, voxels_per_edge, voxels_per_edge, num_feat) so we should concatenate on the last axis.
    if self.flatten:
      return np.concatenate([features.flatten() for features in pairwise_features])
    else:
      # Features are of shape (voxels_per_edge, voxels_per_edge,
      # voxels_per_edge, num_feat) so we should concatenate on the last
      # axis.
      return np.concatenate(pairwise_features, axis=-1)
+6 −8
Original line number Diff line number Diff line
@@ -37,8 +37,6 @@ logger = logging.getLogger(__name__)
http://stackoverflow.com/questions/2827393/angles-between-two-n-dimensional-vectors-in-python
"""

# TODO(rbharath): Consider this comment on rdkit forums https://github.com/rdkit/rdkit/issues/1590 about sybyl featurization

FLAT_FEATURES = ['ecfp_ligand', 'ecfp_hashed', 'splif_hashed', 'hbond_count']

VOXEL_FEATURES = [
@@ -306,12 +304,12 @@ class RdkitGridFeaturizer(ComplexFeaturizer):
      logger.warning("Some molecules cannot be loaded by Rdkit. Skipping")
      return None

    time1 = time.time()
    centroid = compute_centroid(ligand_xyz)
    ligand_xyz = subtract_centroid(ligand_xyz, centroid)
    protein_xyz = subtract_centroid(protein_xyz, centroid)
    time2 = time.time()
    logger.info("TIMING: Centroid processing took %0.3f s" % (time2 - time1))
    #time1 = time.time()
    #centroid = compute_centroid(ligand_xyz)
    #ligand_xyz = subtract_centroid(ligand_xyz, centroid)
    #protein_xyz = subtract_centroid(protein_xyz, centroid)
    #time2 = time.time()
    #logger.info("TIMING: Centroid processing took %0.3f s" % (time2 - time1))

    pairwise_distances = compute_pairwise_distances(protein_xyz, ligand_xyz)

+1 −1
Original line number Diff line number Diff line
@@ -56,7 +56,7 @@ def compute_splif_features_in_range(frag1,
  }
  return (splif_dict)

def featurize_splif(frag1, frag1, contact_bins,
def featurize_splif(frag1, frag2, contact_bins,
                    pairwise_distances, ecfp_degree):
  """Computes SPLIF featurization of fragment interactions binding pocket.

+11 −0
Original line number Diff line number Diff line
@@ -31,3 +31,14 @@ class TestContactFeaturizers(unittest.TestCase):
        self.complex_files)
    assert features.shape == (1, voxels_per_edge, voxels_per_edge, voxels_per_edge, size)
    
  def test_contact_voxels_flattened(self):
    """Check that `flatten=True` yields one flat feature vector per complex.

    The expected length of the vector is `size * voxels_per_edge**3`.
    """
    box_width = 48
    voxel_width = 2
    # Compute the edge count as an int up front (rather than a float from
    # true division) so the expected shape below is built from ints only.
    voxels_per_edge = int(box_width / voxel_width)
    size = 8
    voxelizer = dc.feat.ContactCircularVoxelizer(
        box_width=box_width,
        voxel_width=voxel_width,
        size=size,
        flatten=True)
    # The second return value is not inspected by this test.
    features, _ = voxelizer.featurize_complexes(self.complex_files)
    assert features.shape == (1, size * voxels_per_edge**3)