Unverified commit 27a3000f, authored by Bharath Ramsundar, committed by GitHub
Browse files

Merge pull request #1497 from VIGS25/load-binding-pocket

Use binding pockets for load_pdbbind
parents 12cfc195 38826304
Loading
Loading
Loading
Loading
+29 −8
Original line number Diff line number Diff line
@@ -139,7 +139,11 @@ def load_pdbbind_grid(split="random",
    return tasks, (train, valid, test), transformers


def load_pdbbind(featurizer="grid", split="random", subset="core", reload=True):
def load_pdbbind(featurizer="grid",
                 load_binding_pocket=False,
                 split="random",
                 subset="core",
                 reload=True):
  """Load and featurize raw PDBBind dataset.
  
  Parameters
@@ -191,9 +195,16 @@ def load_pdbbind(featurizer="grid", split="random", subset="core", reload=True):
      pdb = line[0]
      if len(pdb) == 4:
        pdbs.append(pdb)

  if load_binding_pocket:
    protein_files = [
        os.path.join(data_folder, pdb, "%s_pocket.pdb" % pdb) for pdb in pdbs
    ]
  else:
    protein_files = [
        os.path.join(data_folder, pdb, "%s_protein.pdb" % pdb) for pdb in pdbs
    ]

  ligand_files = [
      os.path.join(data_folder, pdb, "%s_ligand.sdf" % pdb) for pdb in pdbs
  ]
@@ -224,7 +235,13 @@ def load_pdbbind(featurizer="grid", split="random", subset="core", reload=True):
  elif featurizer == "atomic":
    # Pulled from PDB files. For larger datasets with more PDBs, would use
    # max num atoms instead of exact.

    frag1_num_atoms = 70  # for ligand atoms

    if load_binding_pocket:
      frag2_num_atoms = 1000
      complex_num_atoms = 1070
    else:
      frag2_num_atoms = 24000  # for protein atoms
      complex_num_atoms = 24070  # in total
    max_num_neighbors = 4
@@ -236,6 +253,10 @@ def load_pdbbind(featurizer="grid", split="random", subset="core", reload=True):

  elif featurizer == "atomic_conv":
    frag1_num_atoms = 70  # for ligand atoms
    if load_binding_pocket:
      frag2_num_atoms = 1000  # for protein atoms
      complex_num_atoms = 1070  # in total
    else:
      frag2_num_atoms = 24000  # for protein atoms
      complex_num_atoms = 24070  # in total
    max_num_neighbors = 4