Unverified Commit 814cc52e authored by Bharath Ramsundar's avatar Bharath Ramsundar Committed by GitHub
Browse files

Merge pull request #1902 from peastman/slow

Fix failing slow test
parents 90962639 d7c190a8
Loading
Loading
Loading
Loading
+38 −23
Original line number Diff line number Diff line
@@ -7,6 +7,7 @@ import logging
import numpy as np
import os
import tempfile
import tarfile
from subprocess import call
from deepchem.utils.rdkit_util import add_hydrogens_to_mol
from subprocess import check_output
@@ -14,6 +15,7 @@ from deepchem.utils import rdkit_util
from deepchem.utils import mol_xyz_util
from deepchem.utils import geometry_utils
from deepchem.utils import vina_utils
from deepchem.utils import download_url

logger = logging.getLogger(__name__)

@@ -105,6 +107,8 @@ class VinaPoseGenerator(PoseGenerator):
      url = "http://vina.scripps.edu/download/autodock_vina_1_1_2_linux_x86.tgz"
      filename = "autodock_vina_1_1_2_linux_x86.tgz"
      dirname = "autodock_vina_1_1_2_linux_x86"
      self.vina_dir = os.path.join(data_dir, dirname)
      self.vina_cmd = os.path.join(self.vina_dir, "bin/vina")
    elif platform.system() == 'Darwin':
      if sixty_four_bits:
        url = "http://vina.scripps.edu/download/autodock_vina_1_1_2_mac_64bit.tar.gz"
@@ -114,26 +118,31 @@ class VinaPoseGenerator(PoseGenerator):
        url = "http://vina.scripps.edu/download/autodock_vina_1_1_2_mac.tgz"
        filename = "autodock_vina_1_1_2_mac.tgz"
        dirname = "autodock_vina_1_1_2_mac"
      self.vina_dir = os.path.join(data_dir, dirname)
      self.vina_cmd = os.path.join(self.vina_dir, "bin/vina")
    elif platform.system() == 'Windows':
      url = "http://vina.scripps.edu/download/autodock_vina_1_1_2_win32.msi"
      filename = "autodock_vina_1_1_2_win32.msi"
      self.vina_dir = "\\Program Files (x86)\\The Scripps Research Institute\\Vina"
      self.vina_cmd = os.path.join(self.vina_dir, "vina.exe")
    else:
      raise ValueError(
          "This class can only run on Linux or Mac. If you are on Windows, please try using a cloud platform to run this code instead."
          "Unknown operating system.  Try using a cloud platform to run this code instead."
      )
    self.vina_dir = os.path.join(data_dir, dirname)
    self.pocket_finder = pocket_finder
    if not os.path.exists(self.vina_dir):
      logger.info("Vina not available. Downloading")
      wget_cmd = "wget -nv -c -T 15 %s" % url
      check_output(wget_cmd.split())
      download_url(url, data_dir)
      downloaded_file = os.path.join(data_dir, filename)
      logger.info("Downloaded Vina. Extracting")
      untar_cmd = "tar -xzvf %s" % filename
      check_output(untar_cmd.split())
      logger.info("Moving to final location")
      mv_cmd = "mv %s %s" % (dirname, data_dir)
      check_output(mv_cmd.split())
      if platform.system() == 'Windows':
        msi_cmd = "msiexec /i %s" % downloaded_file
        check_output(msi_cmd.split())
      else:
        with tarfile.open(downloaded_file) as tar:
          tar.extractall(data_dir)
      logger.info("Cleanup: removing downloaded vina tar.gz")
      rm_cmd = "rm %s" % filename
      call(rm_cmd.split())
    self.vina_cmd = os.path.join(self.vina_dir, "bin/vina")
      os.remove(downloaded_file)

  def generate_poses(self,
                     molecular_complex,
@@ -207,6 +216,8 @@ class VinaPoseGenerator(PoseGenerator):
    protein_pdbqt = os.path.join(out_dir, "%s.pdbqt" % protein_name)
    protein_mol = rdkit_util.load_molecule(
        protein_file, calc_charges=True, add_hydrogens=True)
    rdkit_util.write_molecule(protein_mol[1], protein_hyd, is_protein=True)
    rdkit_util.write_molecule(protein_mol[1], protein_pdbqt, is_protein=True)

    # Get protein centroid and range
    if centroid is not None and box_dims is not None:
@@ -215,9 +226,6 @@ class VinaPoseGenerator(PoseGenerator):
    else:
      if self.pocket_finder is None:
        logger.info("Pockets not specified. Will use whole protein to dock")
        rdkit_util.write_molecule(protein_mol[1], protein_hyd, is_protein=True)
        rdkit_util.write_molecule(
            protein_mol[1], protein_pdbqt, is_protein=True)
        protein_centroid = geometry_utils.compute_centroid(protein_mol[0])
        protein_range = mol_xyz_util.get_molecule_range(protein_mol[0])
        box_dims = protein_range + 5.0
@@ -276,10 +284,17 @@ class VinaPoseGenerator(PoseGenerator):
      log_file = os.path.join(out_dir, "%s_log.txt" % ligand_name)
      out_pdbqt = os.path.join(out_dir, "%s_docked.pdbqt" % ligand_name)
      logger.info("About to call Vina")
      call(
          "%s --config %s --log %s --out %s" % (self.vina_cmd, conf_file,
                                                log_file, out_pdbqt),
          shell=True)
      if platform.system() == 'Windows':
        args = [
            self.vina_cmd, "--config", conf_file, "--log", log_file, "--out",
            out_pdbqt
        ]
      else:
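        # With shell=True on POSIX, subprocess treats only the first list item as
        # the command string and passes the remaining items to the shell itself,
        # so Vina would be started without its arguments.  Build a single command
        # string instead.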
        args = "%s --config %s --log %s --out %s" % (self.vina_cmd, conf_file,
                                                     log_file, out_pdbqt)
      call(args, shell=True)
      ligands, scores = vina_utils.load_docked_ligands(out_pdbqt)
      docked_complexes += [(protein_mol[1], ligand) for ligand in ligands]
      all_scores += scores
+32 −29
Original line number Diff line number Diff line
@@ -3,6 +3,7 @@ Tests for Pose Generation
"""
import os
import sys
import tempfile
import unittest
import logging
import numpy as np
@@ -16,12 +17,10 @@ class TestPoseGeneration(unittest.TestCase):
  Does sanity checks on pose generation.
  """

  @pytest.mark.slow
  def test_vina_initialization(self):
    """Check that a VinaPoseGenerator can be constructed with default arguments.

    The test passes as long as the constructor returns without raising; the
    resulting object is not inspected.
    """
    # The instance itself is not needed, only a successful construction.
    dc.dock.VinaPoseGenerator()

  @pytest.mark.slow
  def test_pocket_vina_initialization(self):
    """Test that VinaPoseGenerator can be initialized."""
    pocket_finder = ConvexHullPocketFinder()
@@ -41,11 +40,12 @@ class TestPoseGeneration(unittest.TestCase):
    ligand_file = os.path.join(current_dir, "1jld_ligand.sdf")

    vpg = dc.dock.VinaPoseGenerator(pocket_finder=None)
    with tempfile.TemporaryDirectory() as tmp:
      poses, scores = vpg.generate_poses(
          (protein_file, ligand_file),
          exhaustiveness=1,
          num_modes=1,
        out_dir="/tmp",
          out_dir=tmp,
          generate_scores=True)

    assert len(poses) == 1
@@ -69,11 +69,12 @@ class TestPoseGeneration(unittest.TestCase):
    ligand_file = os.path.join(current_dir, "1jld_ligand.sdf")

    vpg = dc.dock.VinaPoseGenerator(pocket_finder=None)
    with tempfile.TemporaryDirectory() as tmp:
      poses = vpg.generate_poses(
          (protein_file, ligand_file),
          exhaustiveness=1,
          num_modes=1,
        out_dir="/tmp",
          out_dir=tmp,
          generate_scores=False)

    assert len(poses) == 1
@@ -98,13 +99,14 @@ class TestPoseGeneration(unittest.TestCase):
    centroid = np.array([56.21891368, 25.95862964, 3.58950065])
    box_dims = np.array([51.354, 51.243, 55.608])
    vpg = dc.dock.VinaPoseGenerator(pocket_finder=None)
    with tempfile.TemporaryDirectory() as tmp:
      poses, scores = vpg.generate_poses(
          (protein_file, ligand_file),
          centroid=centroid,
          box_dims=box_dims,
          exhaustiveness=1,
          num_modes=1,
        out_dir="/tmp",
          out_dir=tmp,
          generate_scores=True)

    assert len(poses) == 1
@@ -130,12 +132,13 @@ class TestPoseGeneration(unittest.TestCase):
    # Note this may download autodock Vina...
    convex_finder = dc.dock.ConvexHullPocketFinder()
    vpg = dc.dock.VinaPoseGenerator(pocket_finder=convex_finder)
    with tempfile.TemporaryDirectory() as tmp:
      poses, scores = vpg.generate_poses(
          (protein_file, ligand_file),
          exhaustiveness=1,
          num_modes=1,
          num_pockets=2,
        out_dir="/tmp",
          out_dir=tmp,
          generate_scores=True)

    assert len(poses) == 2
+13 −21
Original line number Diff line number Diff line
@@ -604,18 +604,11 @@ class AtomicConvFeaturizer(ComplexNeighborListFragmentAtomicCoordinates):
    self.labels = labels

  def featurize_complexes(self, mol_files, protein_files):
    pool = multiprocessing.Pool()
    results = []
    for i, (mol_file, protein_pdb) in enumerate(zip(mol_files, protein_files)):
      log_message = "Featurizing %d / %d" % (i, len(mol_files))
      results.append(
          pool.apply_async(_featurize_complex,
                           (self, mol_file, protein_pdb, log_message)))
    pool.close()
    features = []
    failures = []
    for ind, result in enumerate(results):
      new_features = result.get()
    for i, (mol_file, protein_pdb) in enumerate(zip(mol_files, protein_files)):
      logging.info("Featurizing %d / %d" % (i, len(mol_files)))
      new_features = self._featurize_complex(mol_file, protein_pdb)
      # Handle loading failures which return None
      if new_features is not None:
        features.append(new_features)
@@ -630,20 +623,19 @@ class AtomicConvFeaturizer(ComplexNeighborListFragmentAtomicCoordinates):
    self.atomic_conv_model.fit(dataset, nb_epoch=self.epochs)

    # Add the Atomic Convolution layers to fetches
    layers_to_fetch = list()
    for layer in self.atomic_conv_model.layers.values():
      if isinstance(layer, dc.models.atomic_conv.AtomicConvolution):
        layers_to_fetch.append(layer)
    layers_to_fetch = [
        self.atomic_conv_model._frag1_conv, self.atomic_conv_model._frag2_conv,
        self.atomic_conv_model._complex_conv
    ]

    # Extract the atomic convolution features
    atomic_conv_features = list()
    feed_dict_generator = self.atomic_conv_model.default_generator(
    batch_generator = self.atomic_conv_model.default_generator(
        dataset=dataset, epochs=1)

    for feed_dict in self.atomic_conv_model._create_feed_dicts(
        feed_dict_generator, training=False):
      frag1_conv, frag2_conv, complex_conv = self.atomic_conv_model._run_graph(
          outputs=layers_to_fetch, feed_dict=feed_dict, training=False)
    for X, y, w in batch_generator:
      frag1_conv, frag2_conv, complex_conv = self.atomic_conv_model.predict_on_generator(
          [(X, y, w)], outputs=layers_to_fetch)
      concatenated = np.concatenate(
          [frag1_conv, frag2_conv, complex_conv], axis=1)
      atomic_conv_features.append(concatenated)
+8 −6
Original line number Diff line number Diff line
@@ -221,20 +221,22 @@ class AtomicConvModel(KerasModel):
    complex_nbrs_z = Input(shape=(complex_num_atoms, max_num_neighbors))
    complex_z = Input(shape=(complex_num_atoms,))

    frag1_conv = AtomicConvolution(
    self._frag1_conv = AtomicConvolution(
        atom_types=self.atom_types, radial_params=rp,
        boxsize=None)([frag1_X, frag1_nbrs, frag1_nbrs_z])

    frag2_conv = AtomicConvolution(
    self._frag2_conv = AtomicConvolution(
        atom_types=self.atom_types, radial_params=rp,
        boxsize=None)([frag2_X, frag2_nbrs, frag2_nbrs_z])

    complex_conv = AtomicConvolution(
    self._complex_conv = AtomicConvolution(
        atom_types=self.atom_types, radial_params=rp,
        boxsize=None)([complex_X, complex_nbrs, complex_nbrs_z])

    score = AtomicConvScore(self.atom_types, layer_sizes)(
        [frag1_conv, frag2_conv, complex_conv, frag1_z, frag2_z, complex_z])
    score = AtomicConvScore(self.atom_types, layer_sizes)([
        self._frag1_conv, self._frag2_conv, self._complex_conv, frag1_z,
        frag2_z, complex_z
    ])

    model = tf.keras.Model(
        inputs=[
+2 −2
Original line number Diff line number Diff line
@@ -107,9 +107,9 @@ class TestAtomicConv(unittest.TestCase):
    """A simple test for running an atomic convolution on featurized data."""
    dir_path = os.path.dirname(os.path.realpath(__file__))
    ligand_file = os.path.join(dir_path,
                               "../../../feat/tests/data/3zso_ligand_hyd.pdb")
                               "../../feat/tests/data/3zso_ligand_hyd.pdb")
    protein_file = os.path.join(dir_path,
                                "../../../feat/tests/data/3zso_protein.pdb")
                                "../../feat/tests/data/3zso_protein.pdb")
    # Pulled from PDB files. For larger datasets with more PDBs, would use
    # max num atoms instead of exact.
    frag1_num_atoms = 44  # for ligand atoms
+1 −1

File changed.

Contains only whitespace changes.

Loading