Commit dd6f32b0 authored by Bharath Ramsundar's avatar Bharath Ramsundar Committed by GitHub
Browse files

Merge pull request #517 from lilleswing/tg-rewrite-clean

TensorGraph Refactor w/ Generators
parents 53718b6d bd5a2fd2
Loading
Loading
Loading
Loading
+8 −8
Original line number Diff line number Diff line
@@ -13,7 +13,7 @@ import numpy as np
from rdkit import Chem
from deepchem.feat import Featurizer
from deepchem.feat import ComplexFeaturizer
from deepchem.utils import pad_array
from deepchem.utils import pad_array, rdkit_util


def get_cells(coords, neighbor_cutoff):
@@ -153,9 +153,8 @@ def compute_neighbor_cell_map(N_x, N_y, N_z):
        for x_offset in offsets:
          for y_offset in offsets:
            for z_offset in offsets:
              neighbors.append(
                  ((x_ind + x_offset) % N_x, (y_ind + y_offset) % N_y,
                   (z_ind + z_offset) % N_z))
              neighbors.append(((x_ind + x_offset) % N_x, (y_ind + y_offset) %
                                N_y, (z_ind + z_offset) % N_z))
        neighbor_cell_map[(x_ind, y_ind, z_ind)] = neighbors
  return neighbor_cell_map

@@ -245,6 +244,7 @@ class NeighborListAtomicCoordinates(Featurizer):
      Molecule

    """
    print(mol)
    N = mol.GetNumAtoms()
    coords = get_coords(mol)

@@ -394,10 +394,10 @@ class ComplexNeighborListFragmentAtomicCoordinates(ComplexFeaturizer):
    """

    try:
      frag1_mol = Chem.MolFromPDBFile(
          frag1_pdb_file, sanitize=False, removeHs=False)
      frag2_mol = Chem.MolFromPDBFile(
          frag2_pdb_file, sanitize=False, removeHs=False)
      frag1_mol = rdkit_util.load_molecule(
          frag1_pdb_file, add_hydrogens=False, calc_charges=False)[1]
      frag2_mol = rdkit_util.load_molecule(
          frag2_pdb_file, add_hydrogens=False, calc_charges=False)[1]
    except:
      frag1_mol = None
      frag2_mol = None
+2 −1
Original line number Diff line number Diff line
@@ -9,6 +9,7 @@ from __future__ import unicode_literals
import os
import numpy as np
import pandas as pd
import deepchem as dc
from atomicnet_coordinates import ComplexNeighborListFragmentAtomicCoordinates


@@ -68,7 +69,7 @@ def compute_pdbbind_coordinate_features(complex_featurizer, pdb_subdir,
  """

  protein_file = os.path.join(pdb_subdir, "%s_pocket.pdb" % pdb_code)
  ligand_file = os.path.join(pdb_subdir, "%s_ligand.pdb" % pdb_code)
  ligand_file = os.path.join(pdb_subdir, "%s_ligand.sdf" % pdb_code)
  feature = complex_featurizer._featurize_complex(
      str(ligand_file), str(protein_file))
  return feature
+6 −8
Original line number Diff line number Diff line
@@ -9,17 +9,15 @@ __license__ = "MIT"
import os
import sys
from subprocess import call
from atomicnet_pdbbind_datasets import load_core_pdbbind_fragment_coordinates
from atomicnet_pdbbind_datasets import load_pdbbind_fragment_coordinates

call([
    "wget",
    "wget", "-c",
    "http://deepchem.io.s3-website-us-west-1.amazonaws.com/datasets/pdbbind_v2015.tar.gz"
])
if not os.path.exists("v2015"):
  call(["tar", "-xvzf", "pdbbind_v2015.tar.gz"])

# This could be done with openbabel in python
call(["convert_ligand_sdf_to_pdb.sh"])

base_dir = os.getcwd()
pdbbind_dir = os.path.join(base_dir, "v2015")
datafile = "INDEX_core_data.2013"
@@ -30,6 +28,6 @@ complex_num_atoms = 908
max_num_neighbors = 8
neighbor_cutoff = 12.0

pdbbind_tasks, dataset, transformers = load_core_pdbbind_fragment_coordinates(
pdbbind_tasks, dataset, transformers = load_pdbbind_fragment_coordinates(
    frag1_num_atoms, frag2_num_atoms, complex_num_atoms, max_num_neighbors,
    neighbor_cutoff, pdbbind_dir, base_dir, datafile)
    neighbor_cutoff, pdbbind_dir, base_dir, str(datafile))
+57 −4
Original line number Diff line number Diff line
@@ -8,6 +8,7 @@ import os
import numpy as np
import pandas as pd
import random
import six
from functools import partial
from deepchem.utils.save import save_to_disk
from deepchem.utils.save import load_from_disk
@@ -160,6 +161,16 @@ class Dataset(object):
                  epoch=0,
                  deterministic=False,
                  pad_batches=False):
    """
    
    Parameters
    ----------
   

    Returns
    -------

    """
    """Get an object that iterates over minibatches from the dataset.

    Each minibatch is returned as a tuple of four numpy arrays: (X, y, w, ids).
@@ -1031,3 +1042,45 @@ class DiskDataset(Dataset):
  def get_label_stds(self):
    """Return pandas series of label stds."""
    return self.metadata_df["y_stds"]


class Databag(object):
  """
  Groups several datasets under string keys so that they can be
  iterated over in lockstep, one minibatch from each at a time.
  """

  def __init__(self):
    # Maps a user-chosen key to the dataset registered under it.
    self.datasets = {}

  def add_dataset(self, key, dataset):
    """Register `dataset` under `key`, replacing any existing entry."""
    self.datasets[key] = dataset

  def iterbatches(self, **kwargs):
    """
    Walk through every registered dataset simultaneously.

    All keyword arguments except `epochs` are forwarded unchanged to each
    dataset's own `iterbatches`; `deterministic` is always overridden to
    True so that the datasets are traversed in matching order.

    Parameters
    ----------
    epochs: int, optional (default 1)
      Number of complete passes to make over the datasets. Consumed here
      and not forwarded to the underlying datasets.
    batch_size: int
      Number of samples from each dataset to return (forwarded).
    pad_batches: boolean
      Should all batches==batch_size (forwarded).

    Returns
    -------
    Generator which yields a dictionary {key: batch[0]}, where batch is
    the tuple produced by that dataset's `iterbatches`. Within an epoch
    iteration stops as soon as any one dataset runs out of batches.

    """
    names = list(self.datasets.keys())
    epochs = kwargs.pop('epochs', 1)
    # Shuffling would break the sample alignment between datasets.
    kwargs['deterministic'] = True
    for _ in range(epochs):
      batch_streams = [
          self.datasets[name].iterbatches(**kwargs) for name in names
      ]
      for batches in zip(*batch_streams):
        # Keep only the first element of each dataset's batch tuple.
        yield {name: batch[0] for name, batch in zip(names, batches)}
+1 −1
Original line number Diff line number Diff line
@@ -203,7 +203,7 @@ class Metric(object):
    else:
      y_pred = np.reshape(y_pred, (n_samples, n_tasks))
    y_true = np.reshape(y_true, (n_samples, n_tasks))
    if w is None:
    if w is None or len(w) == 0:
      w = np.ones_like(y_true)
    assert y_true.shape[0] == y_pred.shape[0] == w.shape[0]
    computed_metrics = []
Loading