Commit a45023c4 authored by Bharath Ramsundar
Browse files

Merge branch 'master' of https://github.com/deepchem/deepchem into simulations

parents 058881b1 7be7c5e2
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -15,7 +15,7 @@ install:
- conda config --add channels http://conda.binstar.org/omnia
- bash scripts/install_deepchem_conda.sh deepchem
- source activate deepchem
- pip install yapf==0.19.0
- pip install yapf==0.20.0
- pip install coveralls
- cd $HOME
- git clone https://github.com/kundajelab/simdna.git
+18 −0
Original line number Diff line number Diff line
@@ -17,6 +17,7 @@ from pandas import read_hdf
import tempfile
import time
import shutil
import json
from multiprocessing.dummy import Pool

__author__ = "Bharath Ramsundar"
@@ -432,6 +433,23 @@ class NumpyDataset(Dataset):
    """
    return NumpyDataset(ds.X, ds.y, ds.w, ds.ids)

  def to_json(self, fname):
    """Writes this dataset to a JSON file.

    The `X`, `y`, `w` and `ids` attributes are converted to plain (nested)
    Python lists with `tolist()` and dumped as a single JSON object with the
    keys 'X', 'y', 'w' and 'ids'.

    Parameters
    ----------
    fname: str
      Path of the JSON file to write. The file is opened in 'w' mode, so an
      existing file at that path is overwritten.
    """
    # NOTE: this has to be a regular instance method. It used to carry a
    # @staticmethod decorator while still declaring `self`, which made
    # `dataset.to_json(fname)` fail with a TypeError (the path was bound to
    # `self` and `fname` was reported missing). Calling it through the class
    # as `NumpyDataset.to_json(dataset, fname)` keeps working.
    d = {
        'X': self.X.tolist(),
        'y': self.y.tolist(),
        'w': self.w.tolist(),
        'ids': self.ids.tolist()
    }
    with open(fname, 'w') as fout:
      json.dump(d, fout)

  @staticmethod
  def from_json(fname):
    """Builds a NumpyDataset from a JSON file.

    The file must contain a JSON object with the keys 'X', 'y', 'w' and
    'ids'; the decoded values are handed to the NumpyDataset constructor in
    that order.

    Parameters
    ----------
    fname: str
      Path of the JSON file to read.

    Returns
    -------
    NumpyDataset
      Dataset constructed from the decoded JSON values.
    """
    with open(fname) as json_file:
      payload = json.load(json_file)
    # The file is only needed for decoding; construct the dataset afterwards.
    return NumpyDataset(payload['X'], payload['y'], payload['w'],
                        payload['ids'])


class DiskDataset(Dataset):
  """
+7 −13
Original line number Diff line number Diff line
@@ -18,6 +18,8 @@ import numpy as np
import deepchem as dc
from sklearn.ensemble import RandomForestRegressor
from subprocess import call
from deepchem.utils import download_url
from deepchem.utils import get_data_dir


class TestPoseScoring(unittest.TestCase):
@@ -27,20 +29,14 @@ class TestPoseScoring(unittest.TestCase):

  def setUp(self):
    """Downloads dataset."""
    call(
        "wget -nv -c http://deepchem.io.s3-website-us-west-1.amazonaws.com/featurized_datasets/core_grid.tar.gz".
        split())
    call("tar -zxvf core_grid.tar.gz".split())
    self.core_dataset = dc.data.DiskDataset("core_grid/")

  def tearDown(self):
    """Removes dataset"""
    call("rm -rf core_grid/".split())
    download_url(
        "http://deepchem.io.s3-website-us-west-1.amazonaws.com/featurized_datasets/core_grid.json"
    )
    json_fname = os.path.join(get_data_dir(), 'core_grid.json')
    self.core_dataset = dc.data.NumpyDataset.from_json(json_fname)

  def test_pose_scorer_init(self):
    """Tests that pose-score works."""
    if sys.version_info >= (3, 0):
      return
    sklearn_model = RandomForestRegressor(n_estimators=10)
    model = dc.models.SklearnModel(sklearn_model)
    print("About to fit model on core set")
@@ -50,8 +46,6 @@ class TestPoseScoring(unittest.TestCase):

  def test_pose_scorer_score(self):
    """Tests that scores are generated"""
    if sys.version_info >= (3, 0):
      return
    current_dir = os.path.dirname(os.path.realpath(__file__))
    protein_file = os.path.join(current_dir, "1jld_protein.pdb")
    ligand_file = os.path.join(current_dir, "1jld_ligand.sdf")
+33 −116
Original line number Diff line number Diff line
@@ -44,123 +44,41 @@ class RDKitDescriptors(Featurizer):

  # (ytz): This is done to avoid future compatibility issues like inclusion of
  # the 3D descriptors or changing the feature size.
  allowedDescriptors = set(['MaxAbsPartialCharge',
    'MinPartialCharge',
    'MinAbsPartialCharge',
    'HeavyAtomMolWt',
    'MaxAbsEStateIndex',
    'NumRadicalElectrons',
    'NumValenceElectrons',
    'MinAbsEStateIndex',
    'MaxEStateIndex',
    'MaxPartialCharge',
    'MinEStateIndex',
    'ExactMolWt',
    'MolWt',
    'BalabanJ',
    'BertzCT',
    'Chi0',
    'Chi0n',
    'Chi0v',
    'Chi1',
    'Chi1n',
    'Chi1v',
    'Chi2n',
    'Chi2v',
    'Chi3n',
    'Chi3v',
    'Chi4n',
    'Chi4v',
    'HallKierAlpha',
    'Ipc',
    'Kappa1',
    'Kappa2',
    'Kappa3',
    'LabuteASA',
    'PEOE_VSA1',
    'PEOE_VSA10',
    'PEOE_VSA11',
    'PEOE_VSA12',
    'PEOE_VSA13',
    'PEOE_VSA14',
    'PEOE_VSA2',
    'PEOE_VSA3',
    'PEOE_VSA4',
    'PEOE_VSA5',
    'PEOE_VSA6',
    'PEOE_VSA7',
    'PEOE_VSA8',
    'PEOE_VSA9',
    'SMR_VSA1',
    'SMR_VSA10',
    'SMR_VSA2',
    'SMR_VSA3',
    'SMR_VSA4',
    'SMR_VSA5',
    'SMR_VSA6',
    'SMR_VSA7',
    'SMR_VSA8',
    'SMR_VSA9',
    'SlogP_VSA1',
    'SlogP_VSA10',
    'SlogP_VSA11',
    'SlogP_VSA12',
    'SlogP_VSA2',
    'SlogP_VSA3',
    'SlogP_VSA4',
    'SlogP_VSA5',
    'SlogP_VSA6',
    'SlogP_VSA7',
    'SlogP_VSA8',
    'SlogP_VSA9',
    'TPSA',
    'EState_VSA1',
    'EState_VSA10',
    'EState_VSA11',
    'EState_VSA2',
    'EState_VSA3',
    'EState_VSA4',
    'EState_VSA5',
    'EState_VSA6',
    'EState_VSA7',
    'EState_VSA8',
    'EState_VSA9',
    'VSA_EState1',
    'VSA_EState10',
    'VSA_EState2',
    'VSA_EState3',
    'VSA_EState4',
    'VSA_EState5',
    'VSA_EState6',
    'VSA_EState7',
    'VSA_EState8',
    'VSA_EState9',
    'FractionCSP3',
    'HeavyAtomCount',
    'NHOHCount',
    'NOCount',
    'NumAliphaticCarbocycles',
    'NumAliphaticHeterocycles',
    'NumAliphaticRings',
    'NumAromaticCarbocycles',
    'NumAromaticHeterocycles',
    'NumAromaticRings',
    'NumHAcceptors',
    'NumHDonors',
    'NumHeteroatoms',
    'NumRotatableBonds',
    'NumSaturatedCarbocycles',
    'NumSaturatedHeterocycles',
    'NumSaturatedRings',
    'RingCount',
    'MolLogP',
    'MolMR'])

  # Whitelist of RDKit descriptor names, written as a set literal and grouped
  # by descriptor family for easier scanning.
  allowedDescriptors = {
      # Partial charges and EState indices
      'MaxAbsPartialCharge', 'MinPartialCharge', 'MinAbsPartialCharge',
      'MaxPartialCharge',
      'MaxAbsEStateIndex', 'MinAbsEStateIndex', 'MaxEStateIndex',
      'MinEStateIndex',
      # Weights and electron counts
      'HeavyAtomMolWt', 'ExactMolWt', 'MolWt',
      'NumRadicalElectrons', 'NumValenceElectrons',
      # Topological / connectivity descriptors
      'BalabanJ', 'BertzCT',
      'Chi0', 'Chi0n', 'Chi0v',
      'Chi1', 'Chi1n', 'Chi1v',
      'Chi2n', 'Chi2v',
      'Chi3n', 'Chi3v',
      'Chi4n', 'Chi4v',
      'HallKierAlpha', 'Ipc',
      'Kappa1', 'Kappa2', 'Kappa3',
      'LabuteASA',
      # PEOE_VSA
      'PEOE_VSA1', 'PEOE_VSA2', 'PEOE_VSA3', 'PEOE_VSA4', 'PEOE_VSA5',
      'PEOE_VSA6', 'PEOE_VSA7', 'PEOE_VSA8', 'PEOE_VSA9', 'PEOE_VSA10',
      'PEOE_VSA11', 'PEOE_VSA12', 'PEOE_VSA13', 'PEOE_VSA14',
      # SMR_VSA
      'SMR_VSA1', 'SMR_VSA2', 'SMR_VSA3', 'SMR_VSA4', 'SMR_VSA5',
      'SMR_VSA6', 'SMR_VSA7', 'SMR_VSA8', 'SMR_VSA9', 'SMR_VSA10',
      # SlogP_VSA
      'SlogP_VSA1', 'SlogP_VSA2', 'SlogP_VSA3', 'SlogP_VSA4', 'SlogP_VSA5',
      'SlogP_VSA6', 'SlogP_VSA7', 'SlogP_VSA8', 'SlogP_VSA9', 'SlogP_VSA10',
      'SlogP_VSA11', 'SlogP_VSA12',
      'TPSA',
      # EState_VSA
      'EState_VSA1', 'EState_VSA2', 'EState_VSA3', 'EState_VSA4',
      'EState_VSA5', 'EState_VSA6', 'EState_VSA7', 'EState_VSA8',
      'EState_VSA9', 'EState_VSA10', 'EState_VSA11',
      # VSA_EState
      'VSA_EState1', 'VSA_EState2', 'VSA_EState3', 'VSA_EState4',
      'VSA_EState5', 'VSA_EState6', 'VSA_EState7', 'VSA_EState8',
      'VSA_EState9', 'VSA_EState10',
      # Atom, ring and functional counts
      'FractionCSP3', 'HeavyAtomCount', 'NHOHCount', 'NOCount',
      'NumAliphaticCarbocycles', 'NumAliphaticHeterocycles',
      'NumAliphaticRings',
      'NumAromaticCarbocycles', 'NumAromaticHeterocycles',
      'NumAromaticRings',
      'NumHAcceptors', 'NumHDonors', 'NumHeteroatoms', 'NumRotatableBonds',
      'NumSaturatedCarbocycles', 'NumSaturatedHeterocycles',
      'NumSaturatedRings',
      'RingCount',
      # Crippen descriptors
      'MolLogP', 'MolMR',
  }

  def __init__(self):
    """Selects the whitelisted descriptors once, at construction time.

    Iterates over `Descriptors.descList` and keeps only the entries whose
    name is in `allowedDescriptors`. The kept names are stored in
    `self.descriptors` and the matching (name, function) pairs in
    `self.descList`, both in the order `Descriptors.descList` yields them.
    """
    selected = [(name, fn)
                for name, fn in Descriptors.descList
                if name in self.allowedDescriptors]
    self.descriptors = [name for name, _ in selected]
    self.descList = selected

  def _featurize(self, mol):
    """
@@ -172,7 +90,6 @@ class RDKitDescriptors(Featurizer):
        Molecule.
    """
    rval = []
    for desc_name, function in Descriptors.descList:
      if desc_name in self.allowedDescriptors:
    for desc_name, function in self.descList:
      rval.append(function(mol))
    return rval
+5 −2
Original line number Diff line number Diff line
@@ -13,6 +13,7 @@ class TestMolecularWeight(unittest.TestCase):
  """
  Test MolecularWeight.
  """

  def setUp(self):
    """
    Set up tests.
@@ -32,6 +33,7 @@ class TestRDKitDescriptors(unittest.TestCase):
  """
  Test RDKitDescriptors.
  """

  def setUp(self):
    """
    Set up tests.
@@ -46,5 +48,6 @@ class TestRDKitDescriptors(unittest.TestCase):
    """
    descriptors = self.engine([self.mol])
    assert np.allclose(
      descriptors[0, self.engine.descriptors.index('ExactMolWt')], 180,
        descriptors[0, self.engine.descriptors.index('ExactMolWt')],
        180,
        atol=0.1)
Loading