Commit a85d57f1 authored by miaecle's avatar miaecle
Browse files

adding raw featurizer

parent 297eff9d
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -21,3 +21,4 @@ from deepchem.feat.grid_featurizer import GridFeaturizer
from deepchem.feat.nnscore_utils import hydrogenate_and_compute_partial_charges
from deepchem.feat.binding_pocket_features import BindingPocketFeaturizer
from deepchem.feat.one_hot import OneHotFeaturizer
from deepchem.feat.raw_featurizer import RawFeaturizer
+19 −0
Original line number Diff line number Diff line
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
from __future__ import print_function
from __future__ import division
from __future__ import unicode_literals

from rdkit import Chem
from deepchem.feat import Featurizer

class RawFeaturizer(Featurizer):
  """Pass-through featurizer that performs no numerical encoding.

  Depending on the `smiles` flag, each molecule is either handed back
  untouched or converted to a SMILES string via RDKit.
  """

  def __init__(self, smiles=False):
    """Store the output-mode flag.

    Parameters
    ----------
    smiles: bool, optional (default False)
      When truthy, `_featurize` returns `Chem.MolToSmiles(mol)`;
      otherwise it returns the molecule object it was given.
    """
    self.smiles = smiles

  def _featurize(self, mol):
    """Return the "raw" representation of a single molecule.

    Parameters
    ----------
    mol: object
      Molecule to process (presumably an RDKit Mol -- it is passed
      directly to `Chem.MolToSmiles` when SMILES output is requested).

    Returns
    -------
    The input `mol` unchanged, or its SMILES string when `self.smiles`
    is truthy.
    """
    # Guard clause: the default mode simply echoes the input back.
    if not self.smiles:
      return mol
    return Chem.MolToSmiles(mol)
 No newline at end of file
+15 −15
Original line number Diff line number Diff line

from deepchem.molnet.chembl_datasets import load_chembl
from deepchem.molnet.clintox_datasets import load_clintox
from deepchem.molnet.delaney_datasets import load_delaney
from deepchem.molnet.hiv_datasets import load_hiv
from deepchem.molnet.kaggle_datasets import load_kaggle
from deepchem.molnet.muv_datasets import load_muv
from deepchem.molnet.nci_datasets import load_nci
from deepchem.molnet.pcba_datasets import load_pcba
from deepchem.molnet.pdbbind_datasets import load_pdbbind_grid
from deepchem.molnet.qm7_datasets import load_qm7_from_mat, load_qm7b_from_mat
from deepchem.molnet.qm9_datasets import load_qm9
from deepchem.molnet.sampl_datasets import load_sampl
from deepchem.molnet.sider_datasets import load_sider
from deepchem.molnet.tox21_datasets import load_tox21
from deepchem.molnet.toxcast_datasets import load_toxcast
from deepchem.molnet.load_function.chembl_datasets import load_chembl
from deepchem.molnet.load_function.clintox_datasets import load_clintox
from deepchem.molnet.load_function.delaney_datasets import load_delaney
from deepchem.molnet.load_function.hiv_datasets import load_hiv
from deepchem.molnet.load_function.kaggle_datasets import load_kaggle
from deepchem.molnet.load_function.muv_datasets import load_muv
from deepchem.molnet.load_function.nci_datasets import load_nci
from deepchem.molnet.load_function.pcba_datasets import load_pcba
from deepchem.molnet.load_function.pdbbind_datasets import load_pdbbind_grid
from deepchem.molnet.load_function.qm7_datasets import load_qm7_from_mat, load_qm7b_from_mat
from deepchem.molnet.load_function.qm9_datasets import load_qm9
from deepchem.molnet.load_function.sampl_datasets import load_sampl
from deepchem.molnet.load_function.sider_datasets import load_sider
from deepchem.molnet.load_function.tox21_datasets import load_tox21
from deepchem.molnet.load_function.toxcast_datasets import load_toxcast

from deepchem.molnet.run_benchmark import run_benchmark
+0 −0

Empty file added.

+7 −4
Original line number Diff line number Diff line
@@ -8,7 +8,7 @@ from __future__ import unicode_literals
import os
import deepchem as dc

from deepchem.molnet.chembl_tasks import chembl_tasks
from deepchem.molnet.load_function.chembl_tasks import chembl_tasks

def load_chembl(shard_size=2000, featurizer="ECFP", set="5thresh", split="random"):

@@ -48,9 +48,12 @@ def load_chembl(shard_size=2000, featurizer="ECFP", set="5thresh", split="random

  # Featurize ChEMBL dataset
  print("About to featurize ChEMBL dataset.")
  featurizers = {'ECFP': dc.feat.CircularFingerprint(size=1024),
                 'GraphConv': dc.feat.ConvMolFeaturizer()}
  featurizer = featurizers[featurizer]
  if featurizer == 'ECFP':
    featurizer = dc.feat.CircularFingerprint(size=1024)
  elif featurizer == 'GraphConv':
    featurizer = dc.feat.ConvMolFeaturizer()
  elif featurizer == 'Raw':
    featurizer = dc.feat.RawFeaturizer()

  loader = dc.data.CSVLoader(
      tasks=chembl_tasks, smiles_field="smiles", featurizer=featurizer)
Loading