Commit 3199dd61 authored by Vignesh
Browse files

SmilesToImage featurizer for datasets

parent 2417df0f
Loading
Loading
Loading
Loading
+5 −1
Original line number Diff line number Diff line
@@ -11,7 +11,7 @@ import deepchem
logger = logging.getLogger(__name__)


def load_hiv(featurizer='ECFP', split='index', reload=True):
def load_hiv(featurizer='ECFP', split='index', reload=True, **kwargs):
  """Load hiv datasets. Does not do train/test split"""
  # Featurize hiv dataset
  logger.info("About to featurize hiv dataset.")
@@ -41,6 +41,10 @@ def load_hiv(featurizer='ECFP', split='index', reload=True):
    featurizer = deepchem.feat.WeaveFeaturizer()
  elif featurizer == 'Raw':
    featurizer = deepchem.feat.RawFeaturizer()
  elif featurizer == "smiles2img":
    img_spec = kwargs.get("img_spec", "std")
    logger.info(img_spec)
    featurizer = deepchem.feat.SmilesToImage(img_spec=img_spec)

  loader = deepchem.data.CSVLoader(
      tasks=hiv_tasks, smiles_field="smiles", featurizer=featurizer)
+5 −1
Original line number Diff line number Diff line
@@ -11,7 +11,7 @@ import deepchem
logger = logging.getLogger(__name__)


def load_sampl(featurizer='ECFP', split='index', reload=True, move_mean=True):
def load_sampl(featurizer='ECFP', split='index', reload=True, move_mean=True, **kwargs):
  """Load SAMPL datasets."""
  # Featurize SAMPL dataset
  logger.info("About to featurize SAMPL dataset.")
@@ -46,6 +46,10 @@ def load_sampl(featurizer='ECFP', split='index', reload=True, move_mean=True):
    featurizer = deepchem.feat.WeaveFeaturizer()
  elif featurizer == 'Raw':
    featurizer = deepchem.feat.RawFeaturizer()
  elif featurizer == 'smiles2img':
    img_spec = kwargs.get("img_spec", "std")
    logger.info(img_spec)
    featurizer = deepchem.feat.SmilesToImage(img_spec=img_spec)

  loader = deepchem.data.CSVLoader(
      tasks=SAMPL_tasks, smiles_field="smiles", featurizer=featurizer)
+5 −1
Original line number Diff line number Diff line
@@ -11,7 +11,7 @@ import deepchem
logger = logging.getLogger(__name__)


def load_tox21(featurizer='ECFP', split='index', reload=True, K=4):
def load_tox21(featurizer='ECFP', split='index', reload=True, K=4, **kwargs):
  """Load Tox21 datasets. Does not do train/test split"""
  # Featurize Tox21 dataset

@@ -45,6 +45,10 @@ def load_tox21(featurizer='ECFP', split='index', reload=True, K=4):
  elif featurizer == 'AdjacencyConv':
    featurizer = deepchem.feat.AdjacencyFingerprint(
        max_n_atoms=150, max_valence=6)
  elif featurizer == "smiles2img":
      img_spec = kwargs.get("img_spec", "std")
      logger.info(img_spec)
      featurizer = deepchem.feat.SmilesToImage(img_spec=img_spec)

  loader = deepchem.data.CSVLoader(
      tasks=tox21_tasks, smiles_field="smiles", featurizer=featurizer)