Unverified commit 94809d39, authored by Bharath Ramsundar and committed by GitHub
Browse files

Merge pull request #1637 from VIGS25/feat-for-datasets

SmilesToImage featurizer for Tox21, Sampl, HIV datasets
parents ba45c808 0a29133a
Loading
Loading
Loading
Loading
+4 −1
Original line number | Diff line number | Diff line
@@ -11,7 +11,7 @@ import deepchem
logger = logging.getLogger(__name__)


def load_hiv(featurizer='ECFP', split='index', reload=True):
def load_hiv(featurizer='ECFP', split='index', reload=True, **kwargs):
  """Load hiv datasets. Does not do train/test split"""
  # Featurize hiv dataset
  logger.info("About to featurize hiv dataset.")
@@ -41,6 +41,9 @@ def load_hiv(featurizer='ECFP', split='index', reload=True):
    featurizer = deepchem.feat.WeaveFeaturizer()
  elif featurizer == 'Raw':
    featurizer = deepchem.feat.RawFeaturizer()
  elif featurizer == "smiles2img":
    img_spec = kwargs.get("img_spec", "std")
    featurizer = deepchem.feat.SmilesToImage(img_spec=img_spec)

  loader = deepchem.data.CSVLoader(
      tasks=hiv_tasks, smiles_field="smiles", featurizer=featurizer)
+8 −1
Original line number | Diff line number | Diff line
@@ -11,7 +11,11 @@ import deepchem
logger = logging.getLogger(__name__)


def load_sampl(featurizer='ECFP', split='index', reload=True, move_mean=True):
def load_sampl(featurizer='ECFP',
               split='index',
               reload=True,
               move_mean=True,
               **kwargs):
  """Load SAMPL datasets."""
  # Featurize SAMPL dataset
  logger.info("About to featurize SAMPL dataset.")
@@ -46,6 +50,9 @@ def load_sampl(featurizer='ECFP', split='index', reload=True, move_mean=True):
    featurizer = deepchem.feat.WeaveFeaturizer()
  elif featurizer == 'Raw':
    featurizer = deepchem.feat.RawFeaturizer()
  elif featurizer == 'smiles2img':
    img_spec = kwargs.get("img_spec", "std")
    featurizer = deepchem.feat.SmilesToImage(img_spec=img_spec)

  loader = deepchem.data.CSVLoader(
      tasks=SAMPL_tasks, smiles_field="smiles", featurizer=featurizer)
+4 −1
Original line number | Diff line number | Diff line
@@ -11,7 +11,7 @@ import deepchem
logger = logging.getLogger(__name__)


def load_tox21(featurizer='ECFP', split='index', reload=True, K=4):
def load_tox21(featurizer='ECFP', split='index', reload=True, K=4, **kwargs):
  """Load Tox21 datasets. Does not do train/test split"""
  # Featurize Tox21 dataset

@@ -45,6 +45,9 @@ def load_tox21(featurizer='ECFP', split='index', reload=True, K=4):
  elif featurizer == 'AdjacencyConv':
    featurizer = deepchem.feat.AdjacencyFingerprint(
        max_n_atoms=150, max_valence=6)
  elif featurizer == "smiles2img":
    img_spec = kwargs.get("img_spec", "std")
    featurizer = deepchem.feat.SmilesToImage(img_spec=img_spec)

  loader = deepchem.data.CSVLoader(
      tasks=tox21_tasks, smiles_field="smiles", featurizer=featurizer)