Unverified commit e61c7df7, authored by Bharath Ramsundar, committed by GitHub
Browse files

Merge pull request #2036 from ncfrey/material_molnet_loaders

[WIP] Material MolNet Loaders
parents b13f82de a2686526
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -629,7 +629,7 @@ class JsonLoader(DataLoader):
        if self.id_field:
          ids = shard[self.id_field].values
        else:
          ids = np.ones(len(X))
          ids = np.ones(len(valid_inds))
        ids = ids[valid_inds]

        if len(self.tasks) > 0:
+6 −2
Original line number Diff line number Diff line
@@ -37,6 +37,10 @@ class ElementPropertyFingerprint(MaterialCompositionFeaturizer):

  .. [4] Pymatgen: Ong, S.P. et al. Comput. Mater. Sci. 68, 314-319 (2013).

  Notes
  -----
  `NaN` feature values are automatically converted to 0 by this featurizer.  

  """

  def __init__(self, data_source='matminer'):
@@ -78,7 +82,7 @@ class ElementPropertyFingerprint(MaterialCompositionFeaturizer):
    except:
      feats = []

    return np.array(feats)
    return np.nan_to_num(np.array(feats))


class SineCoulombMatrix(MaterialStructureFeaturizer):
+2 −0
Original line number Diff line number Diff line
@@ -31,6 +31,8 @@ from deepchem.molnet.load_function.kinase_datasets import load_kinase
from deepchem.molnet.load_function.thermosol_datasets import load_thermosol
from deepchem.molnet.load_function.hppb_datasets import load_hppb
from deepchem.molnet.load_function.chembl25_datasets import load_chembl25
from deepchem.molnet.load_function.material_datasets.load_bandgap import load_bandgap
from deepchem.molnet.load_function.material_datasets.load_perovskite import load_perovskite

from deepchem.molnet.dnasim import simulate_motif_density_localization
from deepchem.molnet.dnasim import simulate_motif_counting
+11 −18
Original line number Diff line number Diff line
@@ -29,7 +29,7 @@ DEFAULT_FEATURIZERS = {k: DEFAULT_FEATURIZERS[k] for k in mydataset_featurizers}
DEFAULT_TRANSFORMERS = get_defaults("trans")

# dict of accepted splitters
DEFAULT_SPLITTERS = get_defaults("split")
DEFAULT_SPLITTERS = get_defaults("splits")

# names of supported splitters
mydataset_splitters = ['Splitter1', 'Splitter2', 'Splitter3']
@@ -38,15 +38,16 @@ DEFAULT_SPLITTERS = {k: DEFAULT_SPLITTERS[k] for k in mydataset_splitters}

def load_mydataset(
    featurizer: Featurizer = DEFAULT_FEATURIZERS['RawFeaturizer'],
    transformers: Tuple[Transformer] = (
        DEFAULT_TRANSFORMERS['PowerTransformer']),
    transformers: List[Transformer] = [
        DEFAULT_TRANSFORMERS['PowerTransformer']
    ],
    splitter: Splitter = DEFAULT_SPLITTERS['RandomSplitter'],
    reload: bool = True,
    data_dir: Optional[str] = None,
    save_dir: Optional[str] = None,
    featurizer_kwargs: Optional[Dict[str, object]] = None,
    splitter_kwargs: Optional[Dict[str, object]] = None,
    transformer_kwargs: Optional[Dict[str, Dict[str, object]]] = None,
    featurizer_kwargs: Dict[str, object] = {},
    splitter_kwargs: Dict[str, object] = {},
    transformer_kwargs: Dict[str, Dict[str, object]] = {},
    **kwargs) -> Tuple[List, Tuple, List]:
  """Load mydataset.

@@ -76,7 +77,7 @@ def load_mydataset(
  ----------
  featurizer : {List of allowed featurizers for this dataset}
    A featurizer that inherits from deepchem.feat.Featurizer.
  transformers : Tuple{List of allowed transformers for this dataset}
  transformers : List{List of allowed transformers for this dataset}
    A transformer that inherits from deepchem.trans.Transformer.
  splitter : {List of allowed splitters for this dataset}
    A splitter that inherits from deepchem.splits.splitters.Splitter.
@@ -153,9 +154,9 @@ def load_mydataset(
    featurizer = featurizer(**featurizer_kwargs)

  if isinstance(splitter, str):
    splitter = DEFAULT_SPLITTERS[splitter](**splitter_kwargs)
    splitter = DEFAULT_SPLITTERS[splitter]()
  elif issubclass(splitter, Splitter):
    splitter = splitter(**splitter_kwargs)
    splitter = splitter()

  # Reload from disk
  if reload:
@@ -198,16 +199,8 @@ def load_mydataset(
  # Featurize dataset
  dataset = loader.create_dataset(dataset_file)

  # 80/10/10 train/val/test split is default
  frac_train = kwargs.get("frac_train", 0.8)
  frac_valid = kwargs.get('frac_valid', 0.1)
  frac_test = kwargs.get('frac_test', 0.1)

  train_dataset, valid_dataset, test_dataset = splitter.train_valid_test_split(
      dataset,
      frac_train=frac_train,
      frac_valid=frac_valid,
      frac_test=frac_test)
      dataset, **splitter_kwargs)

  # Initialize transformers
  transformers = [
+0 −0

Empty file added.

Loading