Commit af13b69f authored by Atreya Majumdar's avatar Atreya Majumdar
Browse files

Added basic skeleton

parent 692a2ed7
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -31,6 +31,7 @@ from deepchem.molnet.load_function.thermosol_datasets import load_thermosol
from deepchem.molnet.load_function.hppb_datasets import load_hppb
from deepchem.molnet.load_function.chembl25_datasets import load_chembl25
from deepchem.molnet.load_function.zinc15_datasets import load_zinc15
from deepchem.molnet.load_function.freesolv_dataset import load_freesolv
from deepchem.molnet.load_function.material_datasets.load_bandgap import load_bandgap
from deepchem.molnet.load_function.material_datasets.load_perovskite import load_perovskite
from deepchem.molnet.load_function.material_datasets.load_Pt_NO_surface_adsorbate_energy import load_Platinum_Adsorption
+33 −0
Original line number Diff line number Diff line
"""
freesolv dataset loader.
"""
import os
import deepchem as dc
from deepchem.molnet.load_function.molnet_loader import TransformerGenerator, _MolnetLoader
from deepchem.data import Dataset
from typing import List, Optional, Tuple, Union

# Remote location of the gzipped freesolv CSV; fetched by the loader only when
# no local copy is found in its data directory.
FREESOLV_URL = 'https://deepchemdata.s3.us-west-1.amazonaws.com/datasets/freesolv.csv.gz'
# Task names handed to the loader (and from there to CSVLoader as ``tasks``).
# NOTE(review): presumably 'y' is the label column in the CSV -- confirm.
FREESOLV_TASKS = ['y']

class _FreesolvLoader(_MolnetLoader):
    """Loader that builds the freesolv dataset from its gzipped CSV file.

    The CSV is expected as ``freesolv.csv.gz`` inside ``self.data_dir`` and is
    downloaded from ``FREESOLV_URL`` when it is not already present there.
    """

    def create_dataset(self) -> Dataset:
        """Featurize the freesolv CSV and return the resulting dataset.

        Returns
        -------
        Dataset
            The result of ``CSVLoader.create_dataset`` on the local CSV file,
            using ``self.tasks`` as tasks, the ``smiles`` column as the
            feature field and ``self.featurizer`` as the featurizer.
        """
        dataset_file = os.path.join(self.data_dir, 'freesolv.csv.gz')
        # Only the download is conditional. The dataset must be built and
        # returned whether the file was just fetched or was already on disk;
        # otherwise a cached file would make this method return None.
        if not os.path.exists(dataset_file):
            dc.utils.data_utils.download_url(url=FREESOLV_URL, dest_dir=self.data_dir)
        loader = dc.data.CSVLoader(
            tasks=self.tasks,
            feature_field='smiles',
            featurizer=self.featurizer,
        )
        return loader.create_dataset(dataset_file)
    
def load_freesolv(
    featurizer: Union[dc.feat.Featurizer, str] = dc.feat.MATFeaturizer(),
    splitter: Union[dc.splits.Splitter, str, None] = None,
    transformers: List[Union[TransformerGenerator, str]] = ['normalization'],
    reload: bool = True,
    data_dir: Optional[str] = None,
    save_dir: Optional[str] = None,
    **kwargs
) -> Tuple[List[str], Tuple[Dataset, ...], List[dc.trans.Transformer]]:
    """Load the freesolv dataset through ``_FreesolvLoader``.

    Parameters
    ----------
    featurizer: Featurizer or str
        Featurizer (or its name) forwarded to the loader. The default is a
        single ``MATFeaturizer`` instance created when this function is
        defined, so it is shared by every call that relies on the default.
    splitter: Splitter or str, optional
        Splitter (or its name) forwarded to the loader; ``None`` by default.
    transformers: list of TransformerGenerator or str
        Transformers forwarded to the loader; ``['normalization']`` by default.
    reload: bool
        Forwarded to ``load_dataset``.
        NOTE(review): presumably toggles reuse of previously saved data --
        confirm against ``_MolnetLoader``.
    data_dir: str, optional
        Forwarded to the loader, which looks for / downloads the CSV there.
    save_dir: str, optional
        Forwarded to the loader.
    **kwargs
        Extra keyword arguments passed through to the loader unchanged.

    Returns
    -------
    tuple
        Whatever ``_FreesolvLoader.load_dataset('freesolv', reload)`` returns;
        annotated as (task names, datasets, transformers).
    """
    freesolv_loader = _FreesolvLoader(
        featurizer,
        splitter,
        transformers,
        FREESOLV_TASKS,
        data_dir,
        save_dir,
        **kwargs,
    )
    return freesolv_loader.load_dataset('freesolv', reload)
 No newline at end of file