Commit d6bd5944 authored by Nathan Frey
Browse files

Add type hints and docs

parent cf455f6a
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -14,6 +14,7 @@ from deepchem.data.supports import *
from deepchem.data.data_loader import DataLoader
from deepchem.data.data_loader import CSVLoader
from deepchem.data.data_loader import UserCSVLoader
from deepchem.data.data_loader import JsonLoader
from deepchem.data.data_loader import SDFLoader
from deepchem.data.data_loader import FASTALoader
from deepchem.data.data_loader import ImageLoader
+14 −12
Original line number Diff line number Diff line
@@ -12,10 +12,12 @@ import time
import sys
import logging
import warnings
from typing import List, Optional

from deepchem.utils.save import load_csv_files, load_json_files
from deepchem.utils.save import load_sdf_files
from deepchem.utils.genomics import encode_fasta_sequence
from deepchem.feat import UserDefinedFeaturizer
from deepchem.feat import UserDefinedFeaturizer, Featurizer
from deepchem.data import DiskDataset, NumpyDataset, ImageDataset
import zipfile

@@ -450,16 +452,16 @@ class JsonLoader(DataLoader):
  """

  def __init__(self,
               tasks,
               smiles_field=None,
               id_field=None,
               featurizer=None,
               log_every_n=1000):
               tasks: List[str],
               smiles_field: Optional[str] = None,
               id_field: Optional[str] = None,
               featurizer: Optional[Featurizer] = None,
               log_every_n: int = 1000):
    """Initializes JsonLoader.

    Parameters
    ----------
    tasks: list[str]
    tasks : List[str]
      List of task names
    smiles_field : str, optional
      Name of field that holds smiles string 
@@ -473,7 +475,7 @@ class JsonLoader(DataLoader):
    """

    if not isinstance(tasks, list):
      raise ValueError("tasks must be a list.")
      raise ValueError("Tasks must be a list.")
    self.tasks = tasks
    self.smiles_field = smiles_field
    if id_field is None:
+28 −2
Original line number Diff line number Diff line
@@ -10,6 +10,7 @@ import numpy as np
import os
import deepchem
import warnings
from typing import List, Optional
from deepchem.utils.genomics import encode_bio_sequence as encode_sequence, encode_fasta_sequence as fasta_sequence, seq_one_hot_encode as seq_one_hotencode


@@ -116,8 +117,33 @@ def load_csv_files(filenames, shard_size=None, verbose=True):
        yield df


def load_json_files(filenames, shard_size=None, verbose=True):
  """Load data as pandas dataframe."""
def load_json_files(filenames: List[str],
                    shard_size: Optional[int] = None,
                    verbose: bool = True):
  """Load data as pandas dataframe.

  Parameters
  ----------
  filenames : List[str]
    List of json filenames.
  shard_size : int, optional
    Chunksize for reading json files.
  verbose : bool (default True)
    Log json loading with shard numbers.

  Yields
  ------
  df : pandas.DataFrame
    Shard of dataframe.

  Notes
  -----
  To load shards from a JSON file into a pandas DataFrame, the file
  must originally have been saved with
  ``df.to_json('filename.json', orient='records', lines=True)``.

  """

  shard_num = 1
  for filename in filenames:
    if shard_size is None:
+6 −0
Original line number Diff line number Diff line
@@ -22,6 +22,12 @@ UserCSVLoader
.. autoclass:: deepchem.data.UserCSVLoader
  :members:

JsonLoader
^^^^^^^^^^

.. autoclass:: deepchem.data.JsonLoader
  :members:

FASTALoader
^^^^^^^^^^^

+2 −0
Original line number Diff line number Diff line
@@ -54,6 +54,8 @@ File Handling

.. autofunction:: deepchem.utils.save.load_csv_files

.. autofunction:: deepchem.utils.save.load_json_files

.. autofunction:: deepchem.utils.save.save_metadata

.. autofunction:: deepchem.utils.save.load_from_disk