Commit 973848a8 authored by nd-02110114's avatar nd-02110114
Browse files

Merge branch 'master' into gat-add-mode

parents f8353349 651df9f9
Loading
Loading
Loading
Loading

.python-version

0 → 100644
+1 −0
Original line number Diff line number Diff line
miniconda3-latest
+1 −1
Original line number Diff line number Diff line
@@ -13,7 +13,7 @@ import pandas as pd
import numpy as np

from deepchem.utils.typing import OneOrMany
from deepchem.utils.save import load_image_files, load_csv_files, load_json_files, load_sdf_files
from deepchem.utils.data_utils import load_image_files, load_csv_files, load_json_files, load_sdf_files
from deepchem.utils.genomics_utils import encode_bio_sequence
from deepchem.feat import UserDefinedFeaturizer, Featurizer
from deepchem.data import Dataset, DiskDataset, NumpyDataset, ImageDataset
+21 −7
Original line number Diff line number Diff line
@@ -18,7 +18,7 @@ import pandas as pd

import deepchem as dc
from deepchem.utils.typing import OneOrMany, Shape
from deepchem.utils.save import save_to_disk, load_from_disk, load_image_files
from deepchem.utils.data_utils import save_to_disk, load_from_disk, load_image_files

Batch = Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]

@@ -420,6 +420,19 @@ class Dataset(object):
    """
    raise NotImplementedError()

  def select(self, indices: Sequence[int],
             select_dir: Optional[str] = None) -> "Dataset":
    """Build a new dataset holding only the samples at the given indices.

    This implementation is a placeholder: it performs no selection and
    always raises `NotImplementedError`.

    Parameters
    ----------
    indices: Sequence
      Indices of the samples to select from this dataset.
    select_dir: str, optional (default None)
      Path to the new directory that the selected samples will be copied to.

    Returns
    -------
    Dataset
      A new dataset made of the selected samples.

    Raises
    ------
    NotImplementedError
      Always, since no selection logic is provided here.
    """
    raise NotImplementedError()

  def get_statistics(self, X_stats: bool = True,
                     y_stats: bool = True) -> Tuple[float, ...]:
    """Compute and return statistics of this dataset.
@@ -1868,13 +1881,13 @@ class DiskDataset(Dataset):
        tasks=tasks)

  @staticmethod
  def merge(datasets: Iterable["DiskDataset"],
  def merge(datasets: Iterable["Dataset"],
            merge_dir: Optional[str] = None) -> "DiskDataset":
    """Merges provided datasets into a merged dataset.

    Parameters
    ----------
    datasets: Iterable[DiskDataset]
    datasets: Iterable[Dataset]
      List of datasets to merge.
    merge_dir: str, optional (default None)
      The new directory path to store the merged DiskDataset.
@@ -1897,7 +1910,7 @@ class DiskDataset(Dataset):
    tasks = []
    for dataset in datasets:
      try:
        tasks.append(dataset.tasks)
        tasks.append(dataset.tasks)  # type: ignore
      except AttributeError:
        pass
    if tasks:
@@ -2033,7 +2046,7 @@ class DiskDataset(Dataset):

  def shuffle_each_shard(self,
                         shard_basenames: Optional[List[str]] = None) -> None:
    """Shuffles elements within each shard of the datset.
    """Shuffles elements within each shard of the dataset.

    Parameters
    ----------
@@ -2282,8 +2295,9 @@ class DiskDataset(Dataset):

    Returns
    -------
    DiskDataset
      A Dataset containing the selected samples
    Dataset
      A dataset containing the selected samples. The default dataset is `DiskDataset`.
      If `output_numpy_dataset` is True, the dataset is `NumpyDataset`.
    """
    if output_numpy_dataset and (select_dir is not None or
                                 select_shard_size is not None):
+1 −1
Original line number Diff line number Diff line
@@ -23,7 +23,7 @@ class TestReload(unittest.TestCase):
    # Load MUV dataset
    logger.info("About to featurize compounds")
    featurizer = dc.feat.CircularFingerprint(size=1024)
    raw_dataset = dc.utils.save.load_from_disk(dataset_file)
    raw_dataset = dc.utils.data_utils.load_from_disk(dataset_file)
    MUV_tasks = [
        'MUV-692', 'MUV-689', 'MUV-846', 'MUV-859', 'MUV-644', 'MUV-548',
        'MUV-852', 'MUV-600', 'MUV-810', 'MUV-712', 'MUV-737', 'MUV-858',
Loading