Commit 51bca116 authored by nd-02110114
Browse files

🚨 fix flake8 errors in dataset test files

parent d3bb4ee8
Loading
Loading
Loading
Loading
+2 −2
Original line number Diff line number Diff line
@@ -9,7 +9,7 @@ import numpy as np
import deepchem as dc

try:
  import torch
  import torch  # noqa
  PYTORCH_IMPORT_FAILED = False
except ImportError:
  PYTORCH_IMPORT_FAILED = True
@@ -744,7 +744,7 @@ def _validate_pytorch_dataset(dataset):

  # Test iterating with multiple workers.

  import torch
  import torch  # noqa
  loader = torch.utils.data.DataLoader(ds, num_workers=3)
  id_count = dict((id, 0) for id in ids)
  for iter_X, iter_y, iter_w, iter_id in loader:
+0 −8
Original line number Diff line number Diff line
import os
import shutil
import logging
import unittest
import tempfile
import deepchem as dc
import numpy as np
from sklearn.ensemble import RandomForestClassifier

logger = logging.getLogger(__name__)

@@ -19,10 +15,6 @@ class TestDrop(unittest.TestCase):

  def test_drop(self):
    """Test on dataset where RDKit fails on some strings."""
    # Set some global variables up top
    reload = True
    len_full = 25

    current_dir = os.path.dirname(os.path.realpath(__file__))
    logger.info("About to load emols dataset.")
    dataset_file = os.path.join(current_dir, "mini_emols.csv")
+1 −12
Original line number Diff line number Diff line
@@ -6,14 +6,8 @@ import tempfile

def test_make_legacy_dataset_from_numpy():
  """Test that legacy DiskDataset objects can be constructed."""
  # This is the shape of legacy_data
  num_datapoints = 100
  num_features = 10
  num_tasks = 10

  current_dir = os.path.dirname(os.path.abspath(__file__))
  # legacy_dataset is a dataset in the legacy format kept around for testing
  # purposes.
  # legacy_dataset is a dataset in the legacy format kept around for testing purposes.
  data_dir = os.path.join(current_dir, "legacy_dataset")
  dataset = dc.data.DiskDataset(data_dir)
  assert dataset.legacy_metadata
@@ -29,11 +23,6 @@ def test_make_legacy_dataset_from_numpy():

def test_reshard():
  """Test that resharding updates legacy datasets."""
  # This is the shape of legacy_data_reshard
  num_datapoints = 100
  num_features = 10
  num_tasks = 10

  # legacy_dataset_reshard is a sharded dataset in the legacy format kept
  # around for testing resharding.
  current_dir = os.path.dirname(os.path.abspath(__file__))
+14 −20
Original line number Diff line number Diff line
@@ -55,7 +55,7 @@ class TestLoad(unittest.TestCase):
    np.random.seed(123)

    current_dir = os.path.dirname(os.path.realpath(__file__))
    ##Make directories to store the raw and featurized datasets.
    # Make directories to store the raw and featurized datasets.
    data_dir = tempfile.mkdtemp()

    # Load dataset
@@ -68,27 +68,25 @@ class TestLoad(unittest.TestCase):
    featurizer = dc.feat.CircularFingerprint(size=1024)
    all_tasks = ["task%d" % i for i in range(17)]

    ####### Do featurization
    # featurization
    loader = dc.data.CSVLoader(
        tasks=all_tasks, smiles_field="smiles", featurizer=featurizer)
    dataset = loader.featurize(dataset_file, data_dir)

    # Do train/valid split.
    X_multi, y_multi, w_multi, ids_multi = (dataset.X, dataset.y, dataset.w,
                                            dataset.ids)
    # train/valid split.
    _, y_multi, w_multi, _ = (dataset.X, dataset.y, dataset.w, dataset.ids)

    ####### Do singletask load
    # singletask load
    y_tasks, w_tasks, = [], []
    dataset = dc.data.DiskDataset(data_dir)
    for ind, task in enumerate(all_tasks):
      logger.info("Processing task %s" % task)

      X_task, y_task, w_task, ids_task = (dataset.X, dataset.y, dataset.w,
                                          dataset.ids)
      _, y_task, w_task, _ = (dataset.X, dataset.y, dataset.w, dataset.ids)
      y_tasks.append(y_task[:, ind])
      w_tasks.append(w_task[:, ind])

    ################## Do comparison
    # comparison
    for ind, task in enumerate(all_tasks):
      y_multi_task = y_multi[:, ind]
      w_multi_task = w_multi[:, ind]
@@ -104,9 +102,6 @@ class TestLoad(unittest.TestCase):
    # Only for debug!
    np.random.seed(123)

    # Set some global variables up top
    reload = True

    current_dir = os.path.dirname(os.path.realpath(__file__))
    # Make directories to store the raw and featurized datasets.
    data_dir = tempfile.mkdtemp()
@@ -124,16 +119,15 @@ class TestLoad(unittest.TestCase):
    n_tasks = 17
    tasks = all_tasks[0:n_tasks]

    ####### Do multitask load
    # multitask load
    loader = dc.data.CSVLoader(
        tasks=tasks, smiles_field="smiles", featurizer=featurizer)
    dataset = loader.featurize(dataset_file, data_dir)

    # Do train/valid split.
    X_multi, y_multi, w_multi, ids_multi = (dataset.X, dataset.y, dataset.w,
                                            dataset.ids)
    _, y_multi, w_multi, _ = (dataset.X, dataset.y, dataset.w, dataset.ids)

    ####### Do singletask load
    # singletask load
    y_tasks, w_tasks, ids_tasks = [], [], []
    for task in tasks:
      logger.info("Processing task %s" % task)
@@ -143,13 +137,13 @@ class TestLoad(unittest.TestCase):
          tasks=[task], smiles_field="smiles", featurizer=featurizer)
      dataset = loader.featurize(dataset_file, data_dir)

      X_task, y_task, w_task, ids_task = (dataset.X, dataset.y, dataset.w,
      _, y_task, w_task, ids_task = (dataset.X, dataset.y, dataset.w,
                                     dataset.ids)
      y_tasks.append(y_task)
      w_tasks.append(w_task)
      ids_tasks.append(ids_task)

    ################## Do comparison
    # comparison
    for ind, task in enumerate(tasks):
      y_multi_task = y_multi[:, ind]
      w_multi_task = w_multi[:, ind]
+0 −2
Original line number Diff line number Diff line
@@ -2,8 +2,6 @@
Testing singletask/multitask dataset merging
"""
import os
import shutil
import tempfile
import deepchem as dc
import numpy as np

Loading