Unverified Commit 59e02291 authored by Daiki Nishikawa's avatar Daiki Nishikawa Committed by GitHub
Browse files

Merge pull request #2186 from nd-02110114/fix-duplicated-error

Apply flake8 in deepchem.trans, rl, metalearning and fix dataloader duplicated warnings
parents f7c8ae73 b47cb50b
Loading
Loading
Loading
Loading
+3 −3
Original line number Diff line number Diff line
@@ -22,7 +22,7 @@ def load_solubility_data():
  tasks = ["log-solubility"]
  input_file = os.path.join(current_dir, "../../models/tests/example.csv")
  loader = dc.data.CSVLoader(
      tasks=tasks, smiles_field="smiles", featurizer=featurizer)
      tasks=tasks, feature_field="smiles", featurizer=featurizer)

  return loader.create_dataset(input_file)

@@ -39,8 +39,8 @@ def load_multitask_data():
  input_file = os.path.join(current_dir,
                            "../../models/tests/multitask_example.csv")
  loader = dc.data.CSVLoader(
      tasks=tasks, smiles_field="smiles", featurizer=featurizer)
  return loader.featurize(input_file)
      tasks=tasks, feature_field="smiles", featurizer=featurizer)
  return loader.create_dataset(input_file)


class TestTransformer(dc.trans.Transformer):
+2 −2
Original line number Diff line number Diff line
@@ -25,8 +25,8 @@ class TestDrop(unittest.TestCase):
    emols_tasks = ['activity']

    loader = dc.data.CSVLoader(
        tasks=emols_tasks, smiles_field="smiles", featurizer=featurizer)
    dataset = loader.featurize(dataset_file)
        tasks=emols_tasks, feature_field="smiles", featurizer=featurizer)
    dataset = loader.create_dataset(dataset_file)

    X, y, w, ids = (dataset.X, dataset.y, dataset.w, dataset.ids)
    assert len(X) == len(y) == len(w) == len(ids)
+1 −1
Original line number Diff line number Diff line
@@ -20,7 +20,7 @@ class TestFASTALoader(unittest.TestCase):
    input_file = os.path.join(self.current_dir,
                              "../../data/tests/example.fasta")
    loader = dc.data.FASTALoader()
    sequences = loader.featurize(input_file)
    sequences = loader.create_dataset(input_file)

    # example.fasta contains 3 sequences each of length 58.
    # The one-hot encoding turns base-pairs into vectors of length 5 (ATCGN).
+8 −8
Original line number Diff line number Diff line
@@ -58,45 +58,45 @@ class TestImageLoader(unittest.TestCase):

  def test_png_simple_load(self):
    loader = dc.data.ImageLoader()
    dataset = loader.featurize(self.face_path)
    dataset = loader.create_dataset(self.face_path)
    # These are the known dimensions of face.png
    assert dataset.X.shape == (1, 768, 1024, 3)

  def test_png_simple_load_with_labels(self):
    loader = dc.data.ImageLoader()
    dataset = loader.featurize((self.face_path, np.array(1)))
    dataset = loader.create_dataset((self.face_path, np.array(1)))
    # These are the known dimensions of face.png
    assert dataset.X.shape == (1, 768, 1024, 3)
    assert (dataset.y == np.ones((1,))).all()

  def test_tif_simple_load(self):
    loader = dc.data.ImageLoader()
    dataset = loader.featurize(self.tif_image_path)
    dataset = loader.create_dataset(self.tif_image_path)
    # TODO(rbharath): Where are the color channels?
    assert dataset.X.shape == (1, 44, 330)

  def test_png_multi_load(self):
    loader = dc.data.ImageLoader()
    dataset = loader.featurize([self.face_path, self.face_copy_path])
    dataset = loader.create_dataset([self.face_path, self.face_copy_path])
    assert dataset.X.shape == (2, 768, 1024, 3)

  def test_png_zip_load(self):
    loader = dc.data.ImageLoader()
    dataset = loader.featurize(self.zip_path)
    dataset = loader.create_dataset(self.zip_path)
    assert dataset.X.shape == (1, 768, 1024, 3)

  def test_png_multi_zip_load(self):
    loader = dc.data.ImageLoader()
    dataset = loader.featurize(self.multi_zip_path)
    dataset = loader.create_dataset(self.multi_zip_path)
    assert dataset.X.shape == (2, 768, 1024, 3)

  def test_multitype_zip_load(self):
    loader = dc.data.ImageLoader()
    dataset = loader.featurize(self.multitype_zip_path)
    dataset = loader.create_dataset(self.multitype_zip_path)
    # Since the different files have different shapes, makes an object array
    assert dataset.X.shape == (2,)

  def test_directory_load(self):
    loader = dc.data.ImageLoader()
    dataset = loader.featurize(self.image_dir)
    dataset = loader.create_dataset(self.image_dir)
    assert dataset.X.shape == (2, 768, 1024, 3)
+8 −8
Original line number Diff line number Diff line
@@ -32,8 +32,8 @@ class TestLoad(unittest.TestCase):
    featurizer = dc.feat.CircularFingerprint(size=1024)
    tasks = ["log-solubility"]
    loader = dc.data.CSVLoader(
        tasks=tasks, smiles_field="smiles", featurizer=featurizer)
    dataset = loader.featurize(dataset_file, data_dir)
        tasks=tasks, feature_field="smiles", featurizer=featurizer)
    dataset = loader.create_dataset(dataset_file, data_dir)

    X, y, w, ids = (dataset.X, dataset.y, dataset.w, dataset.ids)
    shutil.move(data_dir, moved_data_dir)
@@ -70,8 +70,8 @@ class TestLoad(unittest.TestCase):

    # featurization
    loader = dc.data.CSVLoader(
        tasks=all_tasks, smiles_field="smiles", featurizer=featurizer)
    dataset = loader.featurize(dataset_file, data_dir)
        tasks=all_tasks, feature_field="smiles", featurizer=featurizer)
    dataset = loader.create_dataset(dataset_file, data_dir)

    # train/valid split.
    _, y_multi, w_multi, _ = (dataset.X, dataset.y, dataset.w, dataset.ids)
@@ -121,8 +121,8 @@ class TestLoad(unittest.TestCase):

    # multitask load
    loader = dc.data.CSVLoader(
        tasks=tasks, smiles_field="smiles", featurizer=featurizer)
    dataset = loader.featurize(dataset_file, data_dir)
        tasks=tasks, feature_field="smiles", featurizer=featurizer)
    dataset = loader.create_dataset(dataset_file, data_dir)

    # Do train/valid split.
    _, y_multi, w_multi, _ = (dataset.X, dataset.y, dataset.w, dataset.ids)
@@ -134,8 +134,8 @@ class TestLoad(unittest.TestCase):
      if os.path.exists(data_dir):
        shutil.rmtree(data_dir)
      loader = dc.data.CSVLoader(
          tasks=[task], smiles_field="smiles", featurizer=featurizer)
      dataset = loader.featurize(dataset_file, data_dir)
          tasks=[task], feature_field="smiles", featurizer=featurizer)
      dataset = loader.create_dataset(dataset_file, data_dir)

      _, y_task, w_task, ids_task = (dataset.X, dataset.y, dataset.w,
                                     dataset.ids)
Loading