Commit 85117b85 authored by vsag96's avatar vsag96
Browse files

Update test in genome metrics.

All the tests in test_genomics use the functions in the
deepchem.utils.genomics module.
parent e67007a3
Loading
Loading
Loading
Loading
+3 −5
Original line number Diff line number Diff line
@@ -18,7 +18,7 @@ import sys
from deepchem.utils.save import log
from deepchem.utils.save import load_csv_files
from deepchem.utils.save import load_sdf_files
from deepchem.utils.save import encode_fasta_sequence
from deepchem.utils.genomics import encode_fasta_sequence
from deepchem.feat import UserDefinedFeaturizer
from deepchem.data import DiskDataset
from deepchem.data import NumpyDataset
@@ -230,8 +230,7 @@ class DataLoader(object):
          assert len(X) == len(ids)

        time2 = time.time()
        log(
            "TIMING: featurizing shard %d took %0.3f s" %
        log("TIMING: featurizing shard %d took %0.3f s" %
            (shard_num, time2 - time1), self.verbose)
        yield X, y, w, ids

@@ -295,8 +294,7 @@ class SDFLoader(DataLoader):

  def featurize_shard(self, shard):
    """Featurizes a shard of an input dataframe."""
    log(
        "Currently featurizing feature_type: %s" %
    log("Currently featurizing feature_type: %s" %
        self.featurizer.__class__.__name__, self.verbose)
    return featurize_mol_df(shard, self.featurizer, field=self.mol_field)

+4 −4
Original line number Diff line number Diff line
@@ -27,7 +27,7 @@ class TestGenomicMetrics(unittest.TestCase):
    # Encode motif
    motif_name = "TAL1_known4"
    sequences = np.array(["ACGTA", "GATAG", "CGCGC"])
    sequences = dc.utils.save.seq_one_hot_encode(sequences, letters=LETTERS)
    sequences = dc.utils.genomics.seq_one_hot_encode(sequences, letters=LETTERS)
    # sequences now has shape (3, 4, 5, 1)
    self.assertEqual(sequences.shape, (3, 4, 5, 1))

@@ -38,7 +38,7 @@ class TestGenomicMetrics(unittest.TestCase):
    """Test get_pssm_scores returns correct shape."""
    motif_name = "TAL1_known4"
    sequences = np.array(["ACGTA", "GATAG", "CGCGC"])
    sequences = dc.utils.save.seq_one_hot_encode(sequences, letters=LETTERS)
    sequences = dc.utils.genomics.seq_one_hot_encode(sequences, letters=LETTERS)
    # sequences now has shape (3, 4, 5, 1)
    self.assertEqual(sequences.shape, (3, 4, 5, 1))
    pssm = np.array([[1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 1, 0], [0, 0, 0, 1]])
@@ -50,7 +50,7 @@ class TestGenomicMetrics(unittest.TestCase):
    """Test in-silico mutagenesis returns correct shape."""
    # Construct and train SequenceDNN model
    sequences = np.array(["ACGTA", "GATAG", "CGCGC"])
    sequences = dc.utils.save.seq_one_hot_encode(sequences, letters=LETTERS)
    sequences = dc.utils.genomics.seq_one_hot_encode(sequences, letters=LETTERS)
    labels = np.array([1, 0, 0])
    labels = np.reshape(labels, (3, 1))
    self.assertEqual(sequences.shape, (3, 4, 5, 1))
@@ -71,7 +71,7 @@ class TestGenomicMetrics(unittest.TestCase):
    """Test in-silico mutagenesis returns nonzero output."""
    # Construct and train SequenceDNN model
    sequences = np.array(["ACGTA", "GATAG", "CGCGC"])
    sequences = dc.utils.save.seq_one_hot_encode(sequences, letters=LETTERS)
    sequences = dc.utils.genomics.seq_one_hot_encode(sequences, letters=LETTERS)
    labels = np.array([1, 0, 0])
    labels = np.reshape(labels, (3, 1))
    self.assertEqual(sequences.shape, (3, 4, 5, 1))