Commit f1cb7d59 authored by seyonechithrananda
Browse files

change from unittest to pytest

parent a49e6ca2
Loading
Loading
Loading
Loading
+25 −14
Original line number Diff line number Diff line
import unittest
from deepchem.feat.roberta_tokenizer import RobertaFeaturizer
import pytest


class TestRobertaFeaturizer(unittest.TestCase):
  """Tests for RobertaFeaturizer"""

@pytest.mark.torch
def setUp(self):
    from deepchem.feat.roberta_tokenizer import RobertaFeaturizer
    self.smiles = ["Cn1c(=O)c2c(ncn2C)n(C)c1=O", "CC(=O)N1CN(C(C)=O)C(O)C1O"]
    self.long_molecule_smiles = [
        "CCCCCCCCCCCCCCCCCCCC(=O)OCCCNC(=O)c1ccccc1SSc1ccccc1C(=O)NCCCOC(=O)CCCCCCCCCCCCCCCCCCC"
@@ -15,9 +11,17 @@ class TestRobertaFeaturizer(unittest.TestCase):
    self.featurizer = RobertaFeaturizer.from_pretrained(
        "seyonec/SMILES_tokenized_PubChem_shard00_160k")


@pytest.mark.torch
def test_smiles_call(self):
    """Test __call__ method for the featurizer, which is inherited from HuggingFace's RobertaTokenizerFast"""
    from deepchem.feat.roberta_tokenizer import RobertaFeaturizer
    self.smiles = ["Cn1c(=O)c2c(ncn2C)n(C)c1=O", "CC(=O)N1CN(C(C)=O)C(O)C1O"]
    self.long_molecule_smiles = [
        "CCCCCCCCCCCCCCCCCCCC(=O)OCCCNC(=O)c1ccccc1SSc1ccccc1C(=O)NCCCOC(=O)CCCCCCCCCCCCCCCCCCC"
    ]
    self.featurizer = RobertaFeaturizer.from_pretrained(
        "seyonec/SMILES_tokenized_PubChem_shard00_160k")
    embedding = self.featurizer(
        self.smiles, add_special_tokens=True, truncation=True)
    embedding_long = self.featurizer(
@@ -26,6 +30,7 @@ class TestRobertaFeaturizer(unittest.TestCase):
        assert 'input_ids' in emb.keys() and 'attention_mask' in emb.keys()
        assert len(emb['input_ids']) == 2 and len(emb['attention_mask']) == 2


@pytest.mark.torch
def test_smiles_featurize(self):
    """Test the .featurize method, which will convert the dictionary output to an array
@@ -33,11 +38,17 @@ class TestRobertaFeaturizer(unittest.TestCase):
    Checks that all SMILES are featurized and that each featurization
    contains input_ids and attention_mask
    """
    from deepchem.feat.roberta_tokenizer import RobertaFeaturizer
    self.smiles = ["Cn1c(=O)c2c(ncn2C)n(C)c1=O", "CC(=O)N1CN(C(C)=O)C(O)C1O"]
    self.long_molecule_smiles = [
        "CCCCCCCCCCCCCCCCCCCC(=O)OCCCNC(=O)c1ccccc1SSc1ccccc1C(=O)NCCCOC(=O)CCCCCCCCCCCCCCCCCCC"
    ]
    self.featurizer = RobertaFeaturizer.from_pretrained(
        "seyonec/SMILES_tokenized_PubChem_shard00_160k")
    feats = self.featurizer.featurize(
        self.smiles, add_special_tokens=True, truncation=True)
    assert (len(feats) == 2)
    assert (all([len(f) == 2 for f in feats]))

    long_feat = self.featurizer.featurize(
        self.long_molecule_smiles, add_special_tokens=True, truncation=True)
    assert (len(long_feat) == 1)