change from unittest to pytest (f1cb7d59) · Commits · 钟慕尧 / deepchem

deepchem/feat/tests/test_roberta_tokenizer.py

+25 −14

Original line number	Diff line number	Diff line
		import unittest
		from deepchem.feat.roberta_tokenizer import RobertaFeaturizer
		import pytest


		class TestRobertaFeaturizer(unittest.TestCase):
		"""Tests for RobertaFeaturizer"""

		@pytest.mark.torch
		def setUp(self):
		from deepchem.feat.roberta_tokenizer import RobertaFeaturizer
		self.smiles = ["Cn1c(=O)c2c(ncn2C)n(C)c1=O", "CC(=O)N1CN(C(C)=O)C(O)C1O"]
		self.long_molecule_smiles = [
		"CCCCCCCCCCCCCCCCCCCC(=O)OCCCNC(=O)c1ccccc1SSc1ccccc1C(=O)NCCCOC(=O)CCCCCCCCCCCCCCCCCCC"
		@@ -15,9 +11,17 @@ class TestRobertaFeaturizer(unittest.TestCase):
		self.featurizer = RobertaFeaturizer.from_pretrained(
		"seyonec/SMILES_tokenized_PubChem_shard00_160k")


		@pytest.mark.torch
		def test_smiles_call(self):
		"""Test __call__ method for the featurizer, which is inherited from HuggingFace's RobertaTokenizerFast"""
		from deepchem.feat.roberta_tokenizer import RobertaFeaturizer
		self.smiles = ["Cn1c(=O)c2c(ncn2C)n(C)c1=O", "CC(=O)N1CN(C(C)=O)C(O)C1O"]
		self.long_molecule_smiles = [
		"CCCCCCCCCCCCCCCCCCCC(=O)OCCCNC(=O)c1ccccc1SSc1ccccc1C(=O)NCCCOC(=O)CCCCCCCCCCCCCCCCCCC"
		]
		self.featurizer = RobertaFeaturizer.from_pretrained(
		"seyonec/SMILES_tokenized_PubChem_shard00_160k")
		embedding = self.featurizer(
		self.smiles, add_special_tokens=True, truncation=True)
		embedding_long = self.featurizer(
		@@ -26,6 +30,7 @@ class TestRobertaFeaturizer(unittest.TestCase):
		assert 'input_ids' in emb.keys() and 'attention_mask' in emb.keys()
		assert len(emb['input_ids']) == 2 and len(emb['attention_mask']) == 2


		@pytest.mark.torch
		def test_smiles_featurize(self):
		    """Exercise ``RobertaFeaturizer.featurize`` on short and long SMILES.

		    A pretrained featurizer is loaded and applied to a batch of two SMILES
		    strings and, separately, to a single long SMILES string. The test
		    checks that the batch produces two entries, that every entry has
		    length two (per the original docstring: input_ids and
		    attention_mask), and that the long molecule produces exactly one
		    entry.
		    """
		    # Imported inside the test body rather than at module level
		    # (presumably so the module can be collected without the torch
		    # stack installed -- confirm against the project's test setup).
		    from deepchem.feat.roberta_tokenizer import RobertaFeaturizer

		    # The fixtures are stored on ``self``, exactly as the original does.
		    self.smiles = [
		        "Cn1c(=O)c2c(ncn2C)n(C)c1=O",
		        "CC(=O)N1CN(C(C)=O)C(O)C1O",
		    ]
		    self.long_molecule_smiles = [
		        "CCCCCCCCCCCCCCCCCCCC(=O)OCCCNC(=O)c1ccccc1SSc1ccccc1C(=O)NCCCOC(=O)CCCCCCCCCCCCCCCCCCC"
		    ]
		    self.featurizer = RobertaFeaturizer.from_pretrained(
		        "seyonec/SMILES_tokenized_PubChem_shard00_160k")

		    # Both featurize calls use the same tokenizer options.
		    tokenizer_options = {"add_special_tokens": True, "truncation": True}

		    batch_features = self.featurizer.featurize(self.smiles,
		                                               **tokenizer_options)
		    assert len(batch_features) == 2
		    # Measure every entry first, then require each to have two components.
		    entry_sizes = [len(entry) for entry in batch_features]
		    assert all(size == 2 for size in entry_sizes)

		    long_features = self.featurizer.featurize(self.long_molecule_smiles,
		                                              **tokenizer_options)
		    assert len(long_features) == 1

Admin message