Unverified Commit a882ad8f authored by Bharath Ramsundar's avatar Bharath Ramsundar Committed by GitHub
Browse files

Merge pull request #2578 from seyonechithrananda/st_fix

Corrected Test Case for Smiles Tokenizer (adding to #2572)
parents b010928f f541cea3
Loading
Loading
Loading
Loading
+6 −4
Original line number Diff line number Diff line
@@ -2,6 +2,7 @@
import os
import unittest
from unittest import TestCase
import pytest
try:
  from transformers import RobertaForMaskedLM
  from deepchem.feat.smiles_tokenizer import SmilesTokenizer
@@ -14,15 +15,16 @@ class TestSmilesTokenizer(TestCase):
  """Tests the SmilesTokenizer to load the USPTO vocab file and a ChemBERTa Masked LM model with pre-trained weights."""

  @unittest.skipIf(not has_transformers, 'transformers are not installed')
  @pytest.mark.torch
  def test_tokenize(self):
    current_dir = os.path.dirname(os.path.realpath(__file__))
    vocab_path = os.path.join(current_dir, 'data', 'vocab.txt')
    tokenized_smiles = [
        12, 16, 16, 16, 17, 16, 16, 18, 16, 19, 16, 17, 22, 19, 18, 33, 17, 16,
        18, 23, 181, 17, 22, 19, 18, 17, 19, 16, 33, 20, 19, 55, 17, 16, 23, 18,
        17, 33, 17, 19, 18, 35, 20, 19, 18, 16, 20, 22, 16, 16, 22, 16, 21, 23,
        20, 23, 22, 16, 23, 22, 16, 21, 23, 18, 19, 16, 20, 22, 16, 16, 22, 16,
        16, 22, 16, 20, 13
        18, 23, 181, 17, 22, 19, 18, 17, 19, 16, 33, 20, 19, 55, 17, 16, 38, 23,
        18, 17, 33, 17, 19, 18, 35, 20, 19, 18, 16, 20, 22, 16, 16, 22, 16, 21,
        23, 20, 23, 22, 16, 23, 22, 16, 21, 23, 18, 19, 16, 20, 22, 16, 16, 22,
        16, 16, 22, 16, 20, 13
    ]

    model = RobertaForMaskedLM.from_pretrained(
+1 −0
Original line number Diff line number Diff line
@@ -8,6 +8,7 @@ dependencies:
  - numpy==1.21.*
  - pip==20.2.*
  - pip:
    - transformers==4.6.*
    - tensorflow~=2.4
    - dgllife==0.2.8
    - pymatgen==2020.12.31