Unverified commit d1cb1551, authored by Niklas Hölter and committed by GitHub
Browse files

Update test_smiles_tokenizer.py

parent d168aea0
Loading
Loading
Loading
Loading
+1 −7
Original line number Diff line number Diff line
@@ -17,13 +17,7 @@ class TestSmilesTokenizer(TestCase):
  def test_tokenize(self):
    current_dir = os.path.dirname(os.path.realpath(__file__))
    vocab_path = os.path.join(current_dir, 'data', 'vocab.txt')
    tokenized_smiles = [
      12, 16, 16, 16, 17, 16, 16, 18, 16, 19, 16, 17, 22, 19, 18, 33, 17, 16,
      18, 23, 181, 17, 22, 19, 18, 17, 19, 16, 33, 20, 19, 55, 17, 16, 38, 23, 18,
      17, 33, 17, 19, 18, 35, 20, 19, 18, 16, 20, 22, 16, 16, 22, 16, 21, 23,
      20, 23, 22, 16, 23, 22, 16, 21, 23, 18, 19, 16, 20, 22, 16, 16, 22, 16,
      16, 22, 16, 20, 13
    ]
    tokenized_smiles = [12, 16, 16, 16, 17, 16, 16, 18, 16, 19, 16, 17, 22, 19, 18, 33, 17, 16, 18, 23, 181, 17, 22, 19, 18, 17, 19, 16, 33, 20, 19, 55, 17, 16, 38, 23, 18, 17, 33, 17, 19, 18, 35, 20, 19, 18, 16, 20, 22, 16, 16, 22, 16, 21, 23, 20, 23, 22, 16, 23, 22, 16, 21, 23, 18, 19, 16, 20, 22, 16, 16, 22, 16, 16, 22, 16, 20, 13]

    model = RobertaForMaskedLM.from_pretrained(
        'seyonec/SMILES_tokenized_PubChem_shard00_50k')