Unverified commit f57af484, authored by Bharath Ramsundar, committed by GitHub

Merge pull request #2524 from seyonechithrananda/tokenizer_fix

Tokenizer fix
parents a8ead748 4d53b913
@@ -86,8 +86,8 @@ class SmilesTokenizer(BertTokenizer):

    super().__init__(vocab_file, **kwargs)
    # take into account special tokens in max length
-    self.max_len_single_sentence = self.max_len - 2
-    self.max_len_sentences_pair = self.max_len - 3
+    self.max_len_single_sentence = self.model_max_length - 2
+    self.max_len_sentences_pair = self.model_max_length - 3

    if not os.path.isfile(vocab_file):
      raise ValueError(
@@ -98,7 +98,7 @@ class SmilesTokenizer(BertTokenizer):
    self.ids_to_tokens = collections.OrderedDict(
        [(ids, tok) for tok, ids in self.vocab.items()])
    self.basic_tokenizer = BasicSmilesTokenizer()
    self.init_kwargs["max_len"] = self.max_len
    self.init_kwargs["model_max_length"] = self.model_max_length

  @property
  def vocab_size(self):
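
For context, recent transformers releases removed the max_len attribute from BertTokenizer in favor of model_max_length, so the old code raised an AttributeError on construction. A minimal sketch of the fixed behavior follows; the vocab.txt path and the value 512 are assumptions for illustration, not part of the PR.

# Minimal usage sketch, assuming a BERT-style vocabulary file at ./vocab.txt
# (hypothetical path) and the import location of SmilesTokenizer in deepchem.
from deepchem.feat.smiles_tokenizer import SmilesTokenizer

# model_max_length is forwarded through **kwargs to BertTokenizer.__init__.
tokenizer = SmilesTokenizer("vocab.txt", model_max_length=512)

# With this patch, both limits are derived from model_max_length, the
# attribute that current BertTokenizer versions actually expose.
assert tokenizer.max_len_single_sentence == 512 - 2  # room for [CLS], [SEP]
assert tokenizer.max_len_sentences_pair == 512 - 3   # room for [CLS], 2x [SEP]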