xu song committed on
Commit
f78e379
1 Parent(s): 7119d44

Fix tokenizer load error

Browse files
Files changed (1) hide show
  1. tokenization_moss.py +4 -3
tokenization_moss.py CHANGED
@@ -146,6 +146,10 @@ class MossTokenizer(PreTrainedTokenizer):
146
  eos_token = AddedToken(eos_token, lstrip=False, rstrip=False) if isinstance(eos_token, str) else eos_token
147
  unk_token = AddedToken(unk_token, lstrip=False, rstrip=False) if isinstance(unk_token, str) else unk_token
148
  pad_token = AddedToken(pad_token, lstrip=False, rstrip=False) if isinstance(pad_token, str) else pad_token
 
 
 
 
149
  super().__init__(
150
  errors=errors,
151
  unk_token=unk_token,
@@ -156,10 +160,7 @@ class MossTokenizer(PreTrainedTokenizer):
156
  add_bos_token=add_bos_token,
157
  **kwargs,
158
  )
159
- self.add_bos_token = add_bos_token
160
 
161
- with open(vocab_file, encoding="utf-8") as vocab_handle:
162
- self.encoder = json.load(vocab_handle)
163
  self.decoder = {v: k for k, v in self.encoder.items()}
164
  self.errors = errors # how to handle errors in decoding
165
  self.byte_encoder = bytes_to_unicode()
 
146
  eos_token = AddedToken(eos_token, lstrip=False, rstrip=False) if isinstance(eos_token, str) else eos_token
147
  unk_token = AddedToken(unk_token, lstrip=False, rstrip=False) if isinstance(unk_token, str) else unk_token
148
  pad_token = AddedToken(pad_token, lstrip=False, rstrip=False) if isinstance(pad_token, str) else pad_token
149
+ self.add_bos_token = add_bos_token
150
+ with open(vocab_file, encoding="utf-8") as vocab_handle:
151
+ self.encoder = json.load(vocab_handle)
152
+
153
  super().__init__(
154
  errors=errors,
155
  unk_token=unk_token,
 
160
  add_bos_token=add_bos_token,
161
  **kwargs,
162
  )
 
163
 
 
 
164
  self.decoder = {v: k for k, v in self.encoder.items()}
165
  self.errors = errors # how to handle errors in decoding
166
  self.byte_encoder = bytes_to_unicode()