danielhanchen committed
Commit
1da36b4
1 Parent(s): 123ed36

Upload tokenizer

Files changed (2)
  1. tokenizer.json +1 -0
  2. tokenizer_config.json +3 -2
tokenizer.json CHANGED
@@ -134,6 +134,7 @@
     "end_of_word_suffix": null,
     "fuse_unk": true,
     "byte_fallback": true,
+    "ignore_merges": false,
     "vocab": {
      "<unk>": 0,
      "<s>": 1,
tokenizer_config.json CHANGED
@@ -1,6 +1,7 @@
 {
   "add_bos_token": true,
   "add_eos_token": false,
+  "add_prefix_space": null,
   "added_tokens_decoder": {
     "0": {
       "content": "<unk>",
@@ -31,9 +32,9 @@
   "clean_up_tokenization_spaces": false,
   "eos_token": "</s>",
   "legacy": false,
-  "model_max_length": 2048,
+  "model_max_length": 1000000000000000019884624838656,
   "pad_token": "<unk>",
-  "padding_side": "right",
+  "padding_side": "left",
   "sp_model_kwargs": {},
   "tokenizer_class": "LlamaTokenizer",
   "unk_token": "<unk>",