grimulkan committed on
Commit
7077c47
1 Parent(s): 7d9b8c7

Removed extra pad token in tokenizer

Browse files
added_tokens.json CHANGED
@@ -1,6 +1,5 @@
1
  {
2
  "</s>": 2,
3
  "<s>": 1,
4
- "<unk>": 0,
5
- "[PAD]": 32000
6
  }
 
1
  {
2
  "</s>": 2,
3
  "<s>": 1,
4
+ "<unk>": 0
 
5
  }
config.json CHANGED
@@ -23,5 +23,5 @@
23
  "torch_dtype": "float16",
24
  "transformers_version": "4.34.0",
25
  "use_cache": true,
26
- "vocab_size": 32001
27
  }
 
23
  "torch_dtype": "float16",
24
  "transformers_version": "4.34.0",
25
  "use_cache": true,
26
+ "vocab_size": 32000
27
  }
model-00001-of-00018.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:11c09d291f3995fc848481fb89673aa4f4e8d332f6b0c121804370ef100fa6f3
3
- size 8141296360
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e43f5076fadec86dd1438d647a1b97cee2ecf27913880541b979b6681050faae
3
+ size 8141279976
model-00018-of-00018.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7de2bca4b674881e49c54e5c84c1b66124b9355c0acc229404db5e627be71972
3
- size 1933640464
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:12c51592d65b959ecb5ea58c60b6e8b7c4d28e196a08d4ed343ed25f582e7d21
3
+ size 1933624080
model.safetensors.index.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "metadata": {
3
- "total_size": 137953329152
4
  },
5
  "weight_map": {
6
  "lm_head.weight": "model-00018-of-00018.safetensors",
 
1
  {
2
  "metadata": {
3
+ "total_size": 137953296384
4
  },
5
  "weight_map": {
6
  "lm_head.weight": "model-00018-of-00018.safetensors",
special_tokens_map.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "bos_token": "<s>",
3
  "eos_token": "</s>",
4
- "pad_token": "[PAD]",
5
  "unk_token": "<unk>"
6
  }
 
1
  {
2
  "bos_token": "<s>",
3
  "eos_token": "</s>",
4
+ "pad_token": "<unk>",
5
  "unk_token": "<unk>"
6
  }
tokenizer_config.json CHANGED
@@ -25,14 +25,6 @@
25
  "rstrip": false,
26
  "single_word": false,
27
  "special": false
28
- },
29
- "32000": {
30
- "content": "[PAD]",
31
- "lstrip": true,
32
- "normalized": false,
33
- "rstrip": true,
34
- "single_word": false,
35
- "special": true
36
  }
37
  },
38
  "additional_special_tokens": [],
@@ -41,12 +33,12 @@
41
  "eos_token": "</s>",
42
  "legacy": true,
43
  "model_max_length": 32768,
44
- "pad_token": "[PAD]",
45
  "padding_side": "right",
46
  "sp_model_kwargs": {},
47
  "spaces_between_special_tokens": false,
48
  "tokenizer_class": "LlamaTokenizer",
49
- "tokenizer_file": "t:\\models\\Text Generation\\models\\llama-2-70b\\tokenizer.json",
50
  "unk_token": "<unk>",
51
  "use_default_system_prompt": true
52
  }
 
25
  "rstrip": false,
26
  "single_word": false,
27
  "special": false
 
 
 
 
 
 
 
 
28
  }
29
  },
30
  "additional_special_tokens": [],
 
33
  "eos_token": "</s>",
34
  "legacy": true,
35
  "model_max_length": 32768,
36
+ "pad_token": "<unk>",
37
  "padding_side": "right",
38
  "sp_model_kwargs": {},
39
  "spaces_between_special_tokens": false,
40
  "tokenizer_class": "LlamaTokenizer",
41
+ "tokenizer_file": "tokenizer.json",
42
  "unk_token": "<unk>",
43
  "use_default_system_prompt": true
44
  }