ArthurZ committed
Commit 3348231
1 Parent(s): 5ff6499

Upload tokenizer

added_tokens.json CHANGED
@@ -17,7 +17,6 @@
  "<|da|>": 50285,
  "<|de|>": 50261,
  "<|el|>": 50281,
- "<|endoftext|>": 50257,
  "<|en|>": 50259,
  "<|es|>": 50262,
  "<|et|>": 50307,
merges.txt CHANGED
@@ -1,4 +1,4 @@
- #version: 0.2
+ #version: 0.2 - Trained by `huggingface/tokenizers`
  Ġ a
  Ġt h
  i n
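Only the comment header of merges.txt changes; the merge rules themselves are untouched. A quick sketch, assuming local copies of vocab.json and merges.txt from this repo, showing that the BPE loader in `tokenizers` still accepts the file since the `#version` line is treated as a header comment:

```python
from tokenizers import Tokenizer
from tokenizers.models import BPE

# Assumption: vocab.json and merges.txt are local copies of the files in
# this repo. The new "#version: 0.2 - Trained by ..." header is still
# parsed as the version line, so loading is unaffected.
tokenizer = Tokenizer(BPE.from_file("vocab.json", "merges.txt"))
print(tokenizer.get_vocab_size())
```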
special_tokens_map.json CHANGED
@@ -124,7 +124,7 @@
  },
  "pad_token": "<|endoftext|>",
  "unk_token": {
- "content": "",
+ "content": "<|endoftext|>",
  "lstrip": false,
  "normalized": true,
  "rstrip": false,
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
@@ -19,7 +19,6 @@
  },
  "errors": "replace",
  "model_max_length": 1024,
- "name_or_path": "openai/whisper-large",
  "pad_token": null,
  "processor_class": "WhisperProcessor",
  "return_attention_mask": false,
@@ -27,7 +26,7 @@
  "tokenizer_class": "WhisperTokenizer",
  "unk_token": {
  "__type": "AddedToken",
- "content": "",
+ "content": "<|endoftext|>",
  "lstrip": false,
  "normalized": true,
  "rstrip": false,
vocab.json CHANGED
The diff for this file is too large to render. See raw diff