waveletdeboshir committed on
Commit
c319bed
1 Parent(s): 5f1bd4c

Upload model, tokenizer, preprocessor

added_tokens.json ADDED
@@ -0,0 +1,11 @@
+ {
+   "<|en|>": 4199,
+   "<|nocaptions|>": 4205,
+   "<|notimestamps|>": 4206,
+   "<|ru|>": 4200,
+   "<|startoflm|>": 4203,
+   "<|startofprev|>": 4204,
+   "<|startoftranscript|>": 4198,
+   "<|transcribe|>": 4202,
+   "<|translate|>": 4201
+ }
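
A minimal sketch (not part of the commit) of how the uploaded tokenizer could be loaded and checked against the token-to-ID mapping in added_tokens.json. The repo id "waveletdeboshir/whisper-base-ru" is a placeholder; substitute the repository this commit belongs to.

```python
from transformers import WhisperTokenizer

# Placeholder repo id; use the actual repository for this commit.
tokenizer = WhisperTokenizer.from_pretrained("waveletdeboshir/whisper-base-ru")

# IDs taken directly from added_tokens.json above.
expected = {
    "<|startoftranscript|>": 4198,
    "<|en|>": 4199,
    "<|ru|>": 4200,
    "<|translate|>": 4201,
    "<|transcribe|>": 4202,
    "<|startoflm|>": 4203,
    "<|startofprev|>": 4204,
    "<|nocaptions|>": 4205,
    "<|notimestamps|>": 4206,
}
for token, token_id in expected.items():
    assert tokenizer.convert_tokens_to_ids(token) == token_id, token
```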
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f8069ffabe90ccf8ab6b0c22fb38e7282b81dd54b7c2b3aa89218f3aab0a6ebe
+ size 192800072
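
The weights themselves live in Git LFS; the file committed here is only a pointer whose `oid` is the SHA-256 of the real file and whose `size` is its length in bytes. A minimal sketch (not part of the commit) of verifying a downloaded copy against this pointer; the local path is hypothetical.

```python
import hashlib
from pathlib import Path

path = Path("model.safetensors")  # placeholder path to the downloaded weights
data = path.read_bytes()

# Values copied from the LFS pointer above.
assert len(data) == 192800072
assert hashlib.sha256(data).hexdigest() == (
    "f8069ffabe90ccf8ab6b0c22fb38e7282b81dd54b7c2b3aa89218f3aab0a6ebe"
)
```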
preprocessor_config.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,108 @@
+ {
+   "add_bos_token": false,
+   "add_prefix_space": false,
+   "added_tokens_decoder": {
+     "4197": {
+       "content": "<|endoftext|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "4198": {
+       "content": "<|startoftranscript|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "4199": {
+       "content": "<|en|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "4200": {
+       "content": "<|ru|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "4201": {
+       "content": "<|translate|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "4202": {
+       "content": "<|transcribe|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "4203": {
+       "content": "<|startoflm|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "4204": {
+       "content": "<|startofprev|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "4205": {
+       "content": "<|nocaptions|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "4206": {
+       "content": "<|notimestamps|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "additional_special_tokens": [
+     "<|endoftext|>",
+     "<|startoftranscript|>",
+     "<|en|>",
+     "<|ru|>",
+     "<|translate|>",
+     "<|transcribe|>",
+     "<|startoflm|>",
+     "<|startofprev|>",
+     "<|nocaptions|>",
+     "<|notimestamps|>"
+   ],
+   "bos_token": "<|endoftext|>",
+   "clean_up_tokenization_spaces": true,
+   "eos_token": "<|endoftext|>",
+   "errors": "replace",
+   "model_max_length": 1024,
+   "pad_token": "<|endoftext|>",
+   "processor_class": "WhisperProcessor",
+   "return_attention_mask": false,
+   "tokenizer_class": "WhisperTokenizer",
+   "unk_token": "<|endoftext|>"
+ }
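
Since tokenizer_config.json declares `"processor_class": "WhisperProcessor"` and `"tokenizer_class": "WhisperTokenizer"`, the upload is meant to be consumed through the Whisper processing classes. A minimal sketch (not part of the commit) of loading the processor and building the Russian transcription prompt from the special tokens defined above; the repo id is again a placeholder.

```python
from transformers import WhisperProcessor

# Placeholder repo id; use the actual repository for this commit.
processor = WhisperProcessor.from_pretrained("waveletdeboshir/whisper-base-ru")

# Returns (position, token_id) pairs for the forced decoder prompt,
# e.g. <|ru|> (4200), <|transcribe|> (4202), <|notimestamps|> (4206).
prompt_ids = processor.get_decoder_prompt_ids(language="russian", task="transcribe")
print(prompt_ids)
```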
vocab.json ADDED
The diff for this file is too large to render. See raw diff