joaoalvarenga
committed on
Commit
•
ce4dd61
1
Parent(s):
02c5d42
first version
Browse files
- .gitattributes +1 -0
- added_tokens.json +1 -0
- config.json +76 -0
- preprocessor_config.json +8 -0
- pytorch_model.bin +3 -0
- special_tokens_map.json +1 -0
- tokenizer_config.json +1 -0
- vocab.json +1 -0
.gitattributes
CHANGED
@@ -14,3 +14,4 @@
|
|
14 |
*.pb filter=lfs diff=lfs merge=lfs -text
|
15 |
*.pt filter=lfs diff=lfs merge=lfs -text
|
16 |
*.pth filter=lfs diff=lfs merge=lfs -text
|
|
|
|
14 |
*.pb filter=lfs diff=lfs merge=lfs -text
|
15 |
*.pt filter=lfs diff=lfs merge=lfs -text
|
16 |
*.pth filter=lfs diff=lfs merge=lfs -text
|
17 |
+
pytorch_model.bin filter=lfs diff=lfs merge=lfs -text
|
added_tokens.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"<s>": 147, "</s>": 148}
|
config.json
ADDED
@@ -0,0 +1,76 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "/home/joaoalvarenga/wav2vec2-large-xlsr-italian/checkpoint-9800",
|
3 |
+
"activation_dropout": 0.0,
|
4 |
+
"apply_spec_augment": true,
|
5 |
+
"architectures": [
|
6 |
+
"Wav2Vec2ForCTC"
|
7 |
+
],
|
8 |
+
"attention_dropout": 0.1,
|
9 |
+
"bos_token_id": 1,
|
10 |
+
"conv_bias": true,
|
11 |
+
"conv_dim": [
|
12 |
+
512,
|
13 |
+
512,
|
14 |
+
512,
|
15 |
+
512,
|
16 |
+
512,
|
17 |
+
512,
|
18 |
+
512
|
19 |
+
],
|
20 |
+
"conv_kernel": [
|
21 |
+
10,
|
22 |
+
3,
|
23 |
+
3,
|
24 |
+
3,
|
25 |
+
3,
|
26 |
+
2,
|
27 |
+
2
|
28 |
+
],
|
29 |
+
"conv_stride": [
|
30 |
+
5,
|
31 |
+
2,
|
32 |
+
2,
|
33 |
+
2,
|
34 |
+
2,
|
35 |
+
2,
|
36 |
+
2
|
37 |
+
],
|
38 |
+
"ctc_loss_reduction": "mean",
|
39 |
+
"ctc_zero_infinity": false,
|
40 |
+
"do_stable_layer_norm": true,
|
41 |
+
"eos_token_id": 2,
|
42 |
+
"feat_extract_activation": "gelu",
|
43 |
+
"feat_extract_dropout": 0.0,
|
44 |
+
"feat_extract_norm": "layer",
|
45 |
+
"feat_proj_dropout": 0.0,
|
46 |
+
"final_dropout": 0.0,
|
47 |
+
"gradient_checkpointing": true,
|
48 |
+
"hidden_act": "gelu",
|
49 |
+
"hidden_dropout": 0.1,
|
50 |
+
"hidden_size": 1024,
|
51 |
+
"initializer_range": 0.02,
|
52 |
+
"intermediate_size": 4096,
|
53 |
+
"layer_norm_eps": 1e-05,
|
54 |
+
"layerdrop": 0.1,
|
55 |
+
"mask_channel_length": 10,
|
56 |
+
"mask_channel_min_space": 1,
|
57 |
+
"mask_channel_other": 0.0,
|
58 |
+
"mask_channel_prob": 0.0,
|
59 |
+
"mask_channel_selection": "static",
|
60 |
+
"mask_feature_length": 10,
|
61 |
+
"mask_feature_prob": 0.0,
|
62 |
+
"mask_time_length": 10,
|
63 |
+
"mask_time_min_space": 1,
|
64 |
+
"mask_time_other": 0.0,
|
65 |
+
"mask_time_prob": 0.05,
|
66 |
+
"mask_time_selection": "static",
|
67 |
+
"model_type": "wav2vec2",
|
68 |
+
"num_attention_heads": 16,
|
69 |
+
"num_conv_pos_embedding_groups": 16,
|
70 |
+
"num_conv_pos_embeddings": 128,
|
71 |
+
"num_feat_extract_layers": 7,
|
72 |
+
"num_hidden_layers": 24,
|
73 |
+
"pad_token_id": 146,
|
74 |
+
"transformers_version": "4.4.0",
|
75 |
+
"vocab_size": 147
|
76 |
+
}
|
preprocessor_config.json
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"do_normalize": true,
|
3 |
+
"feature_size": 1,
|
4 |
+
"padding_side": "right",
|
5 |
+
"padding_value": 0.0,
|
6 |
+
"return_attention_mask": true,
|
7 |
+
"sampling_rate": 16000
|
8 |
+
}
|
pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1e1325e6e6dca4a5fa6e6a615c0fc13bf0015eb0675476a0c8be7b79163ab359
|
3 |
+
size 1262525543
|
special_tokens_map.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"bos_token": "<s>", "eos_token": "</s>", "unk_token": "[UNK]", "pad_token": "[PAD]"}
|
tokenizer_config.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"unk_token": "[UNK]", "bos_token": "<s>", "eos_token": "</s>", "pad_token": "[PAD]", "do_lower_case": false, "word_delimiter_token": "|", "special_tokens_map_file": "/home/joaoalvarenga/wav2vec2-large-xlsr-italian/special_tokens_map.json", "tokenizer_file": null, "name_or_path": "/home/joaoalvarenga/wav2vec2-large-xlsr-italian"}
|
vocab.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"w": 0, "ş": 1, "ʿ": 2, "л": 3, "ъ": 4, "»": 5, "̇": 6, "đ": 7, "ì": 8, "ã": 9, "æ": 10, "°": 11, "ț": 12, "s": 13, "t": 14, ">": 15, "«": 16, "[": 17, "е": 18, "ン": 19, "r": 20, "o": 21, "l": 23, "´": 24, "ś": 25, "ʻ": 26, "¡": 27, "カ": 28, "ñ": 29, "ו": 30, "<": 31, "ī": 32, "…": 33, "ô": 34, "ø": 35, "ṭ": 36, "f": 37, "ð": 38, "多": 39, "б": 40, "ё": 41, "ř": 42, "ő": 43, "丰": 44, "ı": 45, "í": 46, "c": 47, "万": 48, "š": 49, "ė": 50, "ō": 51, "=": 52, "é": 53, ")": 54, "y": 55, "þ": 56, "ṣ": 57, "/": 58, "(": 59, "–": 60, "張": 61, "å": 62, "ě": 63, "а": 64, "ה": 65, "ü": 66, "e": 67, "ö": 68, "á": 69, "~": 70, "`": 71, "ū": 72, "i": 73, "g": 74, "ß": 75, "_": 76, "’": 77, "峰": 78, "u": 79, "あ": 80, "ň": 81, "ノ": 82, "q": 83, "+": 84, "ú": 85, "ÿ": 86, "ê": 87, "p": 88, "ž": 89, "$": 90, "z": 91, "ʼ": 92, "ц": 93, "č": 94, "ア": 95, "ṛ": 96, "x": 97, "三": 98, "љ": 99, "ù": 100, "ò": 101, "ź": 102, "ľ": 103, "ń": 104, "û": 105, "#": 106, "j": 107, "„": 108, "î": 109, "ḥ": 110, "ー": 111, "h": 112, "è": 113, "禅": 114, "'": 115, "à": 116, "ë": 117, "m": 118, "ł": 119, "ד": 120, "d": 121, "ʾ": 122, "ę": 123, "ā": 124, "ğ": 125, "ï": 126, "]": 127, "旅": 128, "ə": 129, "b": 130, "ć": 131, "ʹ": 132, "サ": 133, "µ": 134, "ä": 135, "œ": 136, "v": 137, "キ": 138, "古": 139, "ș": 140, "n": 141, "k": 142, "ó": 143, "a": 144, "|": 22, "[UNK]": 145, "[PAD]": 146}
|