Training in progress, step 500
Browse files
- .ipynb_checkpoints/run-checkpoint.sh +1 -1
- .ipynb_checkpoints/run_speech_recognition_ctc-checkpoint.py +5 -1
- added_tokens.json +1 -1
- config.json +2 -2
- pytorch_model.bin +2 -2
- run.sh +1 -1
- run_speech_recognition_ctc.py +5 -1
- runs/Feb02_06-54-25_job-699ba53c-fea9-4eb2-81af-a97f440eaa45/events.out.tfevents.1643785646.job-699ba53c-fea9-4eb2-81af-a97f440eaa45.33872.0 +2 -2
- runs/Feb02_16-57-51_job-699ba53c-fea9-4eb2-81af-a97f440eaa45/1643821174.2161925/events.out.tfevents.1643821174.job-699ba53c-fea9-4eb2-81af-a97f440eaa45.271825.1 +3 -0
- runs/Feb02_16-57-51_job-699ba53c-fea9-4eb2-81af-a97f440eaa45/events.out.tfevents.1643821174.job-699ba53c-fea9-4eb2-81af-a97f440eaa45.271825.0 +3 -0
- special_tokens_map.json +1 -1
- training_args.bin +1 -1
- vocab.json +1 -1
.ipynb_checkpoints/run-checkpoint.sh
CHANGED
@@ -13,7 +13,7 @@ python xls-r-uzbek-cv8/run_speech_recognition_ctc.py \
|
|
13 |
--length_column_name="input_length" \
|
14 |
--evaluation_strategy="steps" \
|
15 |
--text_column_name="sentence" \
|
16 |
-
--eval_metrics="
|
17 |
--save_steps="500" \
|
18 |
--eval_steps="500" \
|
19 |
--logging_steps="100" \
|
|
|
13 |
--length_column_name="input_length" \
|
14 |
--evaluation_strategy="steps" \
|
15 |
--text_column_name="sentence" \
|
16 |
+
--eval_metrics="cer" \
|
17 |
--save_steps="500" \
|
18 |
--eval_steps="500" \
|
19 |
--logging_steps="100" \
|
.ipynb_checkpoints/run_speech_recognition_ctc-checkpoint.py
CHANGED
@@ -448,7 +448,11 @@ def main():
|
|
448 |
batch["target_text"] = re.sub(
|
449 |
chars_to_ignore_regex,
|
450 |
"",
|
451 |
-
re.sub("
|
|
|
|
|
|
|
|
|
452 |
) + " "
|
453 |
else:
|
454 |
batch["target_text"] = batch[text_column_name].lower() + " "
|
|
|
448 |
batch["target_text"] = re.sub(
|
449 |
chars_to_ignore_regex,
|
450 |
"",
|
451 |
+
re.sub("['`´]", "’", # elsewhere probably meant as glottal stop
|
452 |
+
re.sub("([og])['`´]", "\g<1>‘", # after o/g indicate modified char
|
453 |
+
unidecode.unidecode(batch[text_column_name]).lower()
|
454 |
+
)
|
455 |
+
)
|
456 |
) + " "
|
457 |
else:
|
458 |
batch["target_text"] = batch[text_column_name].lower() + " "
|
added_tokens.json
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"<s>":
|
|
|
1 |
+
{"<s>": 31, "</s>": 32}
|
config.json
CHANGED
@@ -76,7 +76,7 @@
|
|
76 |
"num_hidden_layers": 24,
|
77 |
"num_negatives": 100,
|
78 |
"output_hidden_size": 1024,
|
79 |
-
"pad_token_id":
|
80 |
"proj_codevector_dim": 768,
|
81 |
"tdnn_dilation": [
|
82 |
1,
|
@@ -102,7 +102,7 @@
|
|
102 |
"torch_dtype": "float32",
|
103 |
"transformers_version": "4.17.0.dev0",
|
104 |
"use_weighted_layer_sum": false,
|
105 |
-
"vocab_size":
|
106 |
"xvector_output_dim": 512,
|
107 |
"zero_infinity": true
|
108 |
}
|
|
|
76 |
"num_hidden_layers": 24,
|
77 |
"num_negatives": 100,
|
78 |
"output_hidden_size": 1024,
|
79 |
+
"pad_token_id": 30,
|
80 |
"proj_codevector_dim": 768,
|
81 |
"tdnn_dilation": [
|
82 |
1,
|
|
|
102 |
"torch_dtype": "float32",
|
103 |
"transformers_version": "4.17.0.dev0",
|
104 |
"use_weighted_layer_sum": false,
|
105 |
+
"vocab_size": 33,
|
106 |
"xvector_output_dim": 512,
|
107 |
"zero_infinity": true
|
108 |
}
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:92ce1b47197250ba9b80105b9b3a57164fd5eaa1d25043e6b5dfbc5b22589270
|
3 |
+
size 1262058993
|
run.sh
CHANGED
@@ -13,7 +13,7 @@ python xls-r-uzbek-cv8/run_speech_recognition_ctc.py \
|
|
13 |
--length_column_name="input_length" \
|
14 |
--evaluation_strategy="steps" \
|
15 |
--text_column_name="sentence" \
|
16 |
-
--eval_metrics="
|
17 |
--save_steps="500" \
|
18 |
--eval_steps="500" \
|
19 |
--logging_steps="100" \
|
|
|
13 |
--length_column_name="input_length" \
|
14 |
--evaluation_strategy="steps" \
|
15 |
--text_column_name="sentence" \
|
16 |
+
--eval_metrics="cer" \
|
17 |
--save_steps="500" \
|
18 |
--eval_steps="500" \
|
19 |
--logging_steps="100" \
|
run_speech_recognition_ctc.py
CHANGED
@@ -448,7 +448,11 @@ def main():
|
|
448 |
batch["target_text"] = re.sub(
|
449 |
chars_to_ignore_regex,
|
450 |
"",
|
451 |
-
re.sub("
|
|
|
|
|
|
|
|
|
452 |
) + " "
|
453 |
else:
|
454 |
batch["target_text"] = batch[text_column_name].lower() + " "
|
|
|
448 |
batch["target_text"] = re.sub(
|
449 |
chars_to_ignore_regex,
|
450 |
"",
|
451 |
+
re.sub("['`´]", "’", # elsewhere probably meant as glottal stop
|
452 |
+
re.sub("([og])['`´]", "\g<1>‘", # after o/g indicate modified char
|
453 |
+
unidecode.unidecode(batch[text_column_name]).lower()
|
454 |
+
)
|
455 |
+
)
|
456 |
) + " "
|
457 |
else:
|
458 |
batch["target_text"] = batch[text_column_name].lower() + " "
|
runs/Feb02_06-54-25_job-699ba53c-fea9-4eb2-81af-a97f440eaa45/events.out.tfevents.1643785646.job-699ba53c-fea9-4eb2-81af-a97f440eaa45.33872.0
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3190230d80b210fe7ddda30f0da221c3e1e50ac2b1ebc18be513c0c83f125c18
|
3 |
+
size 25074
|
runs/Feb02_16-57-51_job-699ba53c-fea9-4eb2-81af-a97f440eaa45/1643821174.2161925/events.out.tfevents.1643821174.job-699ba53c-fea9-4eb2-81af-a97f440eaa45.271825.1
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:32ca394f643571fa583c586ad18d3e1795498896953804e4db092cacc760025f
|
3 |
+
size 4799
|
runs/Feb02_16-57-51_job-699ba53c-fea9-4eb2-81af-a97f440eaa45/events.out.tfevents.1643821174.job-699ba53c-fea9-4eb2-81af-a97f440eaa45.271825.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0b09a06724cb8553a167b2b4f99d617b29d0c715672c69066c343e058abb1088
|
3 |
+
size 5852
|
special_tokens_map.json
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"bos_token": "<s>", "eos_token": "</s>", "unk_token": "[UNK]", "pad_token": "[PAD]", "additional_special_tokens": [{"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}]}
|
|
|
1 |
+
{"bos_token": "<s>", "eos_token": "</s>", "unk_token": "[UNK]", "pad_token": "[PAD]", "additional_special_tokens": [{"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}]}
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3055
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ade06333b9174b6ec9ef767b07b6738941cd04e17f17deb11ad4726836e129b6
|
3 |
size 3055
|
vocab.json
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"a": 1, "b": 2, "c": 3, "d": 4, "e": 5, "f": 6, "g": 7, "h": 8, "i": 9, "j": 10, "k": 11, "l": 12, "m": 13, "n": 14, "o": 15, "p": 16, "q": 17, "r": 18, "s": 19, "t": 20, "u": 21, "v": 22, "w": 23, "x": 24, "y": 25, "z": 26, "‘": 27, "|": 0, "[UNK]":
|
|
|
1 |
+
{"a": 1, "b": 2, "c": 3, "d": 4, "e": 5, "f": 6, "g": 7, "h": 8, "i": 9, "j": 10, "k": 11, "l": 12, "m": 13, "n": 14, "o": 15, "p": 16, "q": 17, "r": 18, "s": 19, "t": 20, "u": 21, "v": 22, "w": 23, "x": 24, "y": 25, "z": 26, "‘": 27, "’": 28, "|": 0, "[UNK]": 29, "[PAD]": 30}
|