Training in progress, step 500
Browse files
- .ipynb_checkpoints/mozilla-foundation_common_voice_8_0_ky_test_eval_results-checkpoint.txt +2 -0
- .ipynb_checkpoints/run-checkpoint.sh +6 -6
- config.json +2 -2
- pytorch_model.bin +1 -1
- run.sh +6 -6
- runs/Feb05_21-45-17_job-699ba53c-fea9-4eb2-81af-a97f440eaa45/1644097571.4686165/events.out.tfevents.1644097571.job-699ba53c-fea9-4eb2-81af-a97f440eaa45.2077552.1 +3 -0
- runs/Feb05_21-45-17_job-699ba53c-fea9-4eb2-81af-a97f440eaa45/events.out.tfevents.1644097571.job-699ba53c-fea9-4eb2-81af-a97f440eaa45.2077552.0 +3 -0
- special_tokens_map.json +1 -1
- training_args.bin +1 -1
.ipynb_checkpoints/mozilla-foundation_common_voice_8_0_ky_test_eval_results-checkpoint.txt
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
WER: 0.31282098312545853
|
2 |
+
CER: 0.07663251646764126
|
.ipynb_checkpoints/run-checkpoint.sh
CHANGED
@@ -2,11 +2,11 @@ python ~/xls-r-kyrgiz-cv8/run_speech_recognition_ctc.py \
|
|
2 |
--dataset_name="mozilla-foundation/common_voice_8_0" \
|
3 |
--model_name_or_path="facebook/wav2vec2-xls-r-300m" \
|
4 |
--dataset_config_name="ky" \
|
5 |
-
--train_split_name="train+validation
|
6 |
-
--eval_split_name="
|
7 |
--output_dir="~/xls-r-kyrgiz-cv8" \
|
8 |
--overwrite_output_dir \
|
9 |
-
--num_train_epochs="
|
10 |
--per_device_train_batch_size="32" \
|
11 |
--per_device_eval_batch_size="8" \
|
12 |
--gradient_accumulation_steps="4" \
|
@@ -16,19 +16,19 @@ python ~/xls-r-kyrgiz-cv8/run_speech_recognition_ctc.py \
|
|
16 |
--evaluation_strategy="steps" \
|
17 |
--text_column_name="sentence" \
|
18 |
--chars_to_ignore , ? . ! \- \; \: \\ _ \| ‒ ☺ ♂ © « ¬ » \" „ “ % ” � — ’ ، ؛ ؟ ‹ › − … – \
|
19 |
-
--eval_metrics
|
20 |
--save_steps="500" \
|
21 |
--eval_steps="500" \
|
22 |
--logging_steps="100" \
|
23 |
--min_duration_in_seconds="0.2" \
|
24 |
-
--layerdrop="0.
|
25 |
--activation_dropout="0.1" \
|
26 |
--save_total_limit="3" \
|
27 |
--freeze_feature_encoder \
|
28 |
--feat_proj_dropout="0.01" \
|
29 |
--mask_time_prob="0.50" \
|
30 |
--mask_time_length="10" \
|
31 |
-
--mask_feature_prob="0.
|
32 |
--mask_feature_length="64" \
|
33 |
--gradient_checkpointing \
|
34 |
--use_auth_token \
|
|
|
2 |
--dataset_name="mozilla-foundation/common_voice_8_0" \
|
3 |
--model_name_or_path="facebook/wav2vec2-xls-r-300m" \
|
4 |
--dataset_config_name="ky" \
|
5 |
+
--train_split_name="train+validation+other" \
|
6 |
+
--eval_split_name="test[50%:]" \
|
7 |
--output_dir="~/xls-r-kyrgiz-cv8" \
|
8 |
--overwrite_output_dir \
|
9 |
+
--num_train_epochs="300" \
|
10 |
--per_device_train_batch_size="32" \
|
11 |
--per_device_eval_batch_size="8" \
|
12 |
--gradient_accumulation_steps="4" \
|
|
|
16 |
--evaluation_strategy="steps" \
|
17 |
--text_column_name="sentence" \
|
18 |
--chars_to_ignore , ? . ! \- \; \: \\ _ \| ‒ ☺ ♂ © « ¬ » \" „ “ % ” � — ’ ، ؛ ؟ ‹ › − … – \
|
19 |
+
--eval_metrics wer cer \
|
20 |
--save_steps="500" \
|
21 |
--eval_steps="500" \
|
22 |
--logging_steps="100" \
|
23 |
--min_duration_in_seconds="0.2" \
|
24 |
+
--layerdrop="0.0" \
|
25 |
--activation_dropout="0.1" \
|
26 |
--save_total_limit="3" \
|
27 |
--freeze_feature_encoder \
|
28 |
--feat_proj_dropout="0.01" \
|
29 |
--mask_time_prob="0.50" \
|
30 |
--mask_time_length="10" \
|
31 |
+
--mask_feature_prob="0.4" \
|
32 |
--mask_feature_length="64" \
|
33 |
--gradient_checkpointing \
|
34 |
--use_auth_token \
|
config.json
CHANGED
@@ -58,10 +58,10 @@
|
|
58 |
"initializer_range": 0.02,
|
59 |
"intermediate_size": 4096,
|
60 |
"layer_norm_eps": 1e-05,
|
61 |
-
"layerdrop": 0.
|
62 |
"mask_feature_length": 64,
|
63 |
"mask_feature_min_masks": 0,
|
64 |
-
"mask_feature_prob": 0.
|
65 |
"mask_time_length": 10,
|
66 |
"mask_time_min_masks": 2,
|
67 |
"mask_time_prob": 0.5,
|
|
|
58 |
"initializer_range": 0.02,
|
59 |
"intermediate_size": 4096,
|
60 |
"layer_norm_eps": 1e-05,
|
61 |
+
"layerdrop": 0.0,
|
62 |
"mask_feature_length": 64,
|
63 |
"mask_feature_min_masks": 0,
|
64 |
+
"mask_feature_prob": 0.4,
|
65 |
"mask_time_length": 10,
|
66 |
"mask_time_min_masks": 2,
|
67 |
"mask_time_prob": 0.5,
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1262095857
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e8dc2cfa0eff32907f7b3e1700f723eb02ccfbc100c03c38d7bf6d75f25e8565
|
3 |
size 1262095857
|
run.sh
CHANGED
@@ -2,11 +2,11 @@ python ~/xls-r-kyrgiz-cv8/run_speech_recognition_ctc.py \
|
|
2 |
--dataset_name="mozilla-foundation/common_voice_8_0" \
|
3 |
--model_name_or_path="facebook/wav2vec2-xls-r-300m" \
|
4 |
--dataset_config_name="ky" \
|
5 |
-
--train_split_name="train+validation
|
6 |
-
--eval_split_name="
|
7 |
--output_dir="~/xls-r-kyrgiz-cv8" \
|
8 |
--overwrite_output_dir \
|
9 |
-
--num_train_epochs="
|
10 |
--per_device_train_batch_size="32" \
|
11 |
--per_device_eval_batch_size="8" \
|
12 |
--gradient_accumulation_steps="4" \
|
@@ -16,19 +16,19 @@ python ~/xls-r-kyrgiz-cv8/run_speech_recognition_ctc.py \
|
|
16 |
--evaluation_strategy="steps" \
|
17 |
--text_column_name="sentence" \
|
18 |
--chars_to_ignore , ? . ! \- \; \: \\ _ \| ‒ ☺ ♂ © « ¬ » \" „ “ % ” � — ’ ، ؛ ؟ ‹ › − … – \
|
19 |
-
--eval_metrics
|
20 |
--save_steps="500" \
|
21 |
--eval_steps="500" \
|
22 |
--logging_steps="100" \
|
23 |
--min_duration_in_seconds="0.2" \
|
24 |
-
--layerdrop="0.
|
25 |
--activation_dropout="0.1" \
|
26 |
--save_total_limit="3" \
|
27 |
--freeze_feature_encoder \
|
28 |
--feat_proj_dropout="0.01" \
|
29 |
--mask_time_prob="0.50" \
|
30 |
--mask_time_length="10" \
|
31 |
-
--mask_feature_prob="0.
|
32 |
--mask_feature_length="64" \
|
33 |
--gradient_checkpointing \
|
34 |
--use_auth_token \
|
|
|
2 |
--dataset_name="mozilla-foundation/common_voice_8_0" \
|
3 |
--model_name_or_path="facebook/wav2vec2-xls-r-300m" \
|
4 |
--dataset_config_name="ky" \
|
5 |
+
--train_split_name="train+validation+other" \
|
6 |
+
--eval_split_name="test[50%:]" \
|
7 |
--output_dir="~/xls-r-kyrgiz-cv8" \
|
8 |
--overwrite_output_dir \
|
9 |
+
--num_train_epochs="300" \
|
10 |
--per_device_train_batch_size="32" \
|
11 |
--per_device_eval_batch_size="8" \
|
12 |
--gradient_accumulation_steps="4" \
|
|
|
16 |
--evaluation_strategy="steps" \
|
17 |
--text_column_name="sentence" \
|
18 |
--chars_to_ignore , ? . ! \- \; \: \\ _ \| ‒ ☺ ♂ © « ¬ » \" „ “ % ” � — ’ ، ؛ ؟ ‹ › − … – \
|
19 |
+
--eval_metrics wer cer \
|
20 |
--save_steps="500" \
|
21 |
--eval_steps="500" \
|
22 |
--logging_steps="100" \
|
23 |
--min_duration_in_seconds="0.2" \
|
24 |
+
--layerdrop="0.0" \
|
25 |
--activation_dropout="0.1" \
|
26 |
--save_total_limit="3" \
|
27 |
--freeze_feature_encoder \
|
28 |
--feat_proj_dropout="0.01" \
|
29 |
--mask_time_prob="0.50" \
|
30 |
--mask_time_length="10" \
|
31 |
+
--mask_feature_prob="0.4" \
|
32 |
--mask_feature_length="64" \
|
33 |
--gradient_checkpointing \
|
34 |
--use_auth_token \
|
runs/Feb05_21-45-17_job-699ba53c-fea9-4eb2-81af-a97f440eaa45/1644097571.4686165/events.out.tfevents.1644097571.job-699ba53c-fea9-4eb2-81af-a97f440eaa45.2077552.1
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:aec5c9bfe83fac7fcd98acde05ce85aa7a981132e4299d52fc1c40e06bace6e9
|
3 |
+
size 4829
|
runs/Feb05_21-45-17_job-699ba53c-fea9-4eb2-81af-a97f440eaa45/events.out.tfevents.1644097571.job-699ba53c-fea9-4eb2-81af-a97f440eaa45.2077552.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ef57ddb22dc8090ed5778c2b12a2e70a92c8657a552e2725c94afc9b4578a8a4
|
3 |
+
size 5929
|
special_tokens_map.json
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"bos_token": "<s>", "eos_token": "</s>", "unk_token": "[UNK]", "pad_token": "[PAD]", "additional_special_tokens": [{"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}]}
|
|
|
1 |
+
{"bos_token": "<s>", "eos_token": "</s>", "unk_token": "[UNK]", "pad_token": "[PAD]", "additional_special_tokens": [{"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}]}
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3055
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e5aa32cf8615d4a9dfa30aa360c71b8bbb63969088668bb62da942e0cb4353cd
|
3 |
size 3055
|