Shih-Lun Wu committed on
Commit
9434c7c
1 Parent(s): 8810a3b

add model files

Browse files
Files changed (2) hide show
  1. README.md +93 -0
  2. meta.yaml +8 -0
README.md ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ tags:
3
+ - espnet
4
+ - audio
5
+ - speech-recognition
6
+ - openai-whisper
7
+ language: en
8
+ datasets:
9
+ - chime4
10
+ license: cc-by-4.0
11
+ ---
12
+
13
+ ## ESPnet2 ASR model
14
+
15
+ ### `espnet/shihlun_asr_whisper_medium_finetuned_chime4`
16
+ This model was trained by Shih-Lun Wu (slseanwu) using the chime4 recipe in [espnet](https://github.com/espnet/espnet/).
17
+
18
+ ### Demo: How to use in ESPnet2
19
+ ```bash
20
+ cd espnet
21
+ pip install -e .
22
+ cd egs2/chime4/asr1
23
+
24
+ train_set=tr05_multi_noisy_si284 # either tr05_multi_noisy (original training data) or tr05_multi_noisy_si284 (original training data plus si284)
25
+ valid_set=dt05_multi_isolated_1ch_track
26
+ test_sets="dt05_real_isolated_1ch_track dt05_simu_isolated_1ch_track et05_real_isolated_1ch_track et05_simu_isolated_1ch_track"
27
+
28
+ asr_tag=whisper_medium_finetune_lr1e-5_adamw_wd1e-2_3epochs
29
+ asr_config=conf/tuning/train_asr_whisper_full.yaml
30
+ inference_config=conf/decode_asr_whisper_noctc_greedy.yaml
31
+
32
+ ./asr.sh \
33
+ --skip_data_prep false \
34
+ --skip_train true \
35
+ --skip_eval false \
36
+ --lang en \
37
+ --ngpu 1 \
38
+ --nj 4 \
39
+ --stage 1 \
40
+ --stop_stage 13 \
41
+ --gpu_inference true \
42
+ --inference_nj 1 \
43
+ --token_type whisper_multilingual \
44
+ --feats_normalize '' \
45
+ --max_wav_duration 30 \
46
+ --feats_type raw \
47
+ --use_lm false \
48
+ --cleaner whisper_en \
49
+ --asr_tag "${asr_tag}" \
50
+ --asr_config "${asr_config}" \
51
+ --inference_config "${inference_config}" \
52
+ --inference_asr_model valid.acc.ave.pth \
53
+ --train_set "${train_set}" \
54
+ --valid_set "${valid_set}" \
55
+ --test_sets "${test_sets}" "$@"
56
+ ```
57
+
58
+ <!-- Generated by scripts/utils/show_asr_result.sh -->
59
+ # RESULTS
60
+ ## Environments
61
+ - date: `Tue Jan 10 04:15:30 CST 2023`
62
+ - python version: `3.9.13 (main, Aug 25 2022, 23:26:10) [GCC 11.2.0]`
63
+ - espnet version: `espnet 202211`
64
+ - pytorch version: `pytorch 1.12.1`
65
+ - Git hash: `d89be931dcc8f61437ac49cbe39a773f2054c50c`
66
+ - Commit date: `Mon Jan 9 11:06:45 2023 -0600`
67
+
68
+ ## asr_whisper_medium_finetune_lr1e-5_adamw_wd1e-2_3epochs
69
+ ### WER
70
+
71
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
72
+ |---|---|---|---|---|---|---|---|---|
73
+ |decode_asr_whisper_noctc_beam20_asr_model_valid.acc.ave/dt05_real_isolated_1ch_track|1640|24791|97.8|1.7|0.5|0.3|2.5|24.5|
74
+ |decode_asr_whisper_noctc_beam20_asr_model_valid.acc.ave/dt05_simu_isolated_1ch_track|1640|24792|96.1|3.0|0.9|0.5|4.4|35.6|
75
+ |decode_asr_whisper_noctc_beam20_asr_model_valid.acc.ave/et05_real_isolated_1ch_track|1320|19341|96.4|2.9|0.7|0.5|4.1|33.0|
76
+ |decode_asr_whisper_noctc_beam20_asr_model_valid.acc.ave/et05_simu_isolated_1ch_track|1320|19344|93.4|5.0|1.7|0.8|7.4|41.8|
77
+ |decode_asr_whisper_noctc_greedy_asr_model_valid.acc.ave/dt05_real_isolated_1ch_track|1640|24791|97.7|1.8|0.5|0.4|2.8|25.5|
78
+ |decode_asr_whisper_noctc_greedy_asr_model_valid.acc.ave/dt05_simu_isolated_1ch_track|1640|24792|96.0|3.3|0.8|0.7|4.8|36.0|
79
+ |decode_asr_whisper_noctc_greedy_asr_model_valid.acc.ave/et05_real_isolated_1ch_track|1320|19341|96.1|3.3|0.6|0.7|4.6|34.9|
80
+ |decode_asr_whisper_noctc_greedy_asr_model_valid.acc.ave/et05_simu_isolated_1ch_track|1320|19344|92.9|5.8|1.3|1.2|8.3|43.2|
81
+
82
+ ### CER
83
+
84
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
85
+ |---|---|---|---|---|---|---|---|---|
86
+ |decode_asr_whisper_noctc_beam20_asr_model_valid.acc.ave/dt05_real_isolated_1ch_track|1640|141889|99.1|0.3|0.5|0.3|1.2|24.5|
87
+ |decode_asr_whisper_noctc_beam20_asr_model_valid.acc.ave/dt05_simu_isolated_1ch_track|1640|141900|98.2|0.8|1.0|0.5|2.3|35.6|
88
+ |decode_asr_whisper_noctc_beam20_asr_model_valid.acc.ave/et05_real_isolated_1ch_track|1320|110558|98.5|0.7|0.8|0.5|1.9|33.0|
89
+ |decode_asr_whisper_noctc_beam20_asr_model_valid.acc.ave/et05_simu_isolated_1ch_track|1320|110572|96.5|1.6|1.9|0.8|4.3|41.8|
90
+ |decode_asr_whisper_noctc_greedy_asr_model_valid.acc.ave/dt05_real_isolated_1ch_track|1640|141889|99.1|0.4|0.5|0.5|1.3|25.5|
91
+ |decode_asr_whisper_noctc_greedy_asr_model_valid.acc.ave/dt05_simu_isolated_1ch_track|1640|141900|98.2|0.9|0.9|0.6|2.4|36.0|
92
+ |decode_asr_whisper_noctc_greedy_asr_model_valid.acc.ave/et05_real_isolated_1ch_track|1320|110558|98.4|0.9|0.7|0.6|2.2|34.9|
93
+ |decode_asr_whisper_noctc_greedy_asr_model_valid.acc.ave/et05_simu_isolated_1ch_track|1320|110572|96.3|2.0|1.7|1.2|4.9|43.2|
meta.yaml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ espnet: '202211'
2
+ files:
3
+ asr_model_file: exp/asr_whisper_medium_finetune_lr1e-5_adamw_wd1e-2_3epochs/valid.acc.ave.pth
4
+ python: "3.9.13 (main, Aug 25 2022, 23:26:10) [GCC 11.2.0]"
5
+ timestamp: "Tue Jan 10 04:15:30 CST 2023"
6
+ torch: 1.12.1+cu117
7
+ yaml_files:
8
+ asr_train_config: exp/asr_whisper_medium_finetune_lr1e-5_adamw_wd1e-2_3epochs/config.yaml