ssiidd committed
Commit 65191fa
Parent: a79e3a6

Add slurp model files

.DS_Store ADDED
Binary file (10.2 kB).
 
README.md CHANGED
@@ -1,7 +1,7 @@
 ---
-title: ESPnet2 SLU
-emoji: 👀
-colorFrom: pink
+title: ESPnet2 TTS
+emoji: 📈
+colorFrom: green
 colorTo: green
 sdk: gradio
 app_file: app.py
app.py ADDED
@@ -0,0 +1,118 @@
+import gradio as gr
+import soundfile
+import torch
+import scipy.io.wavfile
+from espnet2.bin.tts_inference import Text2Speech
+from espnet2.utils.types import str_or_none
+from espnet2.bin.asr_inference import Speech2Text
+
+tagen = 'kan-bayashi/ljspeech_vits'
+vocoder_tagen = "none"
+
+speech2text = Speech2Text.from_pretrained(
+    asr_train_config="slurp/config.yaml",
+    asr_model_file="slurp/valid.acc.ave_10best.pth",
+    # Decoding parameters are not included in the model file
+    nbest=1
+)
+# Confirm the sampling rate is equal to that of the training corpus.
+# If not, resample the audio data before passing it to speech2text.
+speech, rate = soundfile.read("audio--1504190171-headset.flac")
+nbests = speech2text(speech)
+
+text, *_ = nbests[0]
+print(text)
+exit()  # Stops here: only the SLU check above runs; the TTS demo below is unreachable.
+
+text2speechen = Text2Speech.from_pretrained(
+    model_tag=str_or_none(tagen),
+    vocoder_tag=str_or_none(vocoder_tagen),
+    device="cpu",
+    # Only for Tacotron 2 & Transformer
+    threshold=0.5,
+    # Only for Tacotron 2
+    minlenratio=0.0,
+    maxlenratio=10.0,
+    use_att_constraint=False,
+    backward_window=1,
+    forward_window=3,
+    # Only for FastSpeech & FastSpeech2 & VITS
+    speed_control_alpha=1.0,
+    # Only for VITS
+    noise_scale=0.333,
+    noise_scale_dur=0.333,
+)
+
+
+tagjp = 'kan-bayashi/jsut_full_band_vits_prosody'
+vocoder_tagjp = 'none'
+
+text2speechjp = Text2Speech.from_pretrained(
+    model_tag=str_or_none(tagjp),
+    vocoder_tag=str_or_none(vocoder_tagjp),
+    device="cpu",
+    # Only for Tacotron 2 & Transformer
+    threshold=0.5,
+    # Only for Tacotron 2
+    minlenratio=0.0,
+    maxlenratio=10.0,
+    use_att_constraint=False,
+    backward_window=1,
+    forward_window=3,
+    # Only for FastSpeech & FastSpeech2 & VITS
+    speed_control_alpha=1.0,
+    # Only for VITS
+    noise_scale=0.333,
+    noise_scale_dur=0.333,
+)
+
+tagch = 'kan-bayashi/csmsc_full_band_vits'
+vocoder_tagch = "none"
+
+text2speechch = Text2Speech.from_pretrained(
+    model_tag=str_or_none(tagch),
+    vocoder_tag=str_or_none(vocoder_tagch),
+    device="cpu",
+    # Only for Tacotron 2 & Transformer
+    threshold=0.5,
+    # Only for Tacotron 2
+    minlenratio=0.0,
+    maxlenratio=10.0,
+    use_att_constraint=False,
+    backward_window=1,
+    forward_window=3,
+    # Only for FastSpeech & FastSpeech2 & VITS
+    speed_control_alpha=1.0,
+    # Only for VITS
+    noise_scale=0.333,
+    noise_scale_dur=0.333,
+)
+
+def inference(text, lang):
+    with torch.no_grad():
+        if lang == "english":
+            wav = text2speechen(text)["wav"]
+            scipy.io.wavfile.write("out.wav", text2speechen.fs, wav.view(-1).cpu().numpy())
+        if lang == "chinese":
+            wav = text2speechch(text)["wav"]
+            scipy.io.wavfile.write("out.wav", text2speechch.fs, wav.view(-1).cpu().numpy())
+        if lang == "japanese":
+            wav = text2speechjp(text)["wav"]
+            scipy.io.wavfile.write("out.wav", text2speechjp.fs, wav.view(-1).cpu().numpy())
+    return "out.wav"
+title = "ESPnet2-TTS"
+description = "Gradio demo for ESPnet2-TTS: Extending the Edge of TTS Research. To use it, simply enter your text, or click one of the examples to load them. Read more at the links below."
+article = "<p style='text-align: center'><a href='https://arxiv.org/abs/2110.07840' target='_blank'>ESPnet2-TTS: Extending the Edge of TTS Research</a> | <a href='https://github.com/espnet/espnet' target='_blank'>Github Repo</a></p>"
+
+examples=[['This paper describes ESPnet2-TTS, an end-to-end text-to-speech (E2E-TTS) toolkit. ESPnet2-TTS extends our earlier version, ESPnet-TTS, by adding many new features, including: on-the-fly flexible pre-processing, joint training with neural vocoders, and state-of-the-art TTS models with extensions like full-band E2E text-to-waveform modeling, which simplify the training pipeline and further enhance TTS performance. The unified design of our recipes enables users to quickly reproduce state-of-the-art E2E-TTS results',"english"],['レシピの統一された設計により、ユーザーは最先端のE2E-TTSの結果をすばやく再現できます。また、推論用の統合Pythonインターフェースで事前にトレーニングされたモデルを多数提供し、ユーザーがベースラインサンプルを生成してデモを構築するための迅速な手段を提供します。',"japanese"],['对英语和日语语料库的实验评估表明,我们提供的模型合成了与真实情况相当的话语,达到了最先进的水平',"chinese"]]
+
+gr.Interface(
+    inference,
+    [gr.inputs.Textbox(label="input text", lines=10), gr.inputs.Radio(choices=["english", "chinese", "japanese"], type="value", default="english", label="language")],
+    gr.outputs.Audio(type="file", label="Output"),
+    title=title,
+    description=description,
+    article=article,
+    enable_queue=True,
+    examples=examples
+).launch(debug=True)
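
Note: as committed, app.py exits right after printing one SLU result, so the Gradio interface below that point (still the original TTS demo) never launches. The following is a minimal sketch, not part of this commit, of how the SLURP model added here could be exposed through Gradio instead; the helper name slu_inference is hypothetical, and it assumes the same Gradio 2.x API (gr.inputs / gr.outputs) already used in app.py and 16 kHz input audio, as expected by the model.

import gradio as gr
import numpy as np
from espnet2.bin.asr_inference import Speech2Text

# Load the SLU model shipped in this commit (same files as in app.py).
speech2text = Speech2Text.from_pretrained(
    asr_train_config="slurp/config.yaml",
    asr_model_file="slurp/valid.acc.ave_10best.pth",
    nbest=1,
)

def slu_inference(audio):
    # With type="numpy", Gradio passes (sample_rate, samples); the model expects 16 kHz float audio.
    rate, speech = audio
    if speech.dtype == np.int16:
        speech = speech.astype(np.float32) / 32768.0
    text, *_ = speech2text(speech)[0]
    return text

gr.Interface(
    slu_inference,
    gr.inputs.Audio(source="upload", type="numpy", label="input speech"),
    gr.outputs.Textbox(label="SLU output"),
    title="ESPnet2 SLU (SLURP)",
).launch()
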
audio--1504190171-headset.flac ADDED
Binary file (40.9 kB).
 
packages.txt ADDED
@@ -0,0 +1,2 @@
+cmake
+libsndfile1
requirements.txt ADDED
@@ -0,0 +1,6 @@
+espnet==0.10.3
+pyopenjtalk==0.1.5
+parallel_wavegan==0.5.3
+espnet_model_zoo
+scipy
+torch
slurp/config.yaml ADDED
@@ -0,0 +1,754 @@
+config: conf/tuning/train_asr_conformer.yaml
+print_config: false
+log_level: INFO
+dry_run: false
+iterator_type: sequence
+output_dir: exp/asr_train_asr_conformer_raw_en_word
+ngpu: 1
+seed: 0
+num_workers: 1
+num_att_plot: 3
+dist_backend: nccl
+dist_init_method: env://
+dist_world_size: null
+dist_rank: null
+local_rank: 0
+dist_master_addr: null
+dist_master_port: null
+dist_launcher: null
+multiprocessing_distributed: false
+unused_parameters: false
+sharded_ddp: false
+cudnn_enabled: true
+cudnn_benchmark: false
+cudnn_deterministic: true
+collect_stats: false
+write_collected_feats: false
+max_epoch: 50
+patience: null
+val_scheduler_criterion:
+- valid
+- loss
+early_stopping_criterion:
+- valid
+- loss
+- min
+best_model_criterion:
+- - valid
+  - acc
+  - max
+keep_nbest_models: 10
+grad_clip: 5.0
+grad_clip_type: 2.0
+grad_noise: false
+accum_grad: 1
+no_forward_run: false
+resume: true
+train_dtype: float32
+use_amp: false
+log_interval: null
+use_tensorboard: true
+use_wandb: false
+wandb_project: null
+wandb_id: null
+wandb_entity: null
+wandb_name: null
+wandb_model_log_interval: -1
+detect_anomaly: false
+pretrain_path: null
+init_param: []
+ignore_init_mismatch: false
+freeze_param: []
+num_iters_per_epoch: null
+batch_size: 20
+valid_batch_size: null
+batch_bins: 1000000
+valid_batch_bins: null
+train_shape_file:
+- exp/asr_stats_raw_en_word/train/speech_shape
+- exp/asr_stats_raw_en_word/train/text_shape.word
+valid_shape_file:
+- exp/asr_stats_raw_en_word/valid/speech_shape
+- exp/asr_stats_raw_en_word/valid/text_shape.word
+batch_type: folded
+valid_batch_type: null
+fold_length:
+- 80000
+- 150
+sort_in_batch: descending
+sort_batch: descending
+multiple_iterator: false
+chunk_length: 500
+chunk_shift_ratio: 0.5
+num_cache_chunks: 1024
+train_data_path_and_name_and_type:
+- - dump/raw/train/wav.scp
+  - speech
+  - sound
+- - dump/raw/train/text
+  - text
+  - text
+valid_data_path_and_name_and_type:
+- - dump/raw/devel/wav.scp
+  - speech
+  - sound
+- - dump/raw/devel/text
+  - text
+  - text
+allow_variable_data_keys: false
+max_cache_size: 0.0
+max_cache_fd: 32
+valid_max_cache_size: null
+optim: adam
+optim_conf:
+    lr: 0.0002
+scheduler: warmuplr
+scheduler_conf:
+    warmup_steps: 25000
+token_list:
+- <blank>
+- <unk>
+- ▁the
+- s
+- ▁to
+- ▁i
+- ▁me
+- ▁you
+- ▁what
+- ▁a
+- ▁is
+- ▁my
+- ▁please
+- a
+- ''''
+- y
+- ▁in
+- ing
+- ▁s
+- e
+- ▁for
+- i
+- ▁on
+- d
+- t
+- o
+- u
+- er
+- p
+- ▁of
+- es
+- re
+- l
+- ▁it
+- ▁p
+- le
+- ▁f
+- ▁m
+- ▁email
+- ▁d
+- m
+- ▁c
+- st
+- r
+- n
+- ar
+- ▁h
+- b
+- ▁that
+- c
+- ▁this
+- h
+- an
+- email_query
+- ▁play
+- ▁re
+- ▁b
+- ▁do
+- ▁can
+- at
+- ▁have
+- g
+- ▁from
+- ▁and
+- en
+- email_sendemail
+- ▁olly
+- 'on'
+- ▁new
+- it
+- qa_factoid
+- calendar_set
+- ▁any
+- or
+- ▁g
+- ▁how
+- ▁t
+- ▁tell
+- ch
+- ▁not
+- ▁about
+- ▁at
+- ate
+- general_negate
+- f
+- ▁today
+- ▁e
+- ed
+- ▁list
+- ▁r
+- in
+- k
+- ic
+- social_post
+- ▁are
+- play_music
+- general_quirky
+- ▁l
+- al
+- v
+- ent
+- ▁n
+- ▁be
+- ▁an
+- ▁st
+- et
+- ▁am
+- general_praise
+- ▁time
+- weather_query
+- ▁up
+- ▁check
+- calendar_query
+- ▁w
+- om
+- ur
+- ▁send
+- ▁with
+- ly
+- w
+- general_explain
+- ad
+- ▁th
+- news_query
+- ▁one
+- ▁emails
+- day
+- ▁sh
+- ce
+- ▁last
+- ve
+- ▁he
+- z
+- ▁ch
+- ▁will
+- ▁set
+- ▁would
+- ▁was
+- x
+- general_repeat
+- ▁add
+- ou
+- ▁again
+- ▁ex
+- is
+- ct
+- general_affirm
+- general_confirm
+- ▁song
+- ▁next
+- ▁j
+- ▁meeting
+- um
+- ation
+- ▁turn
+- ▁did
+- if
+- ▁alarm
+- am
+- ▁like
+- datetime_query
+- ter
+- ▁remind
+- ▁o
+- qa_definition
+- ▁said
+- ▁calendar
+- ll
+- se
+- ers
+- th
+- ▁get
+- our
+- ▁need
+- ▁all
+- ot
+- ▁want
+- ▁off
+- and
+- ▁right
+- ▁de
+- ▁tr
+- ut
+- general_dontcare
+- ▁
+- ▁week
+- as
+- ▁tweet
+- ight
+- ir
+- ▁your
+- ▁event
+- ▁news
+- ▁se
+- ay
+- ion
+- ▁com
+- ▁there
+- ▁ye
+- ▁weather
+- un
+- ▁confirm
+- ld
+- calendar_remove
+- ▁y
+- ▁lights
+- ▁more
+- ▁v
+- play_radio
+- ▁does
+- ▁po
+- ▁now
+- id
+- email_querycontact
+- ▁show
+- ▁could
+- ery
+- op
+- ▁day
+- ▁pm
+- ▁music
+- ▁tomorrow
+- ▁train
+- ▁u
+- ine
+- ▁or
+- ange
+- qa_currency
+- ice
+- ▁contact
+- ▁just
+- ▁jo
+- ▁think
+- qa_stock
+- end
+- ss
+- ber
+- ▁tw
+- ▁command
+- ▁make
+- ▁no
+- ▁mo
+- pe
+- ▁find
+- general_commandstop
+- ▁when
+- social_query
+- ▁so
+- ong
+- ▁co
+- ant
+- ow
+- ▁much
+- ▁where
+- ul
+- ue
+- ri
+- ap
+- ▁start
+- ▁mar
+- ▁by
+- one
+- ▁know
+- ▁wor
+- oo
+- ▁give
+- ▁let
+- ▁events
+- der
+- ▁ro
+- ▁pr
+- ▁pl
+- play_podcasts
+- art
+- us
+- ▁work
+- ▁current
+- ol
+- cooking_recipe
+- nt
+- ▁correct
+- transport_query
+- ia
+- ▁stock
+- ▁br
+- ive
+- ▁app
+- ▁two
+- ▁latest
+- lists_query
+- ▁some
+- recommendation_events
+- ab
+- ▁go
+- ▁but
+- ook
+- ke
+- alarm_set
+- play_audiobook
+- ▁k
+- ▁response
+- ▁wr
+- cast
+- ▁open
+- ▁cle
+- ▁done
+- ▁got
+- ▁ca
+- ite
+- ase
+- ▁thank
+- iv
+- ah
+- ag
+- ▁answer
+- ie
+- ▁five
+- ▁book
+- ist
+- ▁rec
+- ore
+- ▁john
+- ment
+- ▁appreci
+- ▁fri
+- ack
+- ▁remove
+- ated
+- ock
+- ree
+- j
+- ▁good
+- ▁many
+- orn
+- fe
+- ▁radio
+- ▁we
+- int
+- ▁facebook
+- ▁cl
+- ▁sev
+- ▁schedule
+- ard
+- ▁per
+- ▁li
+- ▁going
+- nd
+- ain
+- recommendation_locations
+- ▁post
+- lists_createoradd
+- ff
+- ▁su
+- red
+- iot_hue_lightoff
+- lists_remove
+- ▁ar
+- een
+- ▁say
+- ro
+- ▁volume
+- ▁le
+- ▁reply
+- ▁complaint
+- ▁out
+- ▁delete
+- ▁ne
+- ame
+- ▁detail
+- ▁if
+- im
+- ▁happ
+- orr
+- ich
+- em
+- ▁ev
+- ction
+- ▁dollar
+- ▁as
+- alarm_query
+- audio_volume_mute
+- ac
+- music_query
+- ▁mon
+- ther
+- ▁thanks
+- cel
+- ▁who
+- ave
+- ▁service
+- ▁mail
+- ty
+- ▁hear
+- de
+- ▁si
+- ▁wh
+- ood
+- ell
+- ▁con
+- ▁once
+- ound
+- ▁don
+- ▁loc
+- ▁light
+- ▁birthday
+- ▁inf
+- ort
+- ffe
+- ▁playlist
+- el
+- ening
+- ▁us
+- ▁un
+- ▁has
+- own
+- ▁inc
+- ai
+- ▁speak
+- age
+- ▁mess
+- ast
+- ci
+- ver
+- ▁ten
+- ▁underst
+- ▁pro
+- ▁q
+- enty
+- ▁ticket
+- gh
+- audio_volume_up
+- ▁take
+- ▁bo
+- ally
+- ome
+- transport_ticket
+- ind
+- iot_hue_lightchange
+- pp
+- iot_coffee
+- ▁res
+- plain
+- io
+- lar
+- takeaway_query
+- ge
+- takeaway_order
+- email_addcontact
+- play_game
+- ak
+- ▁fa
+- transport_traffic
+- music_likeness
+- ▁rep
+- act
+- ust
+- transport_taxi
+- iot_hue_lightdim
+- ▁mu
+- ▁ti
+- ick
+- ▁ha
+- ould
+- general_joke
+- '1'
+- qa_maths
+- ▁lo
+- iot_cleaning
+- q
+- ake
+- ill
+- her
+- iot_hue_lightup
+- pl
+- '2'
+- alarm_remove
+- orrect
+- ▁cont
+- mail
+- out
+- audio_volume_down
+- book
+- ail
+- recommendation_movies
+- ck
+- ▁man
+- ▁mus
+- ▁che
+- me
+- ume
+- ▁answ
+- datetime_convert
+- ▁late
+- iot_wemo_on
+- ▁twe
+- music_settings
+- iot_wemo_off
+- orre
+- ith
+- ▁tom
+- ▁fr
+- ere
+- ▁ad
+- xt
+- ▁ab
+- ank
+- general_greet
+- now
+- ▁meet
+- ▁curre
+- ▁respon
+- ▁ag
+- ght
+- audio_volume_other
+- ink
+- ▁spe
+- iot_hue_lighton
+- ▁rem
+- lly
+- '?'
+- urn
+- ▁op
+- ▁complain
+- ▁comm
+- let
+- music_dislikeness
+- ove
+- ▁sch
+- ather
+- ▁rad
+- edule
+- ▁under
+- icket
+- lease
+- ▁bir
+- erv
+- ▁birth
+- ▁face
+- ▁cur
+- sw
+- ▁serv
+- ek
+- aid
+- '9'
+- ▁vol
+- edu
+- '5'
+- cooking_query
+- lete
+- ▁joh
+- ▁det
+- firm
+- nder
+- '0'
+- irm
+- '8'
+- '&'
+- _
+- list
+- pon
+- qa_query
+- '7'
+- '3'
+- '-'
+- reci
+- ▁doll
+- <sos/eos>
+init: null
+input_size: null
+ctc_conf:
+    dropout_rate: 0.0
+    ctc_type: builtin
+    reduce: true
+    ignore_nan_grad: true
+model_conf:
+    ctc_weight: 0.3
+    lsm_weight: 0.1
+    length_normalized_loss: false
+    extract_feats_in_collect_stats: false
+use_preprocessor: true
+token_type: word
+bpemodel: null
+non_linguistic_symbols: null
+cleaner: null
+g2p: null
+speech_volume_normalize: null
+rir_scp: null
+rir_apply_prob: 1.0
+noise_scp: null
+noise_apply_prob: 1.0
+noise_db_range: '13_15'
+frontend: default
+frontend_conf:
+    fs: 16k
+specaug: specaug
+specaug_conf:
+    apply_time_warp: true
+    time_warp_window: 5
+    time_warp_mode: bicubic
+    apply_freq_mask: true
+    freq_mask_width_range:
+    - 0
+    - 30
+    num_freq_mask: 2
+    apply_time_mask: true
+    time_mask_width_range:
+    - 0
+    - 40
+    num_time_mask: 2
+normalize: utterance_mvn
+normalize_conf: {}
+preencoder: null
+preencoder_conf: {}
+encoder: conformer
+encoder_conf:
+    output_size: 512
+    attention_heads: 8
+    linear_units: 2048
+    num_blocks: 12
+    dropout_rate: 0.1
+    positional_dropout_rate: 0.1
+    attention_dropout_rate: 0.1
+    input_layer: conv2d
+    normalize_before: true
+    macaron_style: true
+    pos_enc_layer_type: rel_pos
+    selfattention_layer_type: rel_selfattn
+    activation_type: swish
+    use_cnn_module: true
+    cnn_module_kernel: 31
+postencoder: null
+postencoder_conf: {}
+decoder: transformer
+decoder_conf:
+    attention_heads: 8
+    linear_units: 2048
+    num_blocks: 6
+    dropout_rate: 0.1
+    positional_dropout_rate: 0.1
+    self_attention_dropout_rate: 0.1
+    src_attention_dropout_rate: 0.1
+required:
+- output_dir
+- token_list
+version: 0.10.3a2
+distributed: false
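
The configuration above records the training-time setup (conformer encoder, transformer decoder, CTC weight 0.3, 16 kHz features); decoding options are not stored in it, which is why app.py passes nbest when loading the model. A small sketch of loading the committed files with explicit decoding parameters follows; the keyword arguments are standard espnet2 Speech2Text options, and the values shown are illustrative rather than taken from this commit.

from espnet2.bin.asr_inference import Speech2Text

# Decoding parameters are supplied here because config.yaml only describes training.
speech2text = Speech2Text(
    asr_train_config="slurp/config.yaml",
    asr_model_file="slurp/valid.acc.ave_10best.pth",
    ctc_weight=0.3,  # weight of CTC scores in the joint CTC/attention search
    beam_size=10,    # beam width (illustrative)
    nbest=1,
    device="cpu",
)
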
slurp/valid.acc.ave_10best.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0b2144a9a26a71d820fd5923124e33cf8be4accf7e731e3c72526805350806d9
+size 437699867
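
The checkpoint itself is stored with Git LFS; the three lines committed here are only a pointer recording the object's SHA-256 and size. A short sketch for checking a locally downloaded slurp/valid.acc.ave_10best.pth against those recorded values (standard library only; it assumes the real file has already been fetched, e.g. via git lfs pull):

import hashlib
import os

EXPECTED_SHA256 = "0b2144a9a26a71d820fd5923124e33cf8be4accf7e731e3c72526805350806d9"
EXPECTED_SIZE = 437699867  # bytes, from the pointer above

def verify_checkpoint(path="slurp/valid.acc.ave_10best.pth"):
    # Compare file size and SHA-256 digest against the values in the LFS pointer.
    if os.path.getsize(path) != EXPECTED_SIZE:
        return False
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
    return digest.hexdigest() == EXPECTED_SHA256

print(verify_checkpoint())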