MINAMONI skytnt committed
Commit 9c96d79 (0 parents)

Duplicate from skytnt/moe-tts


Co-authored-by: skytnt <[email protected]>

This view is limited to 50 files because the commit contains too many changes.
Files changed (50)
  1. .gitattributes +35 -0
  2. .gitignore +365 -0
  3. README.md +14 -0
  4. app.py +320 -0
  5. attentions.py +300 -0
  6. chinese_dialect_lexicons/changzhou.json +23 -0
  7. chinese_dialect_lexicons/changzhou.ocd2 +3 -0
  8. chinese_dialect_lexicons/cixi.json +23 -0
  9. chinese_dialect_lexicons/cixi.ocd2 +3 -0
  10. chinese_dialect_lexicons/fuyang.json +23 -0
  11. chinese_dialect_lexicons/fuyang.ocd2 +3 -0
  12. chinese_dialect_lexicons/hangzhou.json +19 -0
  13. chinese_dialect_lexicons/hangzhou.ocd2 +3 -0
  14. chinese_dialect_lexicons/jiading.json +23 -0
  15. chinese_dialect_lexicons/jiading.ocd2 +3 -0
  16. chinese_dialect_lexicons/jiashan.json +23 -0
  17. chinese_dialect_lexicons/jiashan.ocd2 +3 -0
  18. chinese_dialect_lexicons/jingjiang.json +23 -0
  19. chinese_dialect_lexicons/jingjiang.ocd2 +3 -0
  20. chinese_dialect_lexicons/jyutjyu.json +19 -0
  21. chinese_dialect_lexicons/jyutjyu.ocd2 +3 -0
  22. chinese_dialect_lexicons/linping.json +23 -0
  23. chinese_dialect_lexicons/linping.ocd2 +3 -0
  24. chinese_dialect_lexicons/ningbo.json +19 -0
  25. chinese_dialect_lexicons/ningbo.ocd2 +3 -0
  26. chinese_dialect_lexicons/pinghu.json +23 -0
  27. chinese_dialect_lexicons/pinghu.ocd2 +3 -0
  28. chinese_dialect_lexicons/ruao.json +23 -0
  29. chinese_dialect_lexicons/ruao.ocd2 +3 -0
  30. chinese_dialect_lexicons/sanmen.json +23 -0
  31. chinese_dialect_lexicons/sanmen.ocd2 +3 -0
  32. chinese_dialect_lexicons/shaoxing.json +23 -0
  33. chinese_dialect_lexicons/shaoxing.ocd2 +3 -0
  34. chinese_dialect_lexicons/suichang.json +23 -0
  35. chinese_dialect_lexicons/suichang.ocd2 +3 -0
  36. chinese_dialect_lexicons/suzhou.json +19 -0
  37. chinese_dialect_lexicons/suzhou.ocd2 +3 -0
  38. chinese_dialect_lexicons/tiantai.json +23 -0
  39. chinese_dialect_lexicons/tiantai.ocd2 +3 -0
  40. chinese_dialect_lexicons/tongxiang.json +23 -0
  41. chinese_dialect_lexicons/tongxiang.ocd2 +3 -0
  42. chinese_dialect_lexicons/wenzhou.json +23 -0
  43. chinese_dialect_lexicons/wenzhou.ocd2 +3 -0
  44. chinese_dialect_lexicons/wuxi.json +19 -0
  45. chinese_dialect_lexicons/wuxi.ocd2 +3 -0
  46. chinese_dialect_lexicons/xiaoshan.json +23 -0
  47. chinese_dialect_lexicons/xiaoshan.ocd2 +3 -0
  48. chinese_dialect_lexicons/xiashi.json +19 -0
  49. chinese_dialect_lexicons/xiashi.ocd2 +3 -0
  50. chinese_dialect_lexicons/yixing.json +19 -0
.gitattributes ADDED
@@ -0,0 +1,35 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zstandard filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
+ *.o filter=lfs diff=lfs merge=lfs -text
+ *.dll filter=lfs diff=lfs merge=lfs -text
+ *.so filter=lfs diff=lfs merge=lfs -text
+ *.ocd2 filter=lfs diff=lfs merge=lfs -text
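
Every pattern above routes matching files through Git LFS, so the repository stores small pointer stubs in place of the binaries themselves. The *.ocd2 lexicon files later in this diff appear in exactly that pointer form (version, oid, size). A minimal sketch of reading such a stub, assuming the checkout still holds the pointer text rather than the smudged binary:

# Minimal sketch: parse a Git LFS pointer stub such as the *.ocd2 entries
# later in this diff. Works only on the pointer text, not the real binary.
def parse_lfs_pointer(path):
    fields = {}
    with open(path, "r", encoding="utf-8") as f:
        for line in f:
            key, _, value = line.strip().partition(" ")
            fields[key] = value
    # A pointer always carries the spec version, the sha256 oid,
    # and the true size of the object it stands in for.
    assert fields["version"].startswith("https://git-lfs.github.com/spec/")
    return fields["oid"], int(fields["size"])

oid, size = parse_lfs_pointer("chinese_dialect_lexicons/changzhou.ocd2")
print(oid, size)  # e.g. sha256:db4ec0... 96119
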
.gitignore ADDED
@@ -0,0 +1,365 @@
+ ## Ignore Visual Studio temporary files, build results, and
+ ## files generated by popular Visual Studio add-ons.
+ ##
+ ## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore
+
+ # User-specific files
+ *.rsuser
+ *.suo
+ *.user
+ *.userosscache
+ *.sln.docstates
+
+ # User-specific files (MonoDevelop/Xamarin Studio)
+ *.userprefs
+
+ # Mono auto generated files
+ mono_crash.*
+
+ # Build results
+ [Dd]ebug/
+ [Dd]ebugPublic/
+ [Rr]elease/
+ [Rr]eleases/
+ x64/
+ x86/
+ [Ww][Ii][Nn]32/
+ [Aa][Rr][Mm]/
+ [Aa][Rr][Mm]64/
+ bld/
+ [Bb]in/
+ [Oo]bj/
+ [Oo]ut/
+ [Ll]og/
+ [Ll]ogs/
+
+ # Visual Studio 2015/2017 cache/options directory
+ .vs/
+ # Uncomment if you have tasks that create the project's static files in wwwroot
+ #wwwroot/
+
+ # Visual Studio 2017 auto generated files
+ Generated\ Files/
+
+ # MSTest test Results
+ [Tt]est[Rr]esult*/
+ [Bb]uild[Ll]og.*
+
+ # NUnit
+ *.VisualState.xml
+ TestResult.xml
+ nunit-*.xml
+
+ # Build Results of an ATL Project
+ [Dd]ebugPS/
+ [Rr]eleasePS/
+ dlldata.c
+
+ # Benchmark Results
+ BenchmarkDotNet.Artifacts/
+
+ # .NET Core
+ project.lock.json
+ project.fragment.lock.json
+ artifacts/
+
+ # ASP.NET Scaffolding
+ ScaffoldingReadMe.txt
+
+ # StyleCop
+ StyleCopReport.xml
+
+ # Files built by Visual Studio
+ *_i.c
+ *_p.c
+ *_h.h
+ *.ilk
+ *.meta
+ *.obj
+ *.iobj
+ *.pch
+ *.pdb
+ *.ipdb
+ *.pgc
+ *.pgd
+ *.rsp
+ *.sbr
+ *.tlb
+ *.tli
+ *.tlh
+ *.tmp
+ *.tmp_proj
+ *_wpftmp.csproj
+ *.log
+ *.vspscc
+ *.vssscc
+ .builds
+ *.pidb
+ *.svclog
+ *.scc
+
+ # Chutzpah Test files
+ _Chutzpah*
+
+ # Visual C++ cache files
+ ipch/
+ *.aps
+ *.ncb
+ *.opendb
+ *.opensdf
+ *.sdf
+ *.cachefile
+ *.VC.db
+ *.VC.VC.opendb
+
+ # Visual Studio profiler
+ *.psess
+ *.vsp
+ *.vspx
+ *.sap
+
+ # Visual Studio Trace Files
+ *.e2e
+
+ # TFS 2012 Local Workspace
+ $tf/
+
+ # Guidance Automation Toolkit
+ *.gpState
+
+ # ReSharper is a .NET coding add-in
+ _ReSharper*/
+ *.[Rr]e[Ss]harper
+ *.DotSettings.user
+
+ # TeamCity is a build add-in
+ _TeamCity*
+
+ # DotCover is a Code Coverage Tool
+ *.dotCover
+
+ # AxoCover is a Code Coverage Tool
+ .axoCover/*
+ !.axoCover/settings.json
+
+ # Coverlet is a free, cross platform Code Coverage Tool
+ coverage*.json
+ coverage*.xml
+ coverage*.info
+
+ # Visual Studio code coverage results
+ *.coverage
+ *.coveragexml
+
+ # NCrunch
+ _NCrunch_*
+ .*crunch*.local.xml
+ nCrunchTemp_*
+
+ # MightyMoose
+ *.mm.*
+ AutoTest.Net/
+
+ # Web workbench (sass)
+ .sass-cache/
+
+ # Installshield output folder
+ [Ee]xpress/
+
+ # DocProject is a documentation generator add-in
+ DocProject/buildhelp/
+ DocProject/Help/*.HxT
+ DocProject/Help/*.HxC
+ DocProject/Help/*.hhc
+ DocProject/Help/*.hhk
+ DocProject/Help/*.hhp
+ DocProject/Help/Html2
+ DocProject/Help/html
+
+ # Click-Once directory
+ publish/
+
+ # Publish Web Output
+ *.[Pp]ublish.xml
+ *.azurePubxml
+ # Note: Comment the next line if you want to checkin your web deploy settings,
+ # but database connection strings (with potential passwords) will be unencrypted
+ *.pubxml
+ *.publishproj
+
+ # Microsoft Azure Web App publish settings. Comment the next line if you want to
+ # checkin your Azure Web App publish settings, but sensitive information contained
+ # in these scripts will be unencrypted
+ PublishScripts/
+
+ # NuGet Packages
+ *.nupkg
+ # NuGet Symbol Packages
+ *.snupkg
+ # The packages folder can be ignored because of Package Restore
+ **/[Pp]ackages/*
+ # except build/, which is used as an MSBuild target.
+ !**/[Pp]ackages/build/
+ # Uncomment if necessary however generally it will be regenerated when needed
+ #!**/[Pp]ackages/repositories.config
+ # NuGet v3's project.json files produces more ignorable files
+ *.nuget.props
+ *.nuget.targets
+
+ # Microsoft Azure Build Output
+ csx/
+ *.build.csdef
+
+ # Microsoft Azure Emulator
+ ecf/
+ rcf/
+
+ # Windows Store app package directories and files
+ AppPackages/
+ BundleArtifacts/
+ Package.StoreAssociation.xml
+ _pkginfo.txt
+ *.appx
+ *.appxbundle
+ *.appxupload
+
+ # Visual Studio cache files
+ # files ending in .cache can be ignored
+ *.[Cc]ache
+ # but keep track of directories ending in .cache
+ !?*.[Cc]ache/
+
+ # Others
+ ClientBin/
+ ~$*
+ *~
+ *.dbmdl
+ *.dbproj.schemaview
+ *.jfm
+ *.pfx
+ *.publishsettings
+ orleans.codegen.cs
+
+ # Including strong name files can present a security risk
+ # (https://github.com/github/gitignore/pull/2483#issue-259490424)
+ #*.snk
+
+ # Since there are multiple workflows, uncomment next line to ignore bower_components
+ # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622)
+ #bower_components/
+
+ # RIA/Silverlight projects
+ Generated_Code/
+
+ # Backup & report files from converting an old project file
+ # to a newer Visual Studio version. Backup files are not needed,
+ # because we have git ;-)
+ _UpgradeReport_Files/
+ Backup*/
+ UpgradeLog*.XML
+ UpgradeLog*.htm
+ ServiceFabricBackup/
+ *.rptproj.bak
+
+ # SQL Server files
+ *.mdf
+ *.ldf
+ *.ndf
+
+ # Business Intelligence projects
+ *.rdl.data
+ *.bim.layout
+ *.bim_*.settings
+ *.rptproj.rsuser
+ *- [Bb]ackup.rdl
+ *- [Bb]ackup ([0-9]).rdl
+ *- [Bb]ackup ([0-9][0-9]).rdl
+
+ # Microsoft Fakes
+ FakesAssemblies/
+
+ # GhostDoc plugin setting file
+ *.GhostDoc.xml
+
+ # Node.js Tools for Visual Studio
+ .ntvs_analysis.dat
+ node_modules/
+
+ # Visual Studio 6 build log
+ *.plg
+
+ # Visual Studio 6 workspace options file
+ *.opt
+
+ # Visual Studio 6 auto-generated workspace file (contains which files were open etc.)
+ *.vbw
+
+ # Visual Studio LightSwitch build output
+ **/*.HTMLClient/GeneratedArtifacts
+ **/*.DesktopClient/GeneratedArtifacts
+ **/*.DesktopClient/ModelManifest.xml
+ **/*.Server/GeneratedArtifacts
+ **/*.Server/ModelManifest.xml
+ _Pvt_Extensions
+
+ # Paket dependency manager
+ .paket/paket.exe
+ paket-files/
+
+ # FAKE - F# Make
+ .fake/
+
+ # CodeRush personal settings
+ .cr/personal
+
+ # Python Tools for Visual Studio (PTVS)
+ __pycache__/
+ *.pyc
+
+ # Cake - Uncomment if you are using it
+ # tools/**
+ # !tools/packages.config
+
+ # Tabs Studio
+ *.tss
+
+ # Telerik's JustMock configuration file
+ *.jmconfig
+
+ # BizTalk build output
+ *.btp.cs
+ *.btm.cs
+ *.odx.cs
+ *.xsd.cs
+
+ # OpenCover UI analysis results
+ OpenCover/
+
+ # Azure Stream Analytics local run output
+ ASALocalRun/
+
+ # MSBuild Binary and Structured Log
+ *.binlog
+
+ # NVidia Nsight GPU debugger configuration file
+ *.nvuser
+
+ # MFractors (Xamarin productivity tool) working folder
+ .mfractor/
+
+ # Local History for Visual Studio
+ .localhistory/
+
+ # BeatPulse healthcheck temp database
+ healthchecksdb
+
+ # Backup folder for Package Reference Convert tool in Visual Studio 2017
+ MigrationBackup/
+
+ # Ionide (cross platform F# VS Code tools) working folder
+ .ionide/
+
+ # Fody - auto-generated XML schema
+ FodyWeavers.xsd
+
+ .idea/
README.md ADDED
@@ -0,0 +1,14 @@
+ ---
+ title: Moe TTS
+ emoji: 😊🎙️
+ colorFrom: red
+ colorTo: pink
+ sdk: gradio
+ sdk_version: 3.16.1
+ app_file: app.py
+ pinned: false
+ license: mit
+ duplicated_from: skytnt/moe-tts
+ ---
+
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,320 @@
+ import argparse
+ import json
+ import os
+ import re
+ import tempfile
+ from pathlib import Path
+
+ import librosa
+ import numpy as np
+ import torch
+ from torch import no_grad, LongTensor
+ import commons
+ import utils
+ import gradio as gr
+ import gradio.utils as gr_utils
+ import gradio.processing_utils as gr_processing_utils
+ from models import SynthesizerTrn
+ from text import text_to_sequence, _clean_text
+ from mel_processing import spectrogram_torch
+
+ limitation = os.getenv("SYSTEM") == "spaces"  # limit text and audio length in huggingface spaces
+
+ audio_postprocess_ori = gr.Audio.postprocess
+
+
+ def audio_postprocess(self, y):
+     data = audio_postprocess_ori(self, y)
+     if data is None:
+         return None
+     return gr_processing_utils.encode_url_or_file_to_base64(data["name"])
+
+
+ gr.Audio.postprocess = audio_postprocess
+
+
+ def get_text(text, hps, is_symbol):
+     text_norm = text_to_sequence(text, hps.symbols, [] if is_symbol else hps.data.text_cleaners)
+     if hps.data.add_blank:
+         # intersperse blank tokens (id 0) between symbols, as VITS expects
+         text_norm = commons.intersperse(text_norm, 0)
+     text_norm = LongTensor(text_norm)
+     return text_norm
+
+
+ def create_tts_fn(model, hps, speaker_ids):
+     def tts_fn(text, speaker, speed, is_symbol):
+         if limitation:
+             text_len = len(re.sub(r"\[([A-Z]{2})\]", "", text))
+             max_len = 150
+             if is_symbol:
+                 max_len *= 3
+             if text_len > max_len:
+                 return "Error: Text is too long", None
+
+         speaker_id = speaker_ids[speaker]
+         stn_tst = get_text(text, hps, is_symbol)
+         with no_grad():
+             x_tst = stn_tst.unsqueeze(0).to(device)
+             x_tst_lengths = LongTensor([stn_tst.size(0)]).to(device)
+             sid = LongTensor([speaker_id]).to(device)
+             audio = model.infer(x_tst, x_tst_lengths, sid=sid, noise_scale=.667, noise_scale_w=0.8,
+                                 length_scale=1.0 / speed)[0][0, 0].data.cpu().float().numpy()
+         del stn_tst, x_tst, x_tst_lengths, sid
+         return "Success", (hps.data.sampling_rate, audio)
+
+     return tts_fn
+
+
+ def create_vc_fn(model, hps, speaker_ids):
+     def vc_fn(original_speaker, target_speaker, input_audio):
+         if input_audio is None:
+             return "You need to upload an audio", None
+         sampling_rate, audio = input_audio
+         duration = audio.shape[0] / sampling_rate
+         if limitation and duration > 30:
+             return "Error: Audio is too long", None
+         original_speaker_id = speaker_ids[original_speaker]
+         target_speaker_id = speaker_ids[target_speaker]
+
+         # normalize integer PCM to float32 in [-1, 1]
+         audio = (audio / np.iinfo(audio.dtype).max).astype(np.float32)
+         if len(audio.shape) > 1:
+             audio = librosa.to_mono(audio.transpose(1, 0))
+         if sampling_rate != hps.data.sampling_rate:
+             audio = librosa.resample(audio, orig_sr=sampling_rate, target_sr=hps.data.sampling_rate)
+         with no_grad():
+             y = torch.FloatTensor(audio)
+             y = y.unsqueeze(0)
+             spec = spectrogram_torch(y, hps.data.filter_length,
+                                      hps.data.sampling_rate, hps.data.hop_length, hps.data.win_length,
+                                      center=False).to(device)
+             spec_lengths = LongTensor([spec.size(-1)]).to(device)
+             sid_src = LongTensor([original_speaker_id]).to(device)
+             sid_tgt = LongTensor([target_speaker_id]).to(device)
+             audio = model.voice_conversion(spec, spec_lengths, sid_src=sid_src, sid_tgt=sid_tgt)[0][
+                 0, 0].data.cpu().float().numpy()
+         del y, spec, spec_lengths, sid_src, sid_tgt
+         return "Success", (hps.data.sampling_rate, audio)
+
+     return vc_fn
+
+
+ def create_soft_vc_fn(model, hps, speaker_ids):
+     def soft_vc_fn(target_speaker, input_audio1, input_audio2):
+         input_audio = input_audio1
+         if input_audio is None:
+             input_audio = input_audio2
+         if input_audio is None:
+             return "You need to upload an audio", None
+         sampling_rate, audio = input_audio
+         duration = audio.shape[0] / sampling_rate
+         if limitation and duration > 30:
+             return "Error: Audio is too long", None
+         target_speaker_id = speaker_ids[target_speaker]
+
+         audio = (audio / np.iinfo(audio.dtype).max).astype(np.float32)
+         if len(audio.shape) > 1:
+             audio = librosa.to_mono(audio.transpose(1, 0))
+         if sampling_rate != 16000:
+             audio = librosa.resample(audio, orig_sr=sampling_rate, target_sr=16000)
+         with torch.inference_mode():
+             units = hubert.units(torch.FloatTensor(audio).unsqueeze(0).unsqueeze(0).to(device))
+         with no_grad():
+             unit_lengths = LongTensor([units.size(1)]).to(device)
+             sid = LongTensor([target_speaker_id]).to(device)
+             audio = model.infer(units, unit_lengths, sid=sid, noise_scale=.667,
+                                 noise_scale_w=0.8)[0][0, 0].data.cpu().float().numpy()
+         del units, unit_lengths, sid
+         return "Success", (hps.data.sampling_rate, audio)
+
+     return soft_vc_fn
+
+
+ def create_to_symbol_fn(hps):
+     def to_symbol_fn(is_symbol_input, input_text, temp_text):
+         return (_clean_text(input_text, hps.data.text_cleaners), input_text) if is_symbol_input \
+             else (temp_text, temp_text)
+
+     return to_symbol_fn
+
+
+ download_audio_js = """
+ () =>{{
+     let root = document.querySelector("body > gradio-app");
+     if (root.shadowRoot != null)
+         root = root.shadowRoot;
+     let audio = root.querySelector("#{audio_id}").querySelector("audio");
+     if (audio == undefined)
+         return;
+     audio = audio.src;
+     let oA = document.createElement("a");
+     oA.download = Math.floor(Math.random()*100000000)+'.wav';
+     oA.href = audio;
+     document.body.appendChild(oA);
+     oA.click();
+     oA.remove();
+ }}
+ """
+
+ if __name__ == '__main__':
+     parser = argparse.ArgumentParser()
+     parser.add_argument('--device', type=str, default='cpu')
+     parser.add_argument("--share", action="store_true", default=False, help="share gradio app")
+     args = parser.parse_args()
+
+     device = torch.device(args.device)
+     models_tts = []
+     models_vc = []
+     models_soft_vc = []
+     with open("saved_model/info.json", "r", encoding="utf-8") as f:
+         models_info = json.load(f)
+     for i, info in models_info.items():
+         name = info["title"]
+         author = info["author"]
+         lang = info["lang"]
+         example = info["example"]
+         config_path = f"saved_model/{i}/config.json"
+         model_path = f"saved_model/{i}/model.pth"
+         cover = info["cover"]
+         cover_path = f"saved_model/{i}/{cover}" if cover else None
+         hps = utils.get_hparams_from_file(config_path)
+         model = SynthesizerTrn(
+             len(hps.symbols),
+             hps.data.filter_length // 2 + 1,
+             hps.train.segment_size // hps.data.hop_length,
+             n_speakers=hps.data.n_speakers,
+             **hps.model)
+         utils.load_checkpoint(model_path, model, None)
+         model.eval().to(device)
+         speaker_ids = [sid for sid, name in enumerate(hps.speakers) if name != "None"]
+         speakers = [name for sid, name in enumerate(hps.speakers) if name != "None"]
+
+         t = info["type"]
+         if t == "vits":
+             models_tts.append((name, author, cover_path, speakers, lang, example,
+                                hps.symbols, create_tts_fn(model, hps, speaker_ids),
+                                create_to_symbol_fn(hps)))
+             models_vc.append((name, author, cover_path, speakers, create_vc_fn(model, hps, speaker_ids)))
+         elif t == "soft-vits-vc":
+             models_soft_vc.append((name, author, cover_path, speakers, create_soft_vc_fn(model, hps, speaker_ids)))
+
+     hubert = torch.hub.load("bshall/hubert:main", "hubert_soft", trust_repo=True).to(device)
+
+     app = gr.Blocks()
+
+     with app:
+         gr.Markdown("# Moe TTS And Voice Conversion Using VITS Model\n\n"
+                     "![visitor badge](https://visitor-badge.glitch.me/badge?page_id=skytnt.moegoe)\n\n"
+                     "[Open In Colab]"
+                     "(https://colab.research.google.com/drive/14Pb8lpmwZL-JI5Ub6jpG4sz2-8KS0kbS?usp=sharing)"
+                     " without queue and length limitation.\n\n"
+                     "Feel free to [open discussion](https://huggingface.co/spaces/skytnt/moe-tts/discussions/new) "
+                     "if you want to add your model to this app.")
+         with gr.Tabs():
+             with gr.TabItem("TTS"):
+                 with gr.Tabs():
+                     for i, (name, author, cover_path, speakers, lang, example, symbols, tts_fn,
+                             to_symbol_fn) in enumerate(models_tts):
+                         with gr.TabItem(f"model{i}"):
+                             with gr.Column():
+                                 cover_markdown = f"![cover](file/{cover_path})\n\n" if cover_path else ""
+                                 gr.Markdown(f"## {name}\n\n"
+                                             f"{cover_markdown}"
+                                             f"model author: {author}\n\n"
+                                             f"language: {lang}")
+                                 tts_input1 = gr.TextArea(label="Text (150 words limitation)", value=example,
+                                                          elem_id=f"tts-input{i}")
+                                 tts_input2 = gr.Dropdown(label="Speaker", choices=speakers,
+                                                          type="index", value=speakers[0])
+                                 tts_input3 = gr.Slider(label="Speed", value=1, minimum=0.5, maximum=2, step=0.1)
+                                 with gr.Accordion(label="Advanced Options", open=False):
+                                     temp_text_var = gr.Variable()
+                                     symbol_input = gr.Checkbox(value=False, label="Symbol input")
+                                     symbol_list = gr.Dataset(label="Symbol list", components=[tts_input1],
+                                                              samples=[[x] for x in symbols],
+                                                              elem_id=f"symbol-list{i}")
+                                     symbol_list_json = gr.Json(value=symbols, visible=False)
+                                 tts_submit = gr.Button("Generate", variant="primary")
+                                 tts_output1 = gr.Textbox(label="Output Message")
+                                 tts_output2 = gr.Audio(label="Output Audio", elem_id=f"tts-audio{i}")
+                                 download = gr.Button("Download Audio")
+                                 download.click(None, [], [], _js=download_audio_js.format(audio_id=f"tts-audio{i}"))
+
+                                 tts_submit.click(tts_fn, [tts_input1, tts_input2, tts_input3, symbol_input],
+                                                  [tts_output1, tts_output2])
+                                 symbol_input.change(to_symbol_fn,
+                                                     [symbol_input, tts_input1, temp_text_var],
+                                                     [tts_input1, temp_text_var])
+                                 symbol_list.click(None, [symbol_list, symbol_list_json], [],
+                                                   _js=f"""
+ (i,symbols) => {{
+     let root = document.querySelector("body > gradio-app");
+     if (root.shadowRoot != null)
+         root = root.shadowRoot;
+     let text_input = root.querySelector("#tts-input{i}").querySelector("textarea");
+     let startPos = text_input.selectionStart;
+     let endPos = text_input.selectionEnd;
+     let oldTxt = text_input.value;
+     let result = oldTxt.substring(0, startPos) + symbols[i] + oldTxt.substring(endPos);
+     text_input.value = result;
+     let x = window.scrollX, y = window.scrollY;
+     text_input.focus();
+     text_input.selectionStart = startPos + symbols[i].length;
+     text_input.selectionEnd = startPos + symbols[i].length;
+     text_input.blur();
+     window.scrollTo(x, y);
+     return [];
+ }}""")
+
+             with gr.TabItem("Voice Conversion"):
+                 with gr.Tabs():
+                     for i, (name, author, cover_path, speakers, vc_fn) in enumerate(models_vc):
+                         with gr.TabItem(f"model{i}"):
+                             cover_markdown = f"![cover](file/{cover_path})\n\n" if cover_path else ""
+                             gr.Markdown(f"## {name}\n\n"
+                                         f"{cover_markdown}"
+                                         f"model author: {author}")
+                             vc_input1 = gr.Dropdown(label="Original Speaker", choices=speakers, type="index",
+                                                     value=speakers[0])
+                             vc_input2 = gr.Dropdown(label="Target Speaker", choices=speakers, type="index",
+                                                     value=speakers[min(len(speakers) - 1, 1)])
+                             vc_input3 = gr.Audio(label="Input Audio (30s limitation)")
+                             vc_submit = gr.Button("Convert", variant="primary")
+                             vc_output1 = gr.Textbox(label="Output Message")
+                             vc_output2 = gr.Audio(label="Output Audio", elem_id=f"vc-audio{i}")
+                             download = gr.Button("Download Audio")
+                             download.click(None, [], [], _js=download_audio_js.format(audio_id=f"vc-audio{i}"))
+                             vc_submit.click(vc_fn, [vc_input1, vc_input2, vc_input3], [vc_output1, vc_output2])
+             with gr.TabItem("Soft Voice Conversion"):
+                 with gr.Tabs():
+                     for i, (name, author, cover_path, speakers, soft_vc_fn) in enumerate(models_soft_vc):
+                         with gr.TabItem(f"model{i}"):
+                             cover_markdown = f"![cover](file/{cover_path})\n\n" if cover_path else ""
+                             gr.Markdown(f"## {name}\n\n"
+                                         f"{cover_markdown}"
+                                         f"model author: {author}")
+                             vc_input1 = gr.Dropdown(label="Target Speaker", choices=speakers, type="index",
+                                                     value=speakers[0])
+                             source_tabs = gr.Tabs()
+                             with source_tabs:
+                                 with gr.TabItem("microphone"):
+                                     vc_input2 = gr.Audio(label="Input Audio (30s limitation)", source="microphone")
+                                 with gr.TabItem("upload"):
+                                     vc_input3 = gr.Audio(label="Input Audio (30s limitation)", source="upload")
+                             vc_submit = gr.Button("Convert", variant="primary")
+                             vc_output1 = gr.Textbox(label="Output Message")
+                             vc_output2 = gr.Audio(label="Output Audio", elem_id=f"svc-audio{i}")
+                             download = gr.Button("Download Audio")
+                             download.click(None, [], [], _js=download_audio_js.format(audio_id=f"svc-audio{i}"))
+                             # clear inputs
+                             source_tabs.set_event_trigger("change", None, [], [vc_input2, vc_input3],
+                                                           js="()=>[null,null]")
+                             vc_submit.click(soft_vc_fn, [vc_input1, vc_input2, vc_input3],
+                                             [vc_output1, vc_output2])
+         gr.Markdown(
+             "unofficial demo for \n\n"
+             "- [https://github.com/CjangCjengh/MoeGoe](https://github.com/CjangCjengh/MoeGoe)\n"
+             "- [https://github.com/Francis-Komizu/VITS](https://github.com/Francis-Komizu/VITS)\n"
+             "- [https://github.com/luoyily/MoeTTS](https://github.com/luoyily/MoeTTS)\n"
+             "- [https://github.com/Francis-Komizu/Sovits](https://github.com/Francis-Komizu/Sovits)"
+         )
+     app.queue(concurrency_count=3).launch(show_api=False, share=args.share)
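
Both conversion paths above share the same audio preprocessing: Gradio hands back integer PCM plus a sample rate, which is normalized to float32 in [-1, 1], downmixed to mono, and resampled to the model's rate. A standalone sketch of that pipeline follows; the 22050 Hz target is an illustrative assumption, since the app reads hps.data.sampling_rate from each model's config:

# Minimal sketch of the audio preprocessing used by vc_fn/soft_vc_fn above:
# integer PCM -> float32 in [-1, 1], stereo -> mono, resample to the model rate.
import librosa
import numpy as np

def preprocess(audio: np.ndarray, sampling_rate: int, target_sr: int = 22050) -> np.ndarray:
    audio = (audio / np.iinfo(audio.dtype).max).astype(np.float32)  # e.g. int16 / 32767
    if len(audio.shape) > 1:  # gr.Audio yields (samples, channels)
        audio = librosa.to_mono(audio.transpose(1, 0))  # librosa expects (channels, samples)
    if sampling_rate != target_sr:
        audio = librosa.resample(audio, orig_sr=sampling_rate, target_sr=target_sr)
    return audio

# one second of a 440 Hz tone at 48 kHz, as int16 PCM
fake_input = (np.sin(np.linspace(0, 440 * 2 * np.pi, 48000)) * 32767).astype(np.int16)
print(preprocess(fake_input, 48000).shape)  # ~22050 float32 samples
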
attentions.py ADDED
@@ -0,0 +1,300 @@
+ import math
+ import torch
+ from torch import nn
+ from torch.nn import functional as F
+
+ import commons
+ from modules import LayerNorm
+
+
+ class Encoder(nn.Module):
+     def __init__(self, hidden_channels, filter_channels, n_heads, n_layers, kernel_size=1, p_dropout=0., window_size=4, **kwargs):
+         super().__init__()
+         self.hidden_channels = hidden_channels
+         self.filter_channels = filter_channels
+         self.n_heads = n_heads
+         self.n_layers = n_layers
+         self.kernel_size = kernel_size
+         self.p_dropout = p_dropout
+         self.window_size = window_size
+
+         self.drop = nn.Dropout(p_dropout)
+         self.attn_layers = nn.ModuleList()
+         self.norm_layers_1 = nn.ModuleList()
+         self.ffn_layers = nn.ModuleList()
+         self.norm_layers_2 = nn.ModuleList()
+         for i in range(self.n_layers):
+             self.attn_layers.append(MultiHeadAttention(hidden_channels, hidden_channels, n_heads, p_dropout=p_dropout, window_size=window_size))
+             self.norm_layers_1.append(LayerNorm(hidden_channels))
+             self.ffn_layers.append(FFN(hidden_channels, hidden_channels, filter_channels, kernel_size, p_dropout=p_dropout))
+             self.norm_layers_2.append(LayerNorm(hidden_channels))
+
+     def forward(self, x, x_mask):
+         attn_mask = x_mask.unsqueeze(2) * x_mask.unsqueeze(-1)
+         x = x * x_mask
+         for i in range(self.n_layers):
+             y = self.attn_layers[i](x, x, attn_mask)
+             y = self.drop(y)
+             x = self.norm_layers_1[i](x + y)
+
+             y = self.ffn_layers[i](x, x_mask)
+             y = self.drop(y)
+             x = self.norm_layers_2[i](x + y)
+         x = x * x_mask
+         return x
+
+
+ class Decoder(nn.Module):
+     def __init__(self, hidden_channels, filter_channels, n_heads, n_layers, kernel_size=1, p_dropout=0., proximal_bias=False, proximal_init=True, **kwargs):
+         super().__init__()
+         self.hidden_channels = hidden_channels
+         self.filter_channels = filter_channels
+         self.n_heads = n_heads
+         self.n_layers = n_layers
+         self.kernel_size = kernel_size
+         self.p_dropout = p_dropout
+         self.proximal_bias = proximal_bias
+         self.proximal_init = proximal_init
+
+         self.drop = nn.Dropout(p_dropout)
+         self.self_attn_layers = nn.ModuleList()
+         self.norm_layers_0 = nn.ModuleList()
+         self.encdec_attn_layers = nn.ModuleList()
+         self.norm_layers_1 = nn.ModuleList()
+         self.ffn_layers = nn.ModuleList()
+         self.norm_layers_2 = nn.ModuleList()
+         for i in range(self.n_layers):
+             self.self_attn_layers.append(MultiHeadAttention(hidden_channels, hidden_channels, n_heads, p_dropout=p_dropout, proximal_bias=proximal_bias, proximal_init=proximal_init))
+             self.norm_layers_0.append(LayerNorm(hidden_channels))
+             self.encdec_attn_layers.append(MultiHeadAttention(hidden_channels, hidden_channels, n_heads, p_dropout=p_dropout))
+             self.norm_layers_1.append(LayerNorm(hidden_channels))
+             self.ffn_layers.append(FFN(hidden_channels, hidden_channels, filter_channels, kernel_size, p_dropout=p_dropout, causal=True))
+             self.norm_layers_2.append(LayerNorm(hidden_channels))
+
+     def forward(self, x, x_mask, h, h_mask):
+         """
+         x: decoder input
+         h: encoder output
+         """
+         self_attn_mask = commons.subsequent_mask(x_mask.size(2)).to(device=x.device, dtype=x.dtype)
+         encdec_attn_mask = h_mask.unsqueeze(2) * x_mask.unsqueeze(-1)
+         x = x * x_mask
+         for i in range(self.n_layers):
+             y = self.self_attn_layers[i](x, x, self_attn_mask)
+             y = self.drop(y)
+             x = self.norm_layers_0[i](x + y)
+
+             y = self.encdec_attn_layers[i](x, h, encdec_attn_mask)
+             y = self.drop(y)
+             x = self.norm_layers_1[i](x + y)
+
+             y = self.ffn_layers[i](x, x_mask)
+             y = self.drop(y)
+             x = self.norm_layers_2[i](x + y)
+         x = x * x_mask
+         return x
+
+
+ class MultiHeadAttention(nn.Module):
+     def __init__(self, channels, out_channels, n_heads, p_dropout=0., window_size=None, heads_share=True, block_length=None, proximal_bias=False, proximal_init=False):
+         super().__init__()
+         assert channels % n_heads == 0
+
+         self.channels = channels
+         self.out_channels = out_channels
+         self.n_heads = n_heads
+         self.p_dropout = p_dropout
+         self.window_size = window_size
+         self.heads_share = heads_share
+         self.block_length = block_length
+         self.proximal_bias = proximal_bias
+         self.proximal_init = proximal_init
+         self.attn = None
+
+         self.k_channels = channels // n_heads
+         self.conv_q = nn.Conv1d(channels, channels, 1)
+         self.conv_k = nn.Conv1d(channels, channels, 1)
+         self.conv_v = nn.Conv1d(channels, channels, 1)
+         self.conv_o = nn.Conv1d(channels, out_channels, 1)
+         self.drop = nn.Dropout(p_dropout)
+
+         if window_size is not None:
+             n_heads_rel = 1 if heads_share else n_heads
+             rel_stddev = self.k_channels**-0.5
+             self.emb_rel_k = nn.Parameter(torch.randn(n_heads_rel, window_size * 2 + 1, self.k_channels) * rel_stddev)
+             self.emb_rel_v = nn.Parameter(torch.randn(n_heads_rel, window_size * 2 + 1, self.k_channels) * rel_stddev)
+
+         nn.init.xavier_uniform_(self.conv_q.weight)
+         nn.init.xavier_uniform_(self.conv_k.weight)
+         nn.init.xavier_uniform_(self.conv_v.weight)
+         if proximal_init:
+             with torch.no_grad():
+                 self.conv_k.weight.copy_(self.conv_q.weight)
+                 self.conv_k.bias.copy_(self.conv_q.bias)
+
+     def forward(self, x, c, attn_mask=None):
+         q = self.conv_q(x)
+         k = self.conv_k(c)
+         v = self.conv_v(c)
+
+         x, self.attn = self.attention(q, k, v, mask=attn_mask)
+
+         x = self.conv_o(x)
+         return x
+
+     def attention(self, query, key, value, mask=None):
+         # reshape [b, d, t] -> [b, n_h, t, d_k]
+         b, d, t_s, t_t = (*key.size(), query.size(2))
+         query = query.view(b, self.n_heads, self.k_channels, t_t).transpose(2, 3)
+         key = key.view(b, self.n_heads, self.k_channels, t_s).transpose(2, 3)
+         value = value.view(b, self.n_heads, self.k_channels, t_s).transpose(2, 3)
+
+         scores = torch.matmul(query / math.sqrt(self.k_channels), key.transpose(-2, -1))
+         if self.window_size is not None:
+             assert t_s == t_t, "Relative attention is only available for self-attention."
+             key_relative_embeddings = self._get_relative_embeddings(self.emb_rel_k, t_s)
+             rel_logits = self._matmul_with_relative_keys(query / math.sqrt(self.k_channels), key_relative_embeddings)
+             scores_local = self._relative_position_to_absolute_position(rel_logits)
+             scores = scores + scores_local
+         if self.proximal_bias:
+             assert t_s == t_t, "Proximal bias is only available for self-attention."
+             scores = scores + self._attention_bias_proximal(t_s).to(device=scores.device, dtype=scores.dtype)
+         if mask is not None:
+             scores = scores.masked_fill(mask == 0, -1e4)
+             if self.block_length is not None:
+                 assert t_s == t_t, "Local attention is only available for self-attention."
+                 block_mask = torch.ones_like(scores).triu(-self.block_length).tril(self.block_length)
+                 scores = scores.masked_fill(block_mask == 0, -1e4)
+         p_attn = F.softmax(scores, dim=-1)  # [b, n_h, t_t, t_s]
+         p_attn = self.drop(p_attn)
+         output = torch.matmul(p_attn, value)
+         if self.window_size is not None:
+             relative_weights = self._absolute_position_to_relative_position(p_attn)
+             value_relative_embeddings = self._get_relative_embeddings(self.emb_rel_v, t_s)
+             output = output + self._matmul_with_relative_values(relative_weights, value_relative_embeddings)
+         output = output.transpose(2, 3).contiguous().view(b, d, t_t)  # [b, n_h, t_t, d_k] -> [b, d, t_t]
+         return output, p_attn
+
+     def _matmul_with_relative_values(self, x, y):
+         """
+         x: [b, h, l, m]
+         y: [h or 1, m, d]
+         ret: [b, h, l, d]
+         """
+         ret = torch.matmul(x, y.unsqueeze(0))
+         return ret
+
+     def _matmul_with_relative_keys(self, x, y):
+         """
+         x: [b, h, l, d]
+         y: [h or 1, m, d]
+         ret: [b, h, l, m]
+         """
+         ret = torch.matmul(x, y.unsqueeze(0).transpose(-2, -1))
+         return ret
+
+     def _get_relative_embeddings(self, relative_embeddings, length):
+         max_relative_position = 2 * self.window_size + 1
+         # Pad first before slice to avoid using cond ops.
+         pad_length = max(length - (self.window_size + 1), 0)
+         slice_start_position = max((self.window_size + 1) - length, 0)
+         slice_end_position = slice_start_position + 2 * length - 1
+         if pad_length > 0:
+             padded_relative_embeddings = F.pad(
+                 relative_embeddings,
+                 commons.convert_pad_shape([[0, 0], [pad_length, pad_length], [0, 0]]))
+         else:
+             padded_relative_embeddings = relative_embeddings
+         used_relative_embeddings = padded_relative_embeddings[:, slice_start_position:slice_end_position]
+         return used_relative_embeddings
+
+     def _relative_position_to_absolute_position(self, x):
+         """
+         x: [b, h, l, 2*l-1]
+         ret: [b, h, l, l]
+         """
+         batch, heads, length, _ = x.size()
+         # Concat columns of pad to shift from relative to absolute indexing.
+         x = F.pad(x, commons.convert_pad_shape([[0, 0], [0, 0], [0, 0], [0, 1]]))
+
+         # Concat extra elements so as to add up to shape (len+1, 2*len-1).
+         x_flat = x.view([batch, heads, length * 2 * length])
+         x_flat = F.pad(x_flat, commons.convert_pad_shape([[0, 0], [0, 0], [0, length - 1]]))
+
+         # Reshape and slice out the padded elements.
+         x_final = x_flat.view([batch, heads, length + 1, 2 * length - 1])[:, :, :length, length - 1:]
+         return x_final
+
+     def _absolute_position_to_relative_position(self, x):
+         """
+         x: [b, h, l, l]
+         ret: [b, h, l, 2*l-1]
+         """
+         batch, heads, length, _ = x.size()
+         # pad along column
+         x = F.pad(x, commons.convert_pad_shape([[0, 0], [0, 0], [0, 0], [0, length - 1]]))
+         x_flat = x.view([batch, heads, length**2 + length * (length - 1)])
+         # add 0's in the beginning that will skew the elements after reshape
+         x_flat = F.pad(x_flat, commons.convert_pad_shape([[0, 0], [0, 0], [length, 0]]))
+         x_final = x_flat.view([batch, heads, length, 2 * length])[:, :, :, 1:]
+         return x_final
+
+     def _attention_bias_proximal(self, length):
+         """Bias for self-attention to encourage attention to close positions.
+         Args:
+           length: an integer scalar.
+         Returns:
+           a Tensor with shape [1, 1, length, length]
+         """
+         r = torch.arange(length, dtype=torch.float32)
+         diff = torch.unsqueeze(r, 0) - torch.unsqueeze(r, 1)
+         return torch.unsqueeze(torch.unsqueeze(-torch.log1p(torch.abs(diff)), 0), 0)
+
+
+ class FFN(nn.Module):
+     def __init__(self, in_channels, out_channels, filter_channels, kernel_size, p_dropout=0., activation=None, causal=False):
+         super().__init__()
+         self.in_channels = in_channels
+         self.out_channels = out_channels
+         self.filter_channels = filter_channels
+         self.kernel_size = kernel_size
+         self.p_dropout = p_dropout
+         self.activation = activation
+         self.causal = causal
+
+         if causal:
+             self.padding = self._causal_padding
+         else:
+             self.padding = self._same_padding
+
+         self.conv_1 = nn.Conv1d(in_channels, filter_channels, kernel_size)
+         self.conv_2 = nn.Conv1d(filter_channels, out_channels, kernel_size)
+         self.drop = nn.Dropout(p_dropout)
+
+     def forward(self, x, x_mask):
+         x = self.conv_1(self.padding(x * x_mask))
+         if self.activation == "gelu":
+             x = x * torch.sigmoid(1.702 * x)
+         else:
+             x = torch.relu(x)
+         x = self.drop(x)
+         x = self.conv_2(self.padding(x * x_mask))
+         return x * x_mask
+
+     def _causal_padding(self, x):
+         if self.kernel_size == 1:
+             return x
+         pad_l = self.kernel_size - 1
+         pad_r = 0
+         padding = [[0, 0], [0, 0], [pad_l, pad_r]]
+         x = F.pad(x, commons.convert_pad_shape(padding))
+         return x
+
+     def _same_padding(self, x):
+         if self.kernel_size == 1:
+             return x
+         pad_l = (self.kernel_size - 1) // 2
+         pad_r = self.kernel_size // 2
+         padding = [[0, 0], [0, 0], [pad_l, pad_r]]
+         x = F.pad(x, commons.convert_pad_shape(padding))
+         return x
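
_relative_position_to_absolute_position above converts per-query relative-position logits of shape [b, h, l, 2*l-1] into an absolute [b, h, l, l] score grid using only padding and reshaping, with no explicit gather. A minimal sketch verifying the shape transform; convert_pad_shape is reimplemented locally because commons.py falls outside this 50-file view:

# Minimal sketch checking the pad-and-reshape trick above. convert_pad_shape
# reverses the pad spec and flattens it into the form F.pad expects.
import torch
import torch.nn.functional as F

def convert_pad_shape(pad_shape):
    return [item for sublist in pad_shape[::-1] for item in sublist]

def rel_to_abs(x):  # x: [b, h, l, 2*l-1] -> [b, h, l, l]
    batch, heads, length, _ = x.size()
    x = F.pad(x, convert_pad_shape([[0, 0], [0, 0], [0, 0], [0, 1]]))
    x_flat = x.view([batch, heads, length * 2 * length])
    x_flat = F.pad(x_flat, convert_pad_shape([[0, 0], [0, 0], [0, length - 1]]))
    return x_flat.view([batch, heads, length + 1, 2 * length - 1])[:, :, :length, length - 1:]

rel = torch.randn(2, 4, 5, 9)  # window of 2*5-1 = 9 relative positions
print(rel_to_abs(rel).shape)   # torch.Size([2, 4, 5, 5])
# Query row i keeps entries rel[..., i, (l-1)-i : (2l-1)-i], i.e. relative
# offsets -i .. l-1-i land on absolute key positions 0 .. l-1.
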
chinese_dialect_lexicons/changzhou.json ADDED
@@ -0,0 +1,23 @@
+ {
+   "name": "Changzhou dialect to IPA",
+   "segmentation": {
+     "type": "mmseg",
+     "dict": {
+       "type": "ocd2",
+       "file": "changzhou.ocd2"
+     }
+   },
+   "conversion_chain": [
+     {
+       "dict": {
+         "type": "group",
+         "dicts": [
+           {
+             "type": "ocd2",
+             "file": "changzhou.ocd2"
+           }
+         ]
+       }
+     }
+   ]
+ }
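
Each lexicon JSON pairs an mmseg segmentation dictionary with a conversion chain of ocd2 dictionaries; the remaining dialect files below follow the same schema with only the name and filenames changed. The consumer of these configs is not among the 50 files shown, so the following loader is a purely illustrative sketch of the structure:

# Illustrative sketch of reading one of these lexicon configs; the real
# consumer (the text frontend doing mmseg + ocd2 lookup) is not in this view.
import json
from pathlib import Path

def load_lexicon_config(path):
    cfg = json.loads(Path(path).read_text(encoding="utf-8"))
    seg_dict = cfg["segmentation"]["dict"]["file"]  # dict used to segment input text
    chain = [d["file"]
             for step in cfg["conversion_chain"]
             for d in step["dict"]["dicts"]]        # ocd2 dicts applied in order
    return cfg["name"], seg_dict, chain

name, seg, chain = load_lexicon_config("chinese_dialect_lexicons/changzhou.json")
print(name)   # "Changzhou dialect to IPA"
print(seg)    # "changzhou.ocd2"
print(chain)  # ["changzhou.ocd2"]
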
chinese_dialect_lexicons/changzhou.ocd2 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:db4ec02be9812e804291a88f9a984f544e221ed472f682bba8da5ecbefbabd8c
+ size 96119
chinese_dialect_lexicons/cixi.json ADDED
@@ -0,0 +1,23 @@
+ {
+   "name": "Cixi dialect to IPA",
+   "segmentation": {
+     "type": "mmseg",
+     "dict": {
+       "type": "ocd2",
+       "file": "cixi.ocd2"
+     }
+   },
+   "conversion_chain": [
+     {
+       "dict": {
+         "type": "group",
+         "dicts": [
+           {
+             "type": "ocd2",
+             "file": "cixi.ocd2"
+           }
+         ]
+       }
+     }
+   ]
+ }
chinese_dialect_lexicons/cixi.ocd2 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8113aca87c4728c66cfa6c7b5adfbb596a2930df9b7c6187c6a227ff2de87f00
+ size 98015
chinese_dialect_lexicons/fuyang.json ADDED
@@ -0,0 +1,23 @@
+ {
+   "name": "Fuyang dialect to IPA",
+   "segmentation": {
+     "type": "mmseg",
+     "dict": {
+       "type": "ocd2",
+       "file": "fuyang.ocd2"
+     }
+   },
+   "conversion_chain": [
+     {
+       "dict": {
+         "type": "group",
+         "dicts": [
+           {
+             "type": "ocd2",
+             "file": "fuyang.ocd2"
+           }
+         ]
+       }
+     }
+   ]
+ }
chinese_dialect_lexicons/fuyang.ocd2 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:98e1fbec75e090550cf131de226a1d867c7896b51170f8d7d21f9101297f4c08
+ size 83664
chinese_dialect_lexicons/hangzhou.json ADDED
@@ -0,0 +1,19 @@
+ {
+   "name": "Hangzhounese to IPA",
+   "segmentation": {
+     "type": "mmseg",
+     "dict": {
+       "type": "ocd2",
+       "file": "hangzhou.ocd2"
+     }
+   },
+   "conversion_chain": [{
+     "dict": {
+       "type": "group",
+       "dicts": [{
+         "type": "ocd2",
+         "file": "hangzhou.ocd2"
+       }]
+     }
+   }]
+ }
chinese_dialect_lexicons/hangzhou.ocd2 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c7a9eb5fbd3b8c91745dbb2734f2700b75a47c3821e381566afc567d7da4d9d5
+ size 427268
chinese_dialect_lexicons/jiading.json ADDED
@@ -0,0 +1,23 @@
+ {
+   "name": "Jiading dialect to IPA",
+   "segmentation": {
+     "type": "mmseg",
+     "dict": {
+       "type": "ocd2",
+       "file": "jiading.ocd2"
+     }
+   },
+   "conversion_chain": [
+     {
+       "dict": {
+         "type": "group",
+         "dicts": [
+           {
+             "type": "ocd2",
+             "file": "jiading.ocd2"
+           }
+         ]
+       }
+     }
+   ]
+ }
chinese_dialect_lexicons/jiading.ocd2 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4f3ac33214e65e7223e8c561bc12ec90a2d87db3cf8d20e87a30bbd8eb788187
+ size 111144
chinese_dialect_lexicons/jiashan.json ADDED
@@ -0,0 +1,23 @@
+ {
+   "name": "Jiashan dialect to IPA",
+   "segmentation": {
+     "type": "mmseg",
+     "dict": {
+       "type": "ocd2",
+       "file": "jiashan.ocd2"
+     }
+   },
+   "conversion_chain": [
+     {
+       "dict": {
+         "type": "group",
+         "dicts": [
+           {
+             "type": "ocd2",
+             "file": "jiashan.ocd2"
+           }
+         ]
+       }
+     }
+   ]
+ }
chinese_dialect_lexicons/jiashan.ocd2 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6310729b85976b6e6407b4f66ad13a3ad7a51a42f3c05c98e294bcbb3159456c
+ size 71716
chinese_dialect_lexicons/jingjiang.json ADDED
@@ -0,0 +1,23 @@
+ {
+   "name": "Jingjiang dialect to IPA",
+   "segmentation": {
+     "type": "mmseg",
+     "dict": {
+       "type": "ocd2",
+       "file": "jingjiang.ocd2"
+     }
+   },
+   "conversion_chain": [
+     {
+       "dict": {
+         "type": "group",
+         "dicts": [
+           {
+             "type": "ocd2",
+             "file": "jingjiang.ocd2"
+           }
+         ]
+       }
+     }
+   ]
+ }
chinese_dialect_lexicons/jingjiang.ocd2 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:154d9cac032a3284a6aa175689a5805f068f6896429009a7d94d41616694131f
+ size 86093
chinese_dialect_lexicons/jyutjyu.json ADDED
@@ -0,0 +1,19 @@
+ {
+   "name": "Cantonese to IPA",
+   "segmentation": {
+     "type": "mmseg",
+     "dict": {
+       "type": "ocd2",
+       "file": "jyutjyu.ocd2"
+     }
+   },
+   "conversion_chain": [{
+     "dict": {
+       "type": "group",
+       "dicts": [{
+         "type": "ocd2",
+         "file": "jyutjyu.ocd2"
+       }]
+     }
+   }]
+ }
chinese_dialect_lexicons/jyutjyu.ocd2 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:aea11bfe51b184b3f000d20ab49757979b216219203839d2b2e3c1f990a13fa5
+ size 2432991
chinese_dialect_lexicons/linping.json ADDED
@@ -0,0 +1,23 @@
+ {
+   "name": "Linping dialect to IPA",
+   "segmentation": {
+     "type": "mmseg",
+     "dict": {
+       "type": "ocd2",
+       "file": "linping.ocd2"
+     }
+   },
+   "conversion_chain": [
+     {
+       "dict": {
+         "type": "group",
+         "dicts": [
+           {
+             "type": "ocd2",
+             "file": "linping.ocd2"
+           }
+         ]
+       }
+     }
+   ]
+ }
chinese_dialect_lexicons/linping.ocd2 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7fcd3b53e5aa6cd64419835c14769d53cc230e229c0fbd20efb65c46e07b712b
+ size 65351
chinese_dialect_lexicons/ningbo.json ADDED
@@ -0,0 +1,19 @@
+ {
+   "name": "Ningbonese to IPA",
+   "segmentation": {
+     "type": "mmseg",
+     "dict": {
+       "type": "ocd2",
+       "file": "ningbo.ocd2"
+     }
+   },
+   "conversion_chain": [{
+     "dict": {
+       "type": "group",
+       "dicts": [{
+         "type": "ocd2",
+         "file": "ningbo.ocd2"
+       }]
+     }
+   }]
+ }
chinese_dialect_lexicons/ningbo.ocd2 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5876b000f611ea52bf18cda5bcbdd0cfcc55e1c09774d9a24e3b5c7d90002435
+ size 386414
chinese_dialect_lexicons/pinghu.json ADDED
@@ -0,0 +1,23 @@
+ {
+   "name": "Pinghu dialect to IPA",
+   "segmentation": {
+     "type": "mmseg",
+     "dict": {
+       "type": "ocd2",
+       "file": "pinghu.ocd2"
+     }
+   },
+   "conversion_chain": [
+     {
+       "dict": {
+         "type": "group",
+         "dicts": [
+           {
+             "type": "ocd2",
+             "file": "pinghu.ocd2"
+           }
+         ]
+       }
+     }
+   ]
+ }
chinese_dialect_lexicons/pinghu.ocd2 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:01b0e0dad8cddb0e2cb23899d4a2f97f2c0b369d5ff369076c5cdb7bd4528e4f
+ size 69420
chinese_dialect_lexicons/ruao.json ADDED
@@ -0,0 +1,23 @@
+ {
+   "name": "Ruao dialect to IPA",
+   "segmentation": {
+     "type": "mmseg",
+     "dict": {
+       "type": "ocd2",
+       "file": "ruao.ocd2"
+     }
+   },
+   "conversion_chain": [
+     {
+       "dict": {
+         "type": "group",
+         "dicts": [
+           {
+             "type": "ocd2",
+             "file": "ruao.ocd2"
+           }
+         ]
+       }
+     }
+   ]
+ }
chinese_dialect_lexicons/ruao.ocd2 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:259a42ad761233f7d6ca6eec39268e27a65b2ded025f2b7725501cf5e3e02d8a
+ size 58841
chinese_dialect_lexicons/sanmen.json ADDED
@@ -0,0 +1,23 @@
+ {
+   "name": "Sanmen dialect to IPA",
+   "segmentation": {
+     "type": "mmseg",
+     "dict": {
+       "type": "ocd2",
+       "file": "sanmen.ocd2"
+     }
+   },
+   "conversion_chain": [
+     {
+       "dict": {
+         "type": "group",
+         "dicts": [
+           {
+             "type": "ocd2",
+             "file": "sanmen.ocd2"
+           }
+         ]
+       }
+     }
+   ]
+ }
chinese_dialect_lexicons/sanmen.ocd2 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:afa70a920b6805e279ed15246026b70dbeb2a8329ad585fbae8cfdf45e7489a9
+ size 80210
chinese_dialect_lexicons/shaoxing.json ADDED
@@ -0,0 +1,23 @@
+ {
+   "name": "Shaoxing dialect to IPA",
+   "segmentation": {
+     "type": "mmseg",
+     "dict": {
+       "type": "ocd2",
+       "file": "shaoxing.ocd2"
+     }
+   },
+   "conversion_chain": [
+     {
+       "dict": {
+         "type": "group",
+         "dicts": [
+           {
+             "type": "ocd2",
+             "file": "shaoxing.ocd2"
+           }
+         ]
+       }
+     }
+   ]
+ }
chinese_dialect_lexicons/shaoxing.ocd2 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a347aa25bf435803727b4194cf34de4de3e61f03427ee21043a711cdb0b9d940
+ size 113108
chinese_dialect_lexicons/suichang.json ADDED
@@ -0,0 +1,23 @@
+ {
+   "name": "Suichang dialect to IPA",
+   "segmentation": {
+     "type": "mmseg",
+     "dict": {
+       "type": "ocd2",
+       "file": "suichang.ocd2"
+     }
+   },
+   "conversion_chain": [
+     {
+       "dict": {
+         "type": "group",
+         "dicts": [
+           {
+             "type": "ocd2",
+             "file": "suichang.ocd2"
+           }
+         ]
+       }
+     }
+   ]
+ }
chinese_dialect_lexicons/suichang.ocd2 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a8062749ff70db65d469d91bd92375607f8648a138b896e58cf7c28edb8f970e
+ size 81004
chinese_dialect_lexicons/suzhou.json ADDED
@@ -0,0 +1,19 @@
+ {
+   "name": "Suzhounese to IPA",
+   "segmentation": {
+     "type": "mmseg",
+     "dict": {
+       "type": "ocd2",
+       "file": "suzhou.ocd2"
+     }
+   },
+   "conversion_chain": [{
+     "dict": {
+       "type": "group",
+       "dicts": [{
+         "type": "ocd2",
+         "file": "suzhou.ocd2"
+       }]
+     }
+   }]
+ }
chinese_dialect_lexicons/suzhou.ocd2 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a8815595a248135874329e7f34662dd243a266be3e8375e8409f95da95d6d540
+ size 506184
chinese_dialect_lexicons/tiantai.json ADDED
@@ -0,0 +1,23 @@
+ {
+   "name": "Tiantai dialect to IPA",
+   "segmentation": {
+     "type": "mmseg",
+     "dict": {
+       "type": "ocd2",
+       "file": "tiantai.ocd2"
+     }
+   },
+   "conversion_chain": [
+     {
+       "dict": {
+         "type": "group",
+         "dicts": [
+           {
+             "type": "ocd2",
+             "file": "tiantai.ocd2"
+           }
+         ]
+       }
+     }
+   ]
+ }
chinese_dialect_lexicons/tiantai.ocd2 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:339e0ba454444dbf8fbe75de6f49769d11dfe2f2f5ba7dea74ba20fba5d6d343
+ size 120951
chinese_dialect_lexicons/tongxiang.json ADDED
@@ -0,0 +1,23 @@
+ {
+   "name": "Tongxiang dialect to IPA",
+   "segmentation": {
+     "type": "mmseg",
+     "dict": {
+       "type": "ocd2",
+       "file": "tongxiang.ocd2"
+     }
+   },
+   "conversion_chain": [
+     {
+       "dict": {
+         "type": "group",
+         "dicts": [
+           {
+             "type": "ocd2",
+             "file": "tongxiang.ocd2"
+           }
+         ]
+       }
+     }
+   ]
+ }
chinese_dialect_lexicons/tongxiang.ocd2 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7432d85588eb8ba34e7baea9f26af8d332572037ff7d41a6730f96c02e5fd063
+ size 137499
chinese_dialect_lexicons/wenzhou.json ADDED
@@ -0,0 +1,23 @@
+ {
+   "name": "Wenzhou dialect to IPA",
+   "segmentation": {
+     "type": "mmseg",
+     "dict": {
+       "type": "ocd2",
+       "file": "wenzhou.ocd2"
+     }
+   },
+   "conversion_chain": [
+     {
+       "dict": {
+         "type": "group",
+         "dicts": [
+           {
+             "type": "ocd2",
+             "file": "wenzhou.ocd2"
+           }
+         ]
+       }
+     }
+   ]
+ }
chinese_dialect_lexicons/wenzhou.ocd2 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ed05c0c615a38f55a139a73bcc3960897d8cd567c9482a0a06b272eb0b46aa05
+ size 83121
chinese_dialect_lexicons/wuxi.json ADDED
@@ -0,0 +1,19 @@
+ {
+   "name": "Wuxinese to IPA",
+   "segmentation": {
+     "type": "mmseg",
+     "dict": {
+       "type": "ocd2",
+       "file": "wuxi.ocd2"
+     }
+   },
+   "conversion_chain": [{
+     "dict": {
+       "type": "group",
+       "dicts": [{
+         "type": "ocd2",
+         "file": "wuxi.ocd2"
+       }]
+     }
+   }]
+ }
chinese_dialect_lexicons/wuxi.ocd2 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:64f27ffaa75e542e4464e53c4acf94607be1526a90922ac8b28870104aaebdff
+ size 358666
chinese_dialect_lexicons/xiaoshan.json ADDED
@@ -0,0 +1,23 @@
+ {
+   "name": "Xiaoshan dialect to IPA",
+   "segmentation": {
+     "type": "mmseg",
+     "dict": {
+       "type": "ocd2",
+       "file": "xiaoshan.ocd2"
+     }
+   },
+   "conversion_chain": [
+     {
+       "dict": {
+         "type": "group",
+         "dicts": [
+           {
+             "type": "ocd2",
+             "file": "xiaoshan.ocd2"
+           }
+         ]
+       }
+     }
+   ]
+ }
chinese_dialect_lexicons/xiaoshan.ocd2 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:651bd314c5e57312eeee537037f6c6e56a12ef446216264aad70bf68bf6a283d
+ size 77119
chinese_dialect_lexicons/xiashi.json ADDED
@@ -0,0 +1,19 @@
+ {
+   "name": "Xiashi dialect to IPA",
+   "segmentation": {
+     "type": "mmseg",
+     "dict": {
+       "type": "ocd2",
+       "file": "xiashi.ocd2"
+     }
+   },
+   "conversion_chain": [{
+     "dict": {
+       "type": "group",
+       "dicts": [{
+         "type": "ocd2",
+         "file": "xiashi.ocd2"
+       }]
+     }
+   }]
+ }
chinese_dialect_lexicons/xiashi.ocd2 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2bc638633b82e196776a3adfc621c854d0da923b7cff6e7d0c9576723cdc03cd
+ size 70314
chinese_dialect_lexicons/yixing.json ADDED
@@ -0,0 +1,19 @@
+ {
+   "name": "Yixing dialect to IPA",
+   "segmentation": {
+     "type": "mmseg",
+     "dict": {
+       "type": "ocd2",
+       "file": "yixing.ocd2"
+     }
+   },
+   "conversion_chain": [{
+     "dict": {
+       "type": "group",
+       "dicts": [{
+         "type": "ocd2",
+         "file": "yixing.ocd2"
+       }]
+     }
+   }]
+ }