Spaces: Runtime error
Hecheng0625 committed · Commit 7ee3434
1 Parent(s): c968fc3
Upload 61 files
This view is limited to 50 files because it contains too many changes. See raw diff.
- .gitattributes +1 -0
- app.py +355 -0
- text/__init__.py +79 -0
- text/cleaners.py +98 -0
- text/cmudict.py +145 -0
- text/g2p.py +38 -0
- text/g2p_module.py +230 -0
- text/lexicon/librispeech-lexicon.txt +0 -0
- text/lexicon/pinyin-lexicon-r.txt +4120 -0
- text/numbers.py +77 -0
- text/pinyin.py +218 -0
- text/symbol_table.py +292 -0
- text/symbols.py +34 -0
- text/text_token_collation.py +123 -0
- utils/HyperParams/__init__.py +6 -0
- utils/HyperParams/hps.py +43 -0
- utils/__init__.py +0 -0
- utils/audio.py +74 -0
- utils/audio_slicer.py +476 -0
- utils/cut_by_vad.py +105 -0
- utils/data_utils.py +588 -0
- utils/distribution.py +270 -0
- utils/dsp.py +97 -0
- utils/duration.py +86 -0
- utils/f0.py +275 -0
- utils/hparam.py +659 -0
- utils/hubert.py +155 -0
- utils/io.py +182 -0
- utils/io_optim.py +123 -0
- utils/mel.py +280 -0
- utils/mert.py +139 -0
- utils/mfa_prepare.py +116 -0
- utils/model_summary.py +74 -0
- utils/prompt_preparer.py +68 -0
- utils/ssim.py +80 -0
- utils/stft.py +278 -0
- utils/symbol_table.py +317 -0
- utils/tokenizer.py +150 -0
- utils/topk_sampling.py +89 -0
- utils/trainer_utils.py +16 -0
- utils/util.py +687 -0
- utils/whisper_transcription.py +122 -0
- utils/world.py +92 -0
- visualization/SingVisio/System_Introduction_of_SingVisio_V2.pdf +3 -0
- visualization/SingVisio/webpage/Dockerfile +23 -0
- visualization/SingVisio/webpage/README.md +126 -0
- visualization/SingVisio/webpage/config/default.json +407 -0
- visualization/SingVisio/webpage/img/difference_bar.jpg +0 -0
- visualization/SingVisio/webpage/img/syllable.png +0 -0
- visualization/SingVisio/webpage/index.html +390 -0
.gitattributes
CHANGED
@@ -37,3 +37,4 @@ imgs/vocoder/gan/MSSBCQTD.png filter=lfs diff=lfs merge=lfs -text
 models/codec/facodec/modules/JDC/bst.t7 filter=lfs diff=lfs merge=lfs -text
 models/tts/maskgct/g2p/sources/chinese_lexicon.txt filter=lfs diff=lfs merge=lfs -text
 models/tts/maskgct/wav/prompt.wav filter=lfs diff=lfs merge=lfs -text
+visualization/SingVisio/System_Introduction_of_SingVisio_V2.pdf filter=lfs diff=lfs merge=lfs -text
app.py
ADDED
@@ -0,0 +1,355 @@
+import gradio as gr
+import torch
+import safetensors
+from huggingface_hub import hf_hub_download
+import soundfile as sf
+import os  # used by os.makedirs() in inference()
+
+import numpy as np
+import librosa
+from models.codec.kmeans.repcodec_model import RepCodec
+from models.tts.maskgct.maskgct_s2a import MaskGCT_S2A
+from models.tts.maskgct.maskgct_t2s import MaskGCT_T2S
+from models.codec.amphion_codec.codec import CodecEncoder, CodecDecoder
+from transformers import Wav2Vec2BertModel
+from utils.util import load_config
+from models.tts.maskgct.g2p.g2p_generation import g2p, chn_eng_g2p
+
+from transformers import SeamlessM4TFeatureExtractor
+
+processor = SeamlessM4TFeatureExtractor.from_pretrained("facebook/w2v-bert-2.0")
+
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+
+def g2p_(text, language):
+    if language in ["zh", "en"]:
+        return chn_eng_g2p(text)
+    else:
+        return g2p(text, sentence=None, language=language)
+
+
+def build_t2s_model(cfg, device):
+    t2s_model = MaskGCT_T2S(cfg=cfg)
+    t2s_model.eval()
+    t2s_model.to(device)
+    return t2s_model
+
+
+def build_s2a_model(cfg, device):
+    soundstorm_model = MaskGCT_S2A(cfg=cfg)
+    soundstorm_model.eval()
+    soundstorm_model.to(device)
+    return soundstorm_model
+
+
+def build_semantic_model(device):
+    semantic_model = Wav2Vec2BertModel.from_pretrained("facebook/w2v-bert-2.0")
+    semantic_model.eval()
+    semantic_model.to(device)
+    stat_mean_var = torch.load("./models/tts/maskgct/ckpt/wav2vec2bert_stats.pt")
+    semantic_mean = stat_mean_var["mean"]
+    semantic_std = torch.sqrt(stat_mean_var["var"])
+    semantic_mean = semantic_mean.to(device)
+    semantic_std = semantic_std.to(device)
+    return semantic_model, semantic_mean, semantic_std
+
+
+def build_semantic_codec(cfg, device):
+    semantic_codec = RepCodec(cfg=cfg)
+    semantic_codec.eval()
+    semantic_codec.to(device)
+    return semantic_codec
+
+
+def build_acoustic_codec(cfg, device):
+    codec_encoder = CodecEncoder(cfg=cfg.encoder)
+    codec_decoder = CodecDecoder(cfg=cfg.decoder)
+    codec_encoder.eval()
+    codec_decoder.eval()
+    codec_encoder.to(device)
+    codec_decoder.to(device)
+    return codec_encoder, codec_decoder
+
+
+@torch.no_grad()
+def extract_features(speech, processor):
+    inputs = processor(speech, sampling_rate=16000, return_tensors="pt")
+    input_features = inputs["input_features"][0]
+    attention_mask = inputs["attention_mask"][0]
+    return input_features, attention_mask
+
+
+@torch.no_grad()
+def extract_semantic_code(semantic_mean, semantic_std, input_features, attention_mask):
+    vq_emb = semantic_model(
+        input_features=input_features,
+        attention_mask=attention_mask,
+        output_hidden_states=True,
+    )
+    feat = vq_emb.hidden_states[17]  # (B, T, C)
+    feat = (feat - semantic_mean.to(feat)) / semantic_std.to(feat)
+
+    semantic_code, rec_feat = semantic_codec.quantize(feat)  # (B, T)
+    return semantic_code, rec_feat
+
+
+@torch.no_grad()
+def extract_acoustic_code(speech):
+    vq_emb = codec_encoder(speech.unsqueeze(1))
+    _, vq, _, _, _ = codec_decoder.quantizer(vq_emb)
+    acoustic_code = vq.permute(1, 2, 0)
+    return acoustic_code
+
+
+@torch.no_grad()
+def text2semantic(
+    device,
+    prompt_speech,
+    prompt_text,
+    prompt_language,
+    target_text,
+    target_language,
+    target_len=None,
+    n_timesteps=50,
+    cfg=2.5,
+    rescale_cfg=0.75,
+):
+
+    prompt_phone_id = g2p_(prompt_text, prompt_language)[1]
+
+    target_phone_id = g2p_(target_text, target_language)[1]
+
+    if target_len is None:
+        target_len = int(
+            (len(prompt_speech) * len(target_phone_id) / len(prompt_phone_id))
+            / 16000
+            * 50  # 50 semantic tokens per second of 16 kHz audio
+        )
+    else:
+        target_len = int(target_len * 50)
+
+    prompt_phone_id = torch.tensor(prompt_phone_id, dtype=torch.long).to(device)
+    target_phone_id = torch.tensor(target_phone_id, dtype=torch.long).to(device)
+
+    phone_id = torch.cat([prompt_phone_id, target_phone_id])
+
+    input_features, attention_mask = extract_features(prompt_speech, processor)
+    input_features = input_features.unsqueeze(0).to(device)
+    attention_mask = attention_mask.unsqueeze(0).to(device)
+    semantic_code, _ = extract_semantic_code(
+        semantic_mean, semantic_std, input_features, attention_mask
+    )
+
+    predict_semantic = t2s_model.reverse_diffusion(
+        semantic_code[:, :],
+        target_len,
+        phone_id.unsqueeze(0),
+        n_timesteps=n_timesteps,
+        cfg=cfg,
+        rescale_cfg=rescale_cfg,
+    )
+
+    combine_semantic_code = torch.cat([semantic_code[:, :], predict_semantic], dim=-1)
+    prompt_semantic_code = semantic_code
+
+    return combine_semantic_code, prompt_semantic_code
+
+
+@torch.no_grad()
+def semantic2acoustic(
+    device,
+    combine_semantic_code,
+    acoustic_code,
+    n_timesteps=[25, 10, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
+    cfg=2.5,
+    rescale_cfg=0.75,
+):
+
+    semantic_code = combine_semantic_code
+
+    cond = s2a_model_1layer.cond_emb(semantic_code)
+    prompt = acoustic_code[:, :, :]
+    predict_1layer = s2a_model_1layer.reverse_diffusion(
+        cond=cond,
+        prompt=prompt,
+        temp=1.5,
+        filter_thres=0.98,
+        n_timesteps=n_timesteps[:1],
+        cfg=cfg,
+        rescale_cfg=rescale_cfg,
+    )
+
+    cond = s2a_model_full.cond_emb(semantic_code)
+    prompt = acoustic_code[:, :, :]
+    predict_full = s2a_model_full.reverse_diffusion(
+        cond=cond,
+        prompt=prompt,
+        temp=1.5,
+        filter_thres=0.98,
+        n_timesteps=n_timesteps,
+        cfg=cfg,
+        rescale_cfg=rescale_cfg,
+        gt_code=predict_1layer,
+    )
+
+    vq_emb = codec_decoder.vq2emb(predict_full.permute(2, 0, 1), n_quantizers=12)
+    recovered_audio = codec_decoder(vq_emb)
+    prompt_vq_emb = codec_decoder.vq2emb(prompt.permute(2, 0, 1), n_quantizers=12)
+    recovered_prompt_audio = codec_decoder(prompt_vq_emb)
+    recovered_prompt_audio = recovered_prompt_audio[0][0].cpu().numpy()
+    recovered_audio = recovered_audio[0][0].cpu().numpy()
+    combine_audio = np.concatenate([recovered_prompt_audio, recovered_audio])
+
+    return combine_audio, recovered_audio
+
+
+# Load the model and checkpoints
+def load_models():
+    cfg_path = "./models/tts/maskgct/config/maskgct.json"
+
+    cfg = load_config(cfg_path)
+    semantic_model, semantic_mean, semantic_std = build_semantic_model(device)
+    semantic_codec = build_semantic_codec(cfg.model.semantic_codec, device)
+    codec_encoder, codec_decoder = build_acoustic_codec(
+        cfg.model.acoustic_codec, device
+    )
+    t2s_model = build_t2s_model(cfg.model.t2s_model, device)
+    s2a_model_1layer = build_s2a_model(cfg.model.s2a_model.s2a_1layer, device)
+    s2a_model_full = build_s2a_model(cfg.model.s2a_model.s2a_full, device)
+
+    # Download checkpoints
+    semantic_code_ckpt = hf_hub_download(
+        "amphion/MaskGCT", filename="semantic_codec/model.safetensors"
+    )
+    codec_encoder_ckpt = hf_hub_download(
+        "amphion/MaskGCT", filename="acoustic_codec/model.safetensors"
+    )
+    codec_decoder_ckpt = hf_hub_download(
+        "amphion/MaskGCT", filename="acoustic_codec/model_1.safetensors"
+    )
+    t2s_model_ckpt = hf_hub_download(
+        "amphion/MaskGCT", filename="t2s_model/model.safetensors"
+    )
+    s2a_1layer_ckpt = hf_hub_download(
+        "amphion/MaskGCT", filename="s2a_model/s2a_model_1layer/model.safetensors"
+    )
+    s2a_full_ckpt = hf_hub_download(
+        "amphion/MaskGCT", filename="s2a_model/s2a_model_full/model.safetensors"
+    )
+
+    safetensors.torch.load_model(semantic_codec, semantic_code_ckpt)
+    safetensors.torch.load_model(codec_encoder, codec_encoder_ckpt)
+    safetensors.torch.load_model(codec_decoder, codec_decoder_ckpt)
+    safetensors.torch.load_model(t2s_model, t2s_model_ckpt)
+    safetensors.torch.load_model(s2a_model_1layer, s2a_1layer_ckpt)
+    safetensors.torch.load_model(s2a_model_full, s2a_full_ckpt)
+
+    return (
+        semantic_model,
+        semantic_mean,
+        semantic_std,
+        semantic_codec,
+        codec_encoder,
+        codec_decoder,
+        t2s_model,
+        s2a_model_1layer,
+        s2a_model_full,
+    )
+
+
+@torch.no_grad()
+def maskgct_inference(
+    prompt_speech_path,
+    prompt_text,
+    target_text,
+    language="en",
+    target_language="en",
+    target_len=None,
+    n_timesteps=25,
+    cfg=2.5,
+    rescale_cfg=0.75,
+    n_timesteps_s2a=[25, 10, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
+    cfg_s2a=2.5,
+    rescale_cfg_s2a=0.75,
+    device=torch.device("cuda:5"),  # hardcoded default; the Gradio path overrides it
+):
+    speech_16k = librosa.load(prompt_speech_path, sr=16000)[0]
+    speech = librosa.load(prompt_speech_path, sr=24000)[0]
+
+    combine_semantic_code, _ = text2semantic(
+        device,
+        speech_16k,
+        prompt_text,
+        language,
+        target_text,
+        target_language,
+        target_len,
+        n_timesteps,
+        cfg,
+        rescale_cfg,
+    )
+    acoustic_code = extract_acoustic_code(torch.tensor(speech).unsqueeze(0).to(device))
+    _, recovered_audio = semantic2acoustic(
+        device,
+        combine_semantic_code,
+        acoustic_code,
+        n_timesteps=n_timesteps_s2a,
+        cfg=cfg_s2a,
+        rescale_cfg=rescale_cfg_s2a,
+    )
+
+    return recovered_audio
+
+
+@torch.no_grad()
+def inference(
+    prompt_wav,
+    prompt_text,
+    target_text,
+    target_len,
+    n_timesteps,
+    language,
+    target_language,
+):
+    save_path = "./output/output.wav"
+    os.makedirs("./output", exist_ok=True)
+    recovered_audio = maskgct_inference(
+        prompt_wav,
+        prompt_text,
+        target_text,
+        language,
+        target_language,
+        target_len=target_len,
+        n_timesteps=int(n_timesteps),
+        device=device,
+    )
+    sf.write(save_path, recovered_audio, 24000)
+    return save_path
+
+
+# Language list
+language_list = ["en", "zh", "ja", "ko", "fr", "de"]
+
+# Gradio interface
+iface = gr.Interface(
+    fn=inference,
+    inputs=[
+        gr.Audio(label="Upload Prompt Wav", type="filepath"),
+        gr.Textbox(label="Prompt Text"),
+        gr.Textbox(label="Target Text"),
+        gr.Number(
+            label="Target Duration (in seconds)", value=None
+        ),  # Removed 'optional=True'
+        gr.Slider(
+            label="Number of Timesteps", minimum=15, maximum=100, value=25, step=1
+        ),
+        gr.Dropdown(label="Language", choices=language_list, value="en"),
+        gr.Dropdown(label="Target Language", choices=language_list, value="en"),
+    ],
+    outputs=gr.Audio(label="Generated Audio"),
+    title="MaskGCT TTS Demo",
+    description="Generate speech from text using the MaskGCT model.",
+)
+
+# Launch the interface
+iface.launch(allowed_paths=["./output"])
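
Note on the diff above: load_models() is defined but never called in the 355 uploaded lines, while extract_semantic_code(), text2semantic(), and semantic2acoustic() read semantic_model, semantic_mean, semantic_std, semantic_codec, the codec pair, t2s_model, s2a_model_1layer, and s2a_model_full as module-level globals. A minimal sketch of the wiring the app presumably needs before iface.launch() — the unpacking below is an assumption, not part of this commit:

# Hypothetical wiring -- not present in the uploaded app.py. Binds the globals
# that the inference helpers reference; without something like this, the first
# request would raise NameError on `semantic_model`.
(
    semantic_model,
    semantic_mean,
    semantic_std,
    semantic_codec,
    codec_encoder,
    codec_decoder,
    t2s_model,
    s2a_model_1layer,
    s2a_model_full,
) = load_models()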
text/__init__.py
ADDED
@@ -0,0 +1,79 @@
+# Copyright (c) 2023 Amphion.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+""" This code is modified from https://github.com/keithito/tacotron """
+import re
+from text import cleaners
+from text.symbols import symbols
+
+
+# Mappings from symbol to numeric ID and vice versa:
+_symbol_to_id = {s: i for i, s in enumerate(symbols)}
+_id_to_symbol = {i: s for i, s in enumerate(symbols)}
+
+# Regular expression matching text enclosed in curly braces:
+_curly_re = re.compile(r"(.*?)\{(.+?)\}(.*)")
+
+
+def text_to_sequence(text, cleaner_names):
+    """Converts a string of text to a sequence of IDs corresponding to the symbols in the text.
+
+    The text can optionally have ARPAbet sequences enclosed in curly braces embedded
+    in it. For example, "Turn left on {HH AW1 S S T AH0 N} Street."
+
+    Args:
+      text: string to convert to a sequence
+      cleaner_names: names of the cleaner functions to run the text through
+
+    Returns:
+      List of integers corresponding to the symbols in the text
+    """
+    sequence = []
+
+    # Check for curly braces and treat their contents as ARPAbet:
+    while len(text):
+        m = _curly_re.match(text)
+
+        if not m:
+            sequence += _symbols_to_sequence(_clean_text(text, cleaner_names))
+            break
+        sequence += _symbols_to_sequence(_clean_text(m.group(1), cleaner_names))
+        sequence += _arpabet_to_sequence(m.group(2))
+        text = m.group(3)
+    return sequence
+
+
+def sequence_to_text(sequence):
+    """Converts a sequence of IDs back to a string"""
+    result = ""
+    for symbol_id in sequence:
+        if symbol_id in _id_to_symbol:
+            s = _id_to_symbol[symbol_id]
+            # Enclose ARPAbet back in curly braces:
+            if len(s) > 1 and s[0] == "@":
+                s = "{%s}" % s[1:]
+            result += s
+    return result.replace("}{", " ")
+
+
+def _clean_text(text, cleaner_names):
+    for name in cleaner_names:
+        cleaner = getattr(cleaners, name)
+        if not cleaner:
+            raise Exception("Unknown cleaner: %s" % name)
+        text = cleaner(text)
+    return text
+
+
+def _symbols_to_sequence(symbols):
+    return [_symbol_to_id[s] for s in symbols if _should_keep_symbol(s)]
+
+
+def _arpabet_to_sequence(text):
+    return _symbols_to_sequence(["@" + s for s in text.split()])
+
+
+def _should_keep_symbol(s):
+    return s in _symbol_to_id and s != "_" and s != "~"
text/cleaners.py
ADDED
@@ -0,0 +1,98 @@
+# Copyright (c) 2023 Amphion.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+""" This code is modified from https://github.com/keithito/tacotron """
+
+"""
+Cleaners are transformations that run over the input text at both training and eval time.
+
+Cleaners can be selected by passing a comma-delimited list of cleaner names as the "cleaners"
+hyperparameter. Some cleaners are English-specific. You'll typically want to use:
+  1. "english_cleaners" for English text
+  2. "transliteration_cleaners" for non-English text that can be transliterated to ASCII using
+     the Unidecode library (https://pypi.python.org/pypi/Unidecode)
+  3. "basic_cleaners" if you do not want to transliterate (in this case, you should also update
+     the symbols in symbols.py to match your data).
+"""
+
+
+# Regular expression matching whitespace:
+import re
+from unidecode import unidecode
+from .numbers import normalize_numbers
+
+_whitespace_re = re.compile(r"\s+")
+
+# List of (regular expression, replacement) pairs for abbreviations:
+_abbreviations = [
+    (re.compile("\\b%s\\." % x[0], re.IGNORECASE), x[1])
+    for x in [
+        ("mrs", "misess"),
+        ("mr", "mister"),
+        ("dr", "doctor"),
+        ("st", "saint"),
+        ("co", "company"),
+        ("jr", "junior"),
+        ("maj", "major"),
+        ("gen", "general"),
+        ("drs", "doctors"),
+        ("rev", "reverend"),
+        ("lt", "lieutenant"),
+        ("hon", "honorable"),
+        ("sgt", "sergeant"),
+        ("capt", "captain"),
+        ("esq", "esquire"),
+        ("ltd", "limited"),
+        ("col", "colonel"),
+        ("ft", "fort"),
+    ]
+]
+
+
+def expand_abbreviations(text):
+    for regex, replacement in _abbreviations:
+        text = re.sub(regex, replacement, text)
+    return text
+
+
+def expand_numbers(text):
+    return normalize_numbers(text)
+
+
+def lowercase(text):
+    return text.lower()
+
+
+def collapse_whitespace(text):
+    return re.sub(_whitespace_re, " ", text)
+
+
+def convert_to_ascii(text):
+    return unidecode(text)
+
+
+def basic_cleaners(text):
+    """Basic pipeline that lowercases and collapses whitespace without transliteration."""
+    text = lowercase(text)
+    text = collapse_whitespace(text)
+    return text
+
+
+def transliteration_cleaners(text):
+    """Pipeline for non-English text that transliterates to ASCII."""
+    text = convert_to_ascii(text)
+    text = lowercase(text)
+    text = collapse_whitespace(text)
+    return text
+
+
+def english_cleaners(text):
+    """Pipeline for English text, including number and abbreviation expansion."""
+    text = convert_to_ascii(text)
+    text = lowercase(text)
+    text = expand_numbers(text)
+    text = expand_abbreviations(text)
+    text = collapse_whitespace(text)
+    return text
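
A small illustration of the English pipeline above: transliterate, lowercase, expand numbers, expand abbreviations, collapse whitespace, in that order. Number expansion is delegated to text/numbers.py from the same commit:

from text.cleaners import english_cleaners

print(english_cleaners("Mr. Müller bought 16 apples."))
# expected: "mister muller bought sixteen apples."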
text/cmudict.py
ADDED
@@ -0,0 +1,145 @@
+# Copyright (c) 2023 Amphion.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+""" This code is modified from https://github.com/keithito/tacotron """
+
+import re
+
+
+valid_symbols = [
+    "AA",
+    "AA0",
+    "AA1",
+    "AA2",
+    "AE",
+    "AE0",
+    "AE1",
+    "AE2",
+    "AH",
+    "AH0",
+    "AH1",
+    "AH2",
+    "AO",
+    "AO0",
+    "AO1",
+    "AO2",
+    "AW",
+    "AW0",
+    "AW1",
+    "AW2",
+    "AY",
+    "AY0",
+    "AY1",
+    "AY2",
+    "B",
+    "CH",
+    "D",
+    "DH",
+    "EH",
+    "EH0",
+    "EH1",
+    "EH2",
+    "ER",
+    "ER0",
+    "ER1",
+    "ER2",
+    "EY",
+    "EY0",
+    "EY1",
+    "EY2",
+    "F",
+    "G",
+    "HH",
+    "IH",
+    "IH0",
+    "IH1",
+    "IH2",
+    "IY",
+    "IY0",
+    "IY1",
+    "IY2",
+    "JH",
+    "K",
+    "L",
+    "M",
+    "N",
+    "NG",
+    "OW",
+    "OW0",
+    "OW1",
+    "OW2",
+    "OY",
+    "OY0",
+    "OY1",
+    "OY2",
+    "P",
+    "R",
+    "S",
+    "SH",
+    "T",
+    "TH",
+    "UH",
+    "UH0",
+    "UH1",
+    "UH2",
+    "UW",
+    "UW0",
+    "UW1",
+    "UW2",
+    "V",
+    "W",
+    "Y",
+    "Z",
+    "ZH",
+]
+
+_valid_symbol_set = set(valid_symbols)
+
+
+class CMUDict:
+    """Thin wrapper around CMUDict data. http://www.speech.cs.cmu.edu/cgi-bin/cmudict"""
+
+    def __init__(self, file_or_path, keep_ambiguous=True):
+        if isinstance(file_or_path, str):
+            with open(file_or_path, encoding="latin-1") as f:
+                entries = _parse_cmudict(f)
+        else:
+            entries = _parse_cmudict(file_or_path)
+        if not keep_ambiguous:
+            entries = {word: pron for word, pron in entries.items() if len(pron) == 1}
+        self._entries = entries
+
+    def __len__(self):
+        return len(self._entries)
+
+    def lookup(self, word):
+        """Returns list of ARPAbet pronunciations of the given word."""
+        return self._entries.get(word.upper())
+
+
+_alt_re = re.compile(r"\([0-9]+\)")
+
+
+def _parse_cmudict(file):
+    cmudict = {}
+    for line in file:
+        if len(line) and (line[0] >= "A" and line[0] <= "Z" or line[0] == "'"):
+            parts = line.split("  ")  # CMUdict separates word and pronunciation with two spaces
+            word = re.sub(_alt_re, "", parts[0])
+            pronunciation = _get_pronunciation(parts[1])
+            if pronunciation:
+                if word in cmudict:
+                    cmudict[word].append(pronunciation)
+                else:
+                    cmudict[word] = [pronunciation]
+    return cmudict
+
+
+def _get_pronunciation(s):
+    parts = s.strip().split(" ")
+    for part in parts:
+        if part not in _valid_symbol_set:
+            return None
+    return " ".join(parts)
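
Usage sketch for the wrapper above. The path is illustrative (any CMUdict-format file works: word, two spaces, space-separated ARPAbet phones; header lines are skipped by the leading-character check):

from text.cmudict import CMUDict

cmu = CMUDict("./cmudict-0.7b", keep_ambiguous=False)  # hypothetical local path
print(len(cmu))             # number of words kept after dropping ambiguous entries
print(cmu.lookup("hello"))  # e.g. ['HH AH0 L OW1'] -- lookup() upper-cases internally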
text/g2p.py
ADDED
@@ -0,0 +1,38 @@
+# Copyright (c) 2023 Amphion.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+import re
+from g2p_en import G2p
+from string import punctuation
+
+
+def read_lexicon(lex_path):
+    lexicon = {}
+    with open(lex_path) as f:
+        for line in f:
+            temp = re.split(r"\s+", line.strip("\n"))
+            word = temp[0]
+            phones = temp[1:]
+            if word.lower() not in lexicon:
+                lexicon[word.lower()] = phones
+    return lexicon
+
+
+def preprocess_english(text, lexicon):
+    text = text.rstrip(punctuation)
+
+    g2p = G2p()
+    phones = []
+    words = re.split(r"([,;.\-\?\!\s+])", text)
+    for w in words:
+        if w.lower() in lexicon:
+            phones += lexicon[w.lower()]
+        else:
+            phones += list(filter(lambda p: p != " ", g2p(w)))
+    phones = "}{".join(phones)
+    phones = re.sub(r"\{[^\w\s]?\}", "{sp}", phones)
+    phones = phones.replace("}{", " ")
+
+    return phones
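
A usage sketch of the two helpers above, using the lexicon shipped in this commit. Lexicon hits take priority; anything out-of-vocabulary falls back to g2p_en, and bare punctuation tokens become {sp}:

from text.g2p import read_lexicon, preprocess_english

lexicon = read_lexicon("./text/lexicon/librispeech-lexicon.txt")
print(preprocess_english("Hello world", lexicon))
# roughly "HH AH0 L OW1 W ER1 L D" -- exact phones depend on the lexicon/g2p_en

Note that preprocess_english() constructs a fresh G2p() on every call; batch callers may want to hoist that out, which is exactly what LexiconModule in text/g2p_module.py does.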
text/g2p_module.py
ADDED
@@ -0,0 +1,230 @@
+# Copyright (c) 2023 Amphion.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+import re
+from g2p_en import G2p
+from string import punctuation
+from typing import Any, Dict, List, Optional, Pattern, Union
+
+from phonemizer.backend import EspeakBackend
+from phonemizer.backend.espeak.language_switch import LanguageSwitch
+from phonemizer.backend.espeak.words_mismatch import WordMismatch
+from phonemizer.punctuation import Punctuation
+from phonemizer.separator import Separator
+
+try:
+    from pypinyin import Style, pinyin
+    from pypinyin.style._utils import get_finals, get_initials
+except Exception:
+    pass
+
+
+# This code is modified from
+# https://github.com/lifeiteng/vall-e/blob/9c69096d603ce13174fb5cb025f185e2e9b36ac7/valle/data/tokenizer.py
+
+
+class PypinyinBackend:
+    """PypinyinBackend for Chinese. Most of this code is referenced from espnet.
+    There are two output types, pinyin or initials_finals: one looks
+    like "ni1 hao3", the other like "n i1 h ao3".
+    """
+
+    def __init__(
+        self,
+        backend="initials_finals",
+        punctuation_marks: Union[str, Pattern] = Punctuation.default_marks(),
+    ) -> None:
+        self.backend = backend
+        self.punctuation_marks = punctuation_marks
+
+    def phonemize(
+        self, text: List[str], separator: Separator, strip=True, njobs=1
+    ) -> List[str]:
+        assert isinstance(text, List)
+        phonemized = []
+        for _text in text:
+            _text = re.sub(" +", " ", _text.strip())
+            _text = _text.replace(" ", separator.word)
+            phones = []
+            if self.backend == "pypinyin":
+                for n, py in enumerate(
+                    pinyin(_text, style=Style.TONE3, neutral_tone_with_five=True)
+                ):
+                    if all([c in self.punctuation_marks for c in py[0]]):
+                        if len(phones):
+                            assert phones[-1] == separator.syllable
+                            phones.pop(-1)
+
+                        phones.extend(list(py[0]))
+                    else:
+                        phones.extend([py[0], separator.syllable])
+            elif self.backend == "pypinyin_initials_finals":
+                for n, py in enumerate(
+                    pinyin(_text, style=Style.TONE3, neutral_tone_with_five=True)
+                ):
+                    if all([c in self.punctuation_marks for c in py[0]]):
+                        if len(phones):
+                            assert phones[-1] == separator.syllable
+                            phones.pop(-1)
+                        phones.extend(list(py[0]))
+                    else:
+                        if py[0][-1].isalnum():
+                            initial = get_initials(py[0], strict=False)
+                            if py[0][-1].isdigit():
+                                final = get_finals(py[0][:-1], strict=False) + py[0][-1]
+                            else:
+                                final = get_finals(py[0], strict=False)
+                            phones.extend(
+                                [
+                                    initial,
+                                    separator.phone,
+                                    final,
+                                    separator.syllable,
+                                ]
+                            )
+                        else:
+                            raise ValueError(f"unexpected pypinyin output: {py[0]}")
+            else:
+                raise NotImplementedError
+            phonemized.append(
+                "".join(phones).rstrip(f"{separator.word}{separator.syllable}")
+            )
+        return phonemized
+
+
+class G2PModule:
+    """Phonemize Text."""
+
+    # We support espeak to extract IPA (International Phonetic Alphabet), which supports 100 languages,
+    # https://github.com/espeak-ng/espeak-ng/blob/master/docs/languages.md
+
+    def __init__(
+        self,
+        language="en-us",
+        backend="espeak",
+        separator=Separator(word="_", syllable="-", phone="|"),
+        preserve_punctuation=True,
+        punctuation_marks: Union[str, Pattern] = Punctuation.default_marks(),
+        with_stress: bool = False,
+        tie: Union[bool, str] = False,
+        language_switch: LanguageSwitch = "keep-flags",
+        words_mismatch: WordMismatch = "ignore",
+    ) -> None:
+        self.separator = separator
+        self.backend = self._initialize_backend(
+            backend,
+            language,
+            punctuation_marks,
+            preserve_punctuation,
+            with_stress,
+            tie,
+            language_switch,
+            words_mismatch,
+        )
+
+    def _initialize_backend(
+        self,
+        backend,
+        language,
+        punctuation_marks,
+        preserve_punctuation,
+        with_stress,
+        tie,
+        language_switch,
+        words_mismatch,
+    ):
+        if backend == "espeak":
+            return EspeakBackend(
+                language,
+                punctuation_marks=punctuation_marks,
+                preserve_punctuation=preserve_punctuation,
+                with_stress=with_stress,
+                tie=tie,
+                language_switch=language_switch,
+                words_mismatch=words_mismatch,
+            )
+        elif backend in ["pypinyin", "pypinyin_initials_finals"]:
+            if language != "cmn":
+                raise ValueError(
+                    f"{language} is not supported for pypinyin and pypinyin_initials_finals."
+                )
+            return PypinyinBackend(
+                backend=backend,
+                punctuation_marks=punctuation_marks + self.separator.word,
+            )
+        else:
+            raise NotImplementedError(f"{backend}")
+
+    def to_list(self, phonemized: str) -> List[str]:
+        fields = []
+        for word in phonemized.split(self.separator.word):
+            pp = re.findall(r"\w+|[^\w\s]", word, re.UNICODE)
+            fields.extend(
+                [p for p in pp if p != self.separator.phone] + [self.separator.word]
+            )
+        assert len("".join(fields[:-1])) == len(phonemized) - phonemized.count(
+            self.separator.phone
+        )
+        return fields[:-1]
+
+    def phonemization(self, text, strip=True) -> List[List[str]]:
+        if isinstance(text, str):
+            text = [text]
+
+        phonemized = self.backend.phonemize(
+            text, separator=self.separator, strip=strip, njobs=1
+        )
+        phonemes = [self.to_list(p) for p in phonemized]
+        return phonemes
+
+    def g2p_conversion(self, text: str) -> List[str]:
+        phonemes = self.phonemization([text.strip()])
+        return phonemes[0]
+
+
+class LexiconModule:
+    def __init__(self, lex_path, language="en-us") -> None:
+        # todo: check lexicon derivation, merge with G2PModule?
+        lexicon = {}
+        with open(lex_path) as f:
+            for line in f:
+                temp = re.split(r"\s+", line.strip("\n"))
+                word = temp[0]
+                phones = temp[1:]
+                if word.lower() not in lexicon:
+                    lexicon[word.lower()] = phones
+        self.lexicon = lexicon
+        self.language = language
+        self.lang2g2p = {"en-us": G2p()}
+
+    def g2p_conversion(self, text):
+        phone = None
+
+        # todo: preprocess with other languages
+        if self.language == "en-us":
+            phone = self.preprocess_english(text)
+        else:
+            print("No support to", self.language)
+            raise NotImplementedError(self.language)
+
+        return phone
+
+    def preprocess_english(self, text):
+        text = text.rstrip(punctuation)
+
+        g2p = self.lang2g2p["en-us"]
+        phones = []
+        words = re.split(r"([,;.\-\?\!\s+])", text)
+        for w in words:
+            if w.lower() in self.lexicon:
+                phones += self.lexicon[w.lower()]
+            else:
+                phones += list(filter(lambda p: p != " ", g2p(w)))
+        phones = "}{".join(phones)
+        phones = re.sub(r"\{[^\w\s]?\}", "{sp}", phones)
+        phones = phones.replace("}{", " ")
+
+        return phones
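
Usage sketch for the two backends above. The espeak path requires the espeak-ng binary on PATH; the pypinyin path only accepts language="cmn". Phone boundaries ("|") are stripped by to_list(), syllable boundaries ("-") are kept:

from text.g2p_module import G2PModule

# espeak backend: IPA phones (output depends on the installed espeak-ng)
g2p_en = G2PModule(language="en-us", backend="espeak")
print(g2p_en.g2p_conversion("Hello world"))

# pypinyin backend: Mandarin initials/finals with tone digits
g2p_zh = G2PModule(language="cmn", backend="pypinyin_initials_finals")
print(g2p_zh.g2p_conversion("你好"))  # e.g. ['n', 'i3', '-', 'h', 'ao3']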
text/lexicon/librispeech-lexicon.txt
ADDED
The diff for this file is too large to render.
See raw diff
text/lexicon/pinyin-lexicon-r.txt
ADDED
@@ -0,0 +1,4120 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
+a1 a1
+a2 a2
+a3 a3
+a4 a4
+a5 a5
+ai1 ai1
+ai2 ai2
+ai3 ai3
+ai4 ai4
+ai5 ai5
+an1 an1
+an2 an2
+an3 an3
+an4 an4
+an5 an5
+ang1 ang1
+ang2 ang2
+ang3 ang3
+ang4 ang4
+ang5 ang5
+ao1 ao1
+ao2 ao2
+ao3 ao3
+ao4 ao4
+ao5 ao5
+ba1 b a1
+ba2 b a2
+ba3 b a3
+ba4 b a4
+ba5 b a5
+bai1 b ai1
+bai2 b ai2
+bai3 b ai3
+bai4 b ai4
+bai5 b ai5
+ban1 b an1
+ban2 b an2
+ban3 b an3
+ban4 b an4
+ban5 b an5
+bang1 b ang1
+bang2 b ang2
+bang3 b ang3
+bang4 b ang4
+bang5 b ang5
+bao1 b ao1
+bao2 b ao2
+bao3 b ao3
+bao4 b ao4
+bao5 b ao5
+bei1 b ei1
+bei2 b ei2
+bei3 b ei3
+bei4 b ei4
+bei5 b ei5
+ben1 b en1
+ben2 b en2
+ben3 b en3
+ben4 b en4
+ben5 b en5
+beng1 b eng1
+beng2 b eng2
+beng3 b eng3
+beng4 b eng4
+beng5 b eng5
+bi1 b i1
+bi2 b i2
+bi3 b i3
+bi4 b i4
+bi5 b i5
+bian1 b ian1
+bian2 b ian2
+bian3 b ian3
+bian4 b ian4
+bian5 b ian5
+biao1 b iao1
+biao2 b iao2
+biao3 b iao3
+biao4 b iao4
+biao5 b iao5
+bie1 b ie1
+bie2 b ie2
+bie3 b ie3
+bie4 b ie4
+bie5 b ie5
+bin1 b in1
+bin2 b in2
+bin3 b in3
+bin4 b in4
+bin5 b in5
+bing1 b ing1
+bing2 b ing2
+bing3 b ing3
+bing4 b ing4
+bing5 b ing5
+bo1 b o1
+bo2 b o2
+bo3 b o3
+bo4 b o4
+bo5 b o5
+bu1 b u1
+bu2 b u2
+bu3 b u3
+bu4 b u4
+bu5 b u5
+ca1 c a1
+ca2 c a2
+ca3 c a3
+ca4 c a4
+ca5 c a5
+cai1 c ai1
+cai2 c ai2
+cai3 c ai3
+cai4 c ai4
+cai5 c ai5
+can1 c an1
+can2 c an2
+can3 c an3
+can4 c an4
+can5 c an5
+cang1 c ang1
+cang2 c ang2
+cang3 c ang3
+cang4 c ang4
+cang5 c ang5
+cao1 c ao1
+cao2 c ao2
+cao3 c ao3
+cao4 c ao4
+cao5 c ao5
+ce1 c e1
+ce2 c e2
+ce3 c e3
+ce4 c e4
+ce5 c e5
+cen1 c en1
+cen2 c en2
+cen3 c en3
+cen4 c en4
+cen5 c en5
+ceng1 c eng1
+ceng2 c eng2
+ceng3 c eng3
+ceng4 c eng4
+ceng5 c eng5
+cha1 ch a1
+cha2 ch a2
+cha3 ch a3
+cha4 ch a4
+cha5 ch a5
+chai1 ch ai1
+chai2 ch ai2
+chai3 ch ai3
+chai4 ch ai4
+chai5 ch ai5
+chan1 ch an1
+chan2 ch an2
+chan3 ch an3
+chan4 ch an4
+chan5 ch an5
+chang1 ch ang1
+chang2 ch ang2
+chang3 ch ang3
+chang4 ch ang4
+chang5 ch ang5
+chao1 ch ao1
+chao2 ch ao2
+chao3 ch ao3
+chao4 ch ao4
+chao5 ch ao5
+che1 ch e1
+che2 ch e2
+che3 ch e3
+che4 ch e4
+che5 ch e5
+chen1 ch en1
+chen2 ch en2
+chen3 ch en3
+chen4 ch en4
+chen5 ch en5
+cheng1 ch eng1
+cheng2 ch eng2
+cheng3 ch eng3
+cheng4 ch eng4
+cheng5 ch eng5
+chi1 ch iii1
+chi2 ch iii2
+chi3 ch iii3
+chi4 ch iii4
+chi5 ch iii5
+chong1 ch ong1
+chong2 ch ong2
+chong3 ch ong3
+chong4 ch ong4
+chong5 ch ong5
+chou1 ch ou1
+chou2 ch ou2
+chou3 ch ou3
+chou4 ch ou4
+chou5 ch ou5
+chu1 ch u1
+chu2 ch u2
+chu3 ch u3
+chu4 ch u4
+chu5 ch u5
+chuai1 ch uai1
+chuai2 ch uai2
+chuai3 ch uai3
+chuai4 ch uai4
+chuai5 ch uai5
+chuan1 ch uan1
+chuan2 ch uan2
+chuan3 ch uan3
+chuan4 ch uan4
+chuan5 ch uan5
+chuang1 ch uang1
+chuang2 ch uang2
+chuang3 ch uang3
+chuang4 ch uang4
+chuang5 ch uang5
+chui1 ch uei1
+chui2 ch uei2
+chui3 ch uei3
+chui4 ch uei4
+chui5 ch uei5
+chun1 ch uen1
+chun2 ch uen2
+chun3 ch uen3
+chun4 ch uen4
+chun5 ch uen5
+chuo1 ch uo1
+chuo2 ch uo2
+chuo3 ch uo3
+chuo4 ch uo4
+chuo5 ch uo5
+ci1 c ii1
+ci2 c ii2
+ci3 c ii3
+ci4 c ii4
+ci5 c ii5
+cong1 c ong1
+cong2 c ong2
+cong3 c ong3
+cong4 c ong4
+cong5 c ong5
+cou1 c ou1
+cou2 c ou2
+cou3 c ou3
+cou4 c ou4
+cou5 c ou5
+cu1 c u1
+cu2 c u2
+cu3 c u3
+cu4 c u4
+cu5 c u5
+cuan1 c uan1
+cuan2 c uan2
+cuan3 c uan3
+cuan4 c uan4
+cuan5 c uan5
+cui1 c uei1
+cui2 c uei2
+cui3 c uei3
+cui4 c uei4
+cui5 c uei5
+cun1 c uen1
+cun2 c uen2
+cun3 c uen3
+cun4 c uen4
+cun5 c uen5
+cuo1 c uo1
+cuo2 c uo2
+cuo3 c uo3
+cuo4 c uo4
+cuo5 c uo5
+da1 d a1
+da2 d a2
+da3 d a3
+da4 d a4
+da5 d a5
+dai1 d ai1
+dai2 d ai2
+dai3 d ai3
+dai4 d ai4
+dai5 d ai5
+dan1 d an1
+dan2 d an2
+dan3 d an3
+dan4 d an4
+dan5 d an5
+dang1 d ang1
+dang2 d ang2
+dang3 d ang3
+dang4 d ang4
+dang5 d ang5
+dao1 d ao1
+dao2 d ao2
+dao3 d ao3
+dao4 d ao4
+dao5 d ao5
+de1 d e1
+de2 d e2
+de3 d e3
+de4 d e4
+de5 d e5
+dei1 d ei1
+dei2 d ei2
+dei3 d ei3
+dei4 d ei4
+dei5 d ei5
+den1 d en1
+den2 d en2
+den3 d en3
+den4 d en4
+den5 d en5
+deng1 d eng1
+deng2 d eng2
+deng3 d eng3
+deng4 d eng4
+deng5 d eng5
+di1 d i1
+di2 d i2
+di3 d i3
+di4 d i4
+di5 d i5
+dia1 d ia1
+dia2 d ia2
+dia3 d ia3
+dia4 d ia4
+dia5 d ia5
+dian1 d ian1
+dian2 d ian2
+dian3 d ian3
+dian4 d ian4
+dian5 d ian5
+diao1 d iao1
+diao2 d iao2
+diao3 d iao3
+diao4 d iao4
+diao5 d iao5
+die1 d ie1
+die2 d ie2
+die3 d ie3
+die4 d ie4
+die5 d ie5
+ding1 d ing1
+ding2 d ing2
+ding3 d ing3
+ding4 d ing4
+ding5 d ing5
+diu1 d iou1
+diu2 d iou2
+diu3 d iou3
+diu4 d iou4
+diu5 d iou5
+dong1 d ong1
+dong2 d ong2
+dong3 d ong3
+dong4 d ong4
+dong5 d ong5
+dou1 d ou1
+dou2 d ou2
+dou3 d ou3
+dou4 d ou4
+dou5 d ou5
+du1 d u1
+du2 d u2
+du3 d u3
+du4 d u4
+du5 d u5
+duan1 d uan1
+duan2 d uan2
+duan3 d uan3
+duan4 d uan4
+duan5 d uan5
+dui1 d uei1
+dui2 d uei2
+dui3 d uei3
+dui4 d uei4
+dui5 d uei5
+dun1 d uen1
+dun2 d uen2
+dun3 d uen3
+dun4 d uen4
+dun5 d uen5
+duo1 d uo1
+duo2 d uo2
+duo3 d uo3
+duo4 d uo4
+duo5 d uo5
+e1 e1
+e2 e2
+e3 e3
+e4 e4
+e5 e5
+ei1 ei1
+ei2 ei2
+ei3 ei3
+ei4 ei4
+ei5 ei5
+en1 en1
+en2 en2
+en3 en3
+en4 en4
+en5 en5
+eng1 eng1
+eng2 eng2
+eng3 eng3
+eng4 eng4
+eng5 eng5
+r1 er1
+r2 er2
+r3 er3
+r4 er4
+r5 er5
+er1 er1
+er2 er2
+er3 er3
+er4 er4
+er5 er5
+fa1 f a1
+fa2 f a2
+fa3 f a3
+fa4 f a4
+fa5 f a5
+fan1 f an1
+fan2 f an2
+fan3 f an3
+fan4 f an4
+fan5 f an5
+fang1 f ang1
+fang2 f ang2
+fang3 f ang3
+fang4 f ang4
+fang5 f ang5
+fei1 f ei1
+fei2 f ei2
+fei3 f ei3
+fei4 f ei4
+fei5 f ei5
+fen1 f en1
+fen2 f en2
+fen3 f en3
+fen4 f en4
+fen5 f en5
+feng1 f eng1
+feng2 f eng2
+feng3 f eng3
+feng4 f eng4
+feng5 f eng5
+fo1 f o1
+fo2 f o2
+fo3 f o3
+fo4 f o4
+fo5 f o5
+fou1 f ou1
+fou2 f ou2
+fou3 f ou3
+fou4 f ou4
+fou5 f ou5
+fu1 f u1
+fu2 f u2
+fu3 f u3
+fu4 f u4
+fu5 f u5
+ga1 g a1
+ga2 g a2
+ga3 g a3
+ga4 g a4
+ga5 g a5
+gai1 g ai1
+gai2 g ai2
+gai3 g ai3
+gai4 g ai4
+gai5 g ai5
+gan1 g an1
+gan2 g an2
+gan3 g an3
+gan4 g an4
+gan5 g an5
+gang1 g ang1
+gang2 g ang2
+gang3 g ang3
+gang4 g ang4
+gang5 g ang5
+gao1 g ao1
+gao2 g ao2
+gao3 g ao3
+gao4 g ao4
+gao5 g ao5
+ge1 g e1
+ge2 g e2
+ge3 g e3
+ge4 g e4
+ge5 g e5
+gei1 g ei1
+gei2 g ei2
+gei3 g ei3
+gei4 g ei4
+gei5 g ei5
+gen1 g en1
+gen2 g en2
+gen3 g en3
+gen4 g en4
+gen5 g en5
+geng1 g eng1
+geng2 g eng2
+geng3 g eng3
+geng4 g eng4
+geng5 g eng5
+gong1 g ong1
+gong2 g ong2
+gong3 g ong3
+gong4 g ong4
+gong5 g ong5
+gou1 g ou1
+gou2 g ou2
+gou3 g ou3
+gou4 g ou4
+gou5 g ou5
+gu1 g u1
+gu2 g u2
+gu3 g u3
+gu4 g u4
+gu5 g u5
+gua1 g ua1
+gua2 g ua2
+gua3 g ua3
+gua4 g ua4
+gua5 g ua5
+guai1 g uai1
+guai2 g uai2
+guai3 g uai3
+guai4 g uai4
+guai5 g uai5
+guan1 g uan1
+guan2 g uan2
+guan3 g uan3
+guan4 g uan4
+guan5 g uan5
+guang1 g uang1
+guang2 g uang2
+guang3 g uang3
+guang4 g uang4
+guang5 g uang5
+gui1 g uei1
+gui2 g uei2
+gui3 g uei3
+gui4 g uei4
+gui5 g uei5
+gun1 g uen1
+gun2 g uen2
+gun3 g uen3
+gun4 g uen4
+gun5 g uen5
+guo1 g uo1
+guo2 g uo2
+guo3 g uo3
+guo4 g uo4
+guo5 g uo5
+ha1 h a1
+ha2 h a2
+ha3 h a3
+ha4 h a4
+ha5 h a5
+hai1 h ai1
+hai2 h ai2
+hai3 h ai3
+hai4 h ai4
+hai5 h ai5
+han1 h an1
+han2 h an2
+han3 h an3
+han4 h an4
+han5 h an5
+hang1 h ang1
+hang2 h ang2
+hang3 h ang3
+hang4 h ang4
+hang5 h ang5
+hao1 h ao1
+hao2 h ao2
+hao3 h ao3
+hao4 h ao4
+hao5 h ao5
+he1 h e1
+he2 h e2
+he3 h e3
+he4 h e4
+he5 h e5
+hei1 h ei1
+hei2 h ei2
+hei3 h ei3
+hei4 h ei4
+hei5 h ei5
+hen1 h en1
+hen2 h en2
+hen3 h en3
+hen4 h en4
+hen5 h en5
+heng1 h eng1
+heng2 h eng2
+heng3 h eng3
+heng4 h eng4
+heng5 h eng5
+hong1 h ong1
+hong2 h ong2
+hong3 h ong3
+hong4 h ong4
+hong5 h ong5
+hou1 h ou1
+hou2 h ou2
+hou3 h ou3
+hou4 h ou4
+hou5 h ou5
+hu1 h u1
+hu2 h u2
+hu3 h u3
+hu4 h u4
+hu5 h u5
+hua1 h ua1
+hua2 h ua2
+hua3 h ua3
+hua4 h ua4
+hua5 h ua5
+huai1 h uai1
+huai2 h uai2
+huai3 h uai3
+huai4 h uai4
+huai5 h uai5
+huan1 h uan1
+huan2 h uan2
+huan3 h uan3
+huan4 h uan4
+huan5 h uan5
+huang1 h uang1
+huang2 h uang2
+huang3 h uang3
+huang4 h uang4
+huang5 h uang5
+hui1 h uei1
+hui2 h uei2
+hui3 h uei3
+hui4 h uei4
+hui5 h uei5
+hun1 h uen1
+hun2 h uen2
+hun3 h uen3
+hun4 h uen4
+hun5 h uen5
+huo1 h uo1
+huo2 h uo2
+huo3 h uo3
+huo4 h uo4
+huo5 h uo5
+ji1 j i1
+ji2 j i2
+ji3 j i3
+ji4 j i4
+ji5 j i5
+jia1 j ia1
+jia2 j ia2
+jia3 j ia3
+jia4 j ia4
+jia5 j ia5
+jian1 j ian1
+jian2 j ian2
+jian3 j ian3
+jian4 j ian4
+jian5 j ian5
+jiang1 j iang1
+jiang2 j iang2
+jiang3 j iang3
+jiang4 j iang4
+jiang5 j iang5
+jiao1 j iao1
+jiao2 j iao2
+jiao3 j iao3
+jiao4 j iao4
+jiao5 j iao5
+jie1 j ie1
+jie2 j ie2
+jie3 j ie3
+jie4 j ie4
+jie5 j ie5
+jin1 j in1
+jin2 j in2
+jin3 j in3
+jin4 j in4
+jin5 j in5
+jing1 j ing1
+jing2 j ing2
+jing3 j ing3
+jing4 j ing4
+jing5 j ing5
+jiong1 j iong1
+jiong2 j iong2
+jiong3 j iong3
+jiong4 j iong4
+jiong5 j iong5
+jiu1 j iou1
+jiu2 j iou2
+jiu3 j iou3
+jiu4 j iou4
+jiu5 j iou5
+ju1 j v1
+ju2 j v2
+ju3 j v3
+ju4 j v4
+ju5 j v5
+juan1 j van1
+juan2 j van2
+juan3 j van3
+juan4 j van4
+juan5 j van5
+jue1 j ve1
+jue2 j ve2
+jue3 j ve3
+jue4 j ve4
+jue5 j ve5
+jun1 j vn1
+jun2 j vn2
+jun3 j vn3
+jun4 j vn4
+jun5 j vn5
+ka1 k a1
+ka2 k a2
+ka3 k a3
+ka4 k a4
+ka5 k a5
+kai1 k ai1
+kai2 k ai2
+kai3 k ai3
+kai4 k ai4
+kai5 k ai5
+kan1 k an1
+kan2 k an2
+kan3 k an3
+kan4 k an4
+kan5 k an5
+kang1 k ang1
+kang2 k ang2
+kang3 k ang3
+kang4 k ang4
+kang5 k ang5
+kao1 k ao1
+kao2 k ao2
+kao3 k ao3
+kao4 k ao4
+kao5 k ao5
+ke1 k e1
+ke2 k e2
+ke3 k e3
+ke4 k e4
+ke5 k e5
+kei1 k ei1
+kei2 k ei2
+kei3 k ei3
+kei4 k ei4
+kei5 k ei5
+ken1 k en1
+ken2 k en2
+ken3 k en3
+ken4 k en4
+ken5 k en5
+keng1 k eng1
+keng2 k eng2
+keng3 k eng3
+keng4 k eng4
+keng5 k eng5
+kong1 k ong1
+kong2 k ong2
+kong3 k ong3
+kong4 k ong4
+kong5 k ong5
+kou1 k ou1
+kou2 k ou2
+kou3 k ou3
+kou4 k ou4
+kou5 k ou5
+ku1 k u1
+ku2 k u2
+ku3 k u3
+ku4 k u4
+ku5 k u5
+kua1 k ua1
+kua2 k ua2
+kua3 k ua3
+kua4 k ua4
+kua5 k ua5
+kuai1 k uai1
+kuai2 k uai2
+kuai3 k uai3
+kuai4 k uai4
+kuai5 k uai5
+kuan1 k uan1
+kuan2 k uan2
+kuan3 k uan3
+kuan4 k uan4
+kuan5 k uan5
+kuang1 k uang1
+kuang2 k uang2
+kuang3 k uang3
+kuang4 k uang4
+kuang5 k uang5
+kui1 k uei1
+kui2 k uei2
+kui3 k uei3
+kui4 k uei4
+kui5 k uei5
+kun1 k uen1
+kun2 k uen2
+kun3 k uen3
+kun4 k uen4
+kun5 k uen5
+kuo1 k uo1
+kuo2 k uo2
+kuo3 k uo3
+kuo4 k uo4
+kuo5 k uo5
+la1 l a1
+la2 l a2
+la3 l a3
+la4 l a4
+la5 l a5
+lai1 l ai1
+lai2 l ai2
+lai3 l ai3
+lai4 l ai4
+lai5 l ai5
+lan1 l an1
+lan2 l an2
+lan3 l an3
+lan4 l an4
+lan5 l an5
+lang1 l ang1
+lang2 l ang2
+lang3 l ang3
+lang4 l ang4
+lang5 l ang5
+lao1 l ao1
+lao2 l ao2
+lao3 l ao3
+lao4 l ao4
+lao5 l ao5
+le1 l e1
+le2 l e2
+le3 l e3
+le4 l e4
+le5 l e5
+lei1 l ei1
+lei2 l ei2
+lei3 l ei3
+lei4 l ei4
+lei5 l ei5
+leng1 l eng1
+leng2 l eng2
+leng3 l eng3
+leng4 l eng4
+leng5 l eng5
+li1 l i1
+li2 l i2
+li3 l i3
+li4 l i4
+li5 l i5
+lia1 l ia1
+lia2 l ia2
+lia3 l ia3
+lia4 l ia4
+lia5 l ia5
+lian1 l ian1
+lian2 l ian2
+lian3 l ian3
+lian4 l ian4
+lian5 l ian5
+liang1 l iang1
+liang2 l iang2
+liang3 l iang3
+liang4 l iang4
+liang5 l iang5
+liao1 l iao1
+liao2 l iao2
+liao3 l iao3
+liao4 l iao4
+liao5 l iao5
+lie1 l ie1
+lie2 l ie2
+lie3 l ie3
+lie4 l ie4
+lie5 l ie5
+lin1 l in1
+lin2 l in2
+lin3 l in3
+lin4 l in4
+lin5 l in5
+ling1 l ing1
+ling2 l ing2
+ling3 l ing3
+ling4 l ing4
+ling5 l ing5
+liu1 l iou1
+liu2 l iou2
+liu3 l iou3
+liu4 l iou4
+liu5 l iou5
+lo1 l o1
+lo2 l o2
+lo3 l o3
+lo4 l o4
+lo5 l o5
+long1 l ong1
+long2 l ong2
+long3 l ong3
+long4 l ong4
+long5 l ong5
+lou1 l ou1
+lou2 l ou2
+lou3 l ou3
+lou4 l ou4
+lou5 l ou5
+lu1 l u1
+lu2 l u2
+lu3 l u3
+lu4 l u4
+lu5 l u5
+luan1 l uan1
+luan2 l uan2
+luan3 l uan3
+luan4 l uan4
+luan5 l uan5
+lue1 l ve1
+lue2 l ve2
+lue3 l ve3
+lue4 l ve4
+lue5 l ve5
+lve1 l ve1
+lve2 l ve2
+lve3 l ve3
+lve4 l ve4
+lve5 l ve5
+lun1 l uen1
+lun2 l uen2
+lun3 l uen3
+lun4 l uen4
+lun5 l uen5
+luo1 l uo1
+luo2 l uo2
+luo3 l uo3
+luo4 l uo4
+luo5 l uo5
+lv1 l v1
+lv2 l v2
+lv3 l v3
+lv4 l v4
+lv5 l v5
+ma1 m a1
+ma2 m a2
+ma3 m a3
+ma4 m a4
+ma5 m a5
+mai1 m ai1
+mai2 m ai2
+mai3 m ai3
+mai4 m ai4
+mai5 m ai5
+man1 m an1
+man2 m an2
+man3 m an3
+man4 m an4
+man5 m an5
+mang1 m ang1
+mang2 m ang2
+mang3 m ang3
+mang4 m ang4
+mang5 m ang5
+mao1 m ao1
+mao2 m ao2
+mao3 m ao3
+mao4 m ao4
+mao5 m ao5
+me1 m e1
+me2 m e2
+me3 m e3
+me4 m e4
+me5 m e5
+mei1 m ei1
+mei2 m ei2
+mei3 m ei3
+mei4 m ei4
+mei5 m ei5
+men1 m en1
+men2 m en2
+men3 m en3
+men4 m en4
+men5 m en5
+meng1 m eng1
+meng2 m eng2
+meng3 m eng3
+meng4 m eng4
+meng5 m eng5
+mi1 m i1
+mi2 m i2
+mi3 m i3
+mi4 m i4
+mi5 m i5
+mian1 m ian1
+mian2 m ian2
+mian3 m ian3
+mian4 m ian4
+mian5 m ian5
+miao1 m iao1
+miao2 m iao2
+miao3 m iao3
+miao4 m iao4
+miao5 m iao5
+mie1 m ie1
+mie2 m ie2
+mie3 m ie3
+mie4 m ie4
+mie5 m ie5
+min1 m in1
+min2 m in2
+min3 m in3
+min4 m in4
+min5 m in5
+ming1 m ing1
+ming2 m ing2
+ming3 m ing3
+ming4 m ing4
+ming5 m ing5
+miu1 m iou1
+miu2 m iou2
+miu3 m iou3
+miu4 m iou4
+miu5 m iou5
+mo1 m o1
+mo2 m o2
+mo3 m o3
+mo4 m o4
+mo5 m o5
+mou1 m ou1
+mou2 m ou2
+mou3 m ou3
+mou4 m ou4
+mou5 m ou5
+mu1 m u1
+mu2 m u2
+mu3 m u3
+mu4 m u4
+mu5 m u5
+na1 n a1
+na2 n a2
+na3 n a3
+na4 n a4
+na5 n a5
+nai1 n ai1
+nai2 n ai2
+nai3 n ai3
+nai4 n ai4
+nai5 n ai5
+nan1 n an1
+nan2 n an2
+nan3 n an3
+nan4 n an4
+nan5 n an5
+nang1 n ang1
+nang2 n ang2
+nang3 n ang3
+nang4 n ang4
+nang5 n ang5
+nao1 n ao1
+nao2 n ao2
+nao3 n ao3
+nao4 n ao4
+nao5 n ao5
+ne1 n e1
+ne2 n e2
+ne3 n e3
+ne4 n e4
+ne5 n e5
+nei1 n ei1
+nei2 n ei2
+nei3 n ei3
+nei4 n ei4
+nei5 n ei5
+nen1 n en1
+nen2 n en2
+nen3 n en3
+nen4 n en4
+nen5 n en5
+neng1 n eng1
+neng2 n eng2
+neng3 n eng3
+neng4 n eng4
+neng5 n eng5
+ni1 n i1
+ni2 n i2
+ni3 n i3
+ni4 n i4
+ni5 n i5
+nian1 n ian1
+nian2 n ian2
+nian3 n ian3
+nian4 n ian4
+nian5 n ian5
+niang1 n iang1
+niang2 n iang2
+niang3 n iang3
+niang4 n iang4
+niang5 n iang5
+niao1 n iao1
+niao2 n iao2
+niao3 n iao3
+niao4 n iao4
+niao5 n iao5
+nie1 n ie1
+nie2 n ie2
+nie3 n ie3
+nie4 n ie4
+nie5 n ie5
+nin1 n in1
+nin2 n in2
+nin3 n in3
+nin4 n in4
+nin5 n in5
+ning1 n ing1
+ning2 n ing2
+ning3 n ing3
+ning4 n ing4
+ning5 n ing5
+niu1 n iou1
+niu2 n iou2
+niu3 n iou3
+niu4 n iou4
+niu5 n iou5
+nong1 n ong1
+nong2 n ong2
+nong3 n ong3
+nong4 n ong4
+nong5 n ong5
+nou1 n ou1
+nou2 n ou2
+nou3 n ou3
+nou4 n ou4
+nou5 n ou5
+nu1 n u1
+nu2 n u2
+nu3 n u3
+nu4 n u4
+nu5 n u5
+nuan1 n uan1
+nuan2 n uan2
+nuan3 n uan3
+nuan4 n uan4
+nuan5 n uan5
+nue1 n ve1
+nue2 n ve2
+nue3 n ve3
+nue4 n ve4
+nue5 n ve5
+nve1 n ve1
+nve2 n ve2
+nve3 n ve3
+nve4 n ve4
+nve5 n ve5
+nuo1 n uo1
+nuo2 n uo2
+nuo3 n uo3
+nuo4 n uo4
+nuo5 n uo5
+nv1 n v1
+nv2 n v2
+nv3 n v3
+nv4 n v4
+nv5 n v5
+o1 o1
+o2 o2
+o3 o3
+o4 o4
+o5 o5
+ou1 ou1
+ou2 ou2
+ou3 ou3
+ou4 ou4
+ou5 ou5
+pa1 p a1
+pa2 p a2
+pa3 p a3
+pa4 p a4
+pa5 p a5
+pai1 p ai1
+pai2 p ai2
+pai3 p ai3
+pai4 p ai4
+pai5 p ai5
+pan1 p an1
+pan2 p an2
+pan3 p an3
+pan4 p an4
+pan5 p an5
+pang1 p ang1
+pang2 p ang2
+pang3 p ang3
+pang4 p ang4
+pang5 p ang5
+pao1 p ao1
+pao2 p ao2
+pao3 p ao3
+pao4 p ao4
+pao5 p ao5
+pei1 p ei1
+pei2 p ei2
+pei3 p ei3
+pei4 p ei4
+pei5 p ei5
+pen1 p en1
+pen2 p en2
+pen3 p en3
+pen4 p en4
+pen5 p en5
+peng1 p eng1
+peng2 p eng2
+peng3 p eng3
+peng4 p eng4
+peng5 p eng5
+pi1 p i1
+pi2 p i2
+pi3 p i3
+pi4 p i4
+pi5 p i5
+pian1 p ian1
+pian2 p ian2
+pian3 p ian3
+pian4 p ian4
+pian5 p ian5
+piao1 p iao1
+piao2 p iao2
+piao3 p iao3
+piao4 p iao4
+piao5 p iao5
+pie1 p ie1
+pie2 p ie2
+pie3 p ie3
+pie4 p ie4
+pie5 p ie5
+pin1 p in1
+pin2 p in2
+pin3 p in3
+pin4 p in4
+pin5 p in5
+ping1 p ing1
+ping2 p ing2
+ping3 p ing3
+ping4 p ing4
+ping5 p ing5
+po1 p o1
+po2 p o2
+po3 p o3
+po4 p o4
+po5 p o5
+pou1 p ou1
+pou2 p ou2
+pou3 p ou3
+pou4 p ou4
+pou5 p ou5
+pu1 p u1
+pu2 p u2
+pu3 p u3
+pu4 p u4
+pu5 p u5
+qi1 q i1
+qi2 q i2
+qi3 q i3
+qi4 q i4
+qi5 q i5
+qia1 q ia1
+qia2 q ia2
+qia3 q ia3
+qia4 q ia4
+qia5 q ia5
+qian1 q ian1
+qian2 q ian2
+qian3 q ian3
+qian4 q ian4
+qian5 q ian5
+qiang1 q iang1
+qiang2 q iang2
+qiang3 q iang3
+qiang4 q iang4
+qiang5 q iang5
+qiao1 q iao1
+qiao2 q iao2
+qiao3 q iao3
+qiao4 q iao4
+qiao5 q iao5
+qie1 q ie1
+qie2 q ie2
+qie3 q ie3
+qie4 q ie4
+qie5 q ie5
+qin1 q in1
+qin2 q in2
+qin3 q in3
+qin4 q in4
+qin5 q in5
+qing1 q ing1
+qing2 q ing2
+qing3 q ing3
+qing4 q ing4
+qing5 q ing5
+qiong1 q iong1
+qiong2 q iong2
+qiong3 q iong3
+qiong4 q iong4
+qiong5 q iong5
+qiu1 q iou1
+qiu2 q iou2
+qiu3 q iou3
+qiu4 q iou4
+qiu5 q iou5
+qu1 q v1
+qu2 q v2
+qu3 q v3
+qu4 q v4
+qu5 q v5
+quan1 q van1
+quan2 q van2
+quan3 q van3
+quan4 q van4
+quan5 q van5
+que1 q ve1
+que2 q ve2
+que3 q ve3
+que4 q ve4
+que5 q ve5
+qun1 q vn1
+qun2 q vn2
+qun3 q vn3
+qun4 q vn4
+qun5 q vn5
+ran1 r an1
+ran2 r an2
+ran3 r an3
+ran4 r an4
+ran5 r an5
+rang1 r ang1
+rang2 r ang2
+rang3 r ang3
+rang4 r ang4
+rang5 r ang5
+rao1 r ao1
+rao2 r ao2
+rao3 r ao3
+rao4 r ao4
+rao5 r ao5
+re1 r e1
+re2 r e2
+re3 r e3
+re4 r e4
+re5 r e5
+ren1 r en1
+ren2 r en2
+ren3 r en3
+ren4 r en4
+ren5 r en5
+reng1 r eng1
+reng2 r eng2
+reng3 r eng3
+reng4 r eng4
+reng5 r eng5
+ri1 r iii1
+ri2 r iii2
+ri3 r iii3
+ri4 r iii4
+ri5 r iii5
+rong1 r ong1
+rong2 r ong2
+rong3 r ong3
+rong4 r ong4
+rong5 r ong5
+rou1 r ou1
+rou2 r ou2
+rou3 r ou3
+rou4 r ou4
+rou5 r ou5
+ru1 r u1
+ru2 r u2
+ru3 r u3
+ru4 r u4
+ru5 r u5
+rua1 r ua1
+rua2 r ua2
+rua3 r ua3
+rua4 r ua4
+rua5 r ua5
+ruan1 r uan1
+ruan2 r uan2
+ruan3 r uan3
+ruan4 r uan4
+ruan5 r uan5
+rui1 r uei1
+rui2 r uei2
+rui3 r uei3
+rui4 r uei4
+rui5 r uei5
+run1 r uen1
+run2 r uen2
+run3 r uen3
+run4 r uen4
+run5 r uen5
+ruo1 r uo1
+ruo2 r uo2
+ruo3 r uo3
+ruo4 r uo4
+ruo5 r uo5
+sa1 s a1
+sa2 s a2
+sa3 s a3
+sa4 s a4
+sa5 s a5
+sai1 s ai1
+sai2 s ai2
+sai3 s ai3
+sai4 s ai4
+sai5 s ai5
+san1 s an1
+san2 s an2
+san3 s an3
+san4 s an4
+san5 s an5
+sang1 s ang1
+sang2 s ang2
+sang3 s ang3
+sang4 s ang4
+sang5 s ang5
+sao1 s ao1
+sao2 s ao2
+sao3 s ao3
+sao4 s ao4
+sao5 s ao5
+se1 s e1
+se2 s e2
+se3 s e3
+se4 s e4
+se5 s e5
+sen1 s en1
+sen2 s en2
+sen3 s en3
+sen4 s en4
+sen5 s en5
+seng1 s eng1
+seng2 s eng2
+seng3 s eng3
+seng4 s eng4
+seng5 s eng5
+sha1 sh a1
+sha2 sh a2
+sha3 sh a3
+sha4 sh a4
+sha5 sh a5
+shai1 sh ai1
+shai2 sh ai2
+shai3 sh ai3
+shai4 sh ai4
+shai5 sh ai5
+shan1 sh an1
+shan2 sh an2
+shan3 sh an3
+shan4 sh an4
+shan5 sh an5
+shang1 sh ang1
+shang2 sh ang2
+shang3 sh ang3
+shang4 sh ang4
+shang5 sh ang5
+shao1 sh ao1
+shao2 sh ao2
+shao3 sh ao3
+shao4 sh ao4
+shao5 sh ao5
+she1 sh e1
+she2 sh e2
+she3 sh e3
+she4 sh e4
+she5 sh e5
+shei1 sh ei1
+shei2 sh ei2
+shei3 sh ei3
+shei4 sh ei4
+shei5 sh ei5
+shen1 sh en1
+shen2 sh en2
+shen3 sh en3
+shen4 sh en4
+shen5 sh en5
+sheng1 sh eng1
+sheng2 sh eng2
+sheng3 sh eng3
+sheng4 sh eng4
+sheng5 sh eng5
+shi1 sh iii1
+shi2 sh iii2
+shi3 sh iii3
+shi4 sh iii4
+shi5 sh iii5
+shou1 sh ou1
+shou2 sh ou2
+shou3 sh ou3
+shou4 sh ou4
+shou5 sh ou5
+shu1 sh u1
+shu2 sh u2
+shu3 sh u3
+shu4 sh u4
+shu5 sh u5
+shua1 sh ua1
+shua2 sh ua2
+shua3 sh ua3
+shua4 sh ua4
+shua5 sh ua5
+shuai1 sh uai1
+shuai2 sh uai2
+shuai3 sh uai3
+shuai4 sh uai4
+shuai5 sh uai5
+shuan1 sh uan1
+shuan2 sh uan2
+shuan3 sh uan3
+shuan4 sh uan4
+shuan5 sh uan5
+shuang1 sh uang1
+shuang2 sh uang2
+shuang3 sh uang3
+shuang4 sh uang4
+shuang5 sh uang5
+shui1 sh uei1
+shui2 sh uei2
+shui3 sh uei3
+shui4 sh uei4
+shui5 sh uei5
+shun1 sh uen1
+shun2 sh uen2
+shun3 sh uen3
+shun4 sh uen4
+shun5 sh uen5
+shuo1 sh uo1
+shuo2 sh uo2
+shuo3 sh uo3
+shuo4 sh uo4
+shuo5 sh uo5
+si1 s ii1
+si2 s ii2
+si3 s ii3
+si4 s ii4
+si5 s ii5
+song1 s ong1
+song2 s ong2
+song3 s ong3
+song4 s ong4
+song5 s ong5
+sou1 s ou1
+sou2 s ou2
+sou3 s ou3
+sou4 s ou4
+sou5 s ou5
+su1 s u1
+su2 s u2
+su3 s u3
+su4 s u4
+su5 s u5
+suan1 s uan1
+suan2 s uan2
+suan3 s uan3
+suan4 s uan4
+suan5 s uan5
+sui1 s uei1
+sui2 s uei2
+sui3 s uei3
+sui4 s uei4
+sui5 s uei5
+sun1 s uen1
+sun2 s uen2
+sun3 s uen3
+sun4 s uen4
+sun5 s uen5
+suo1 s uo1
+suo2 s uo2
+suo3 s uo3
+suo4 s uo4
+suo5 s uo5
+ta1 t a1
+ta2 t a2
+ta3 t a3
+ta4 t a4
+ta5 t a5
+tai1 t ai1
+tai2 t ai2
+tai3 t ai3
+tai4 t ai4
+tai5 t ai5
+tan1 t an1
+tan2 t an2
+tan3 t an3
+tan4 t an4
+tan5 t an5
+tang1 t ang1
+tang2 t ang2
+tang3 t ang3
+tang4 t ang4
+tang5 t ang5
+tao1 t ao1
+tao2 t ao2
+tao3 t ao3
+tao4 t ao4
+tao5 t ao5
+te1 t e1
+te2 t e2
+te3 t e3
+te4 t e4
+te5 t e5
+tei1 t ei1
+tei2 t ei2
+tei3 t ei3
+tei4 t ei4
+tei5 t ei5
+teng1 t eng1
+teng2 t eng2
+teng3 t eng3
+teng4 t eng4
+teng5 t eng5
+ti1 t i1
+ti2 t i2
+ti3 t i3
+ti4 t i4
+ti5 t i5
+tian1 t ian1
+tian2 t ian2
+tian3 t ian3
+tian4 t ian4
+tian5 t ian5
+tiao1 t iao1
+tiao2 t iao2
+tiao3 t iao3
+tiao4 t iao4
+tiao5 t iao5
+tie1 t ie1
+tie2 t ie2
+tie3 t ie3
+tie4 t ie4
+tie5 t ie5
+ting1 t ing1
+ting2 t ing2
+ting3 t ing3
+ting4 t ing4
+ting5 t ing5
+tong1 t ong1
+tong2 t ong2
+tong3 t ong3
+tong4 t ong4
+tong5 t ong5
+tou1 t ou1
+tou2 t ou2
+tou3 t ou3
+tou4 t ou4
+tou5 t ou5
+tu1 t u1
+tu2 t u2
+tu3 t u3
+tu4 t u4
+tu5 t u5
+tuan1 t uan1
+tuan2 t uan2
+tuan3 t uan3
+tuan4 t uan4
+tuan5 t uan5
+tui1 t uei1
+tui2 t uei2
+tui3 t uei3
+tui4 t uei4
+tui5 t uei5
+tun1 t uen1
+tun2 t uen2
+tun3 t uen3
+tun4 t uen4
+tun5 t uen5
+tuo1 t uo1
+tuo2 t uo2
+tuo3 t uo3
+tuo4 t uo4
+tuo5 t uo5
+wa1 w ua1
+wa2 w ua2
+wa3 w ua3
+wa4 w ua4
+wa5 w ua5
+wai1 w uai1
+wai2 w uai2
+wai3 w uai3
+wai4 w uai4
+wai5 w uai5
+wan1 w uan1
+wan2 w uan2
+wan3 w uan3
+wan4 w uan4
+wan5 w uan5
+wang1 w uang1
+wang2 w uang2
+wang3 w uang3
+wang4 w uang4
+wang5 w uang5
+wei1 w uei1
+wei2 w uei2
+wei3 w uei3
+wei4 w uei4
+wei5 w uei5
+wen1 w uen1
+wen2 w uen2
+wen3 w uen3
+wen4 w uen4
+wen5 w uen5
+weng1 w uen1
+weng2 w uen2
+weng3 w uen3
+weng4 w uen4
+weng5 w uen5
+wo1 w uo1
+wo2 w uo2
+wo3 w uo3
+wo4 w uo4
+wo5 w uo5
+wu1 w u1
+wu2 w u2
+wu3 w u3
+wu4 w u4
+wu5 w u5
+xi1 x i1
+xi2 x i2
+xi3 x i3
+xi4 x i4
+xi5 x i5
+xia1 x ia1
+xia2 x ia2
+xia3 x ia3
+xia4 x ia4
+xia5 x ia5
+xian1 x ian1
+xian2 x ian2
+xian3 x ian3
+xian4 x ian4
+xian5 x ian5
+xiang1 x iang1
+xiang2 x iang2
+xiang3 x iang3
+xiang4 x iang4
+xiang5 x iang5
+xiao1 x iao1
+xiao2 x iao2
+xiao3 x iao3
+xiao4 x iao4
+xiao5 x iao5
+xie1 x ie1
+xie2 x ie2
+xie3 x ie3
+xie4 x ie4
+xie5 x ie5
+xin1 x in1
+xin2 x in2
+xin3 x in3
+xin4 x in4
+xin5 x in5
+xing1 x ing1
+xing2 x ing2
+xing3 x ing3
+xing4 x ing4
+xing5 x ing5
+xiong1 x iong1
+xiong2 x iong2
+xiong3 x iong3
+xiong4 x iong4
+xiong5 x iong5
+xiu1 x iou1
+xiu2 x iou2
+xiu3 x iou3
+xiu4 x iou4
+xiu5 x iou5
+xu1 x v1
+xu2 x v2
+xu3 x v3
+xu4 x v4
+xu5 x v5
+xuan1 x van1
+xuan2 x van2
+xuan3 x van3
+xuan4 x van4
+xuan5 x van5
+xue1 x ve1
+xue2 x ve2
+xue3 x ve3
+xue4 x ve4
+xue5 x ve5
+xun1 x vn1
+xun2 x vn2
+xun3 x vn3
+xun4 x vn4
+xun5 x vn5
+ya1 y ia1
+ya2 y ia2
+ya3 y ia3
+ya4 y ia4
+ya5 y ia5
+yan1 y ian1
+yan2 y ian2
+yan3 y ian3
+yan4 y ian4
+yan5 y ian5
+yang1 y iang1
+yang2 y iang2
+yang3 y iang3
+yang4 y iang4
+yang5 y iang5
+yao1 y iao1
+yao2 y iao2
+yao3 y iao3
+yao4 y iao4
+yao5 y iao5
+ye1 y ie1
+ye2 y ie2
+ye3 y ie3
+ye4 y ie4
+ye5 y ie5
+yi1 y i1
+yi2 y i2
+yi3 y i3
+yi4 y i4
+yi5 y i5
+yin1 y in1
+yin2 y in2
+yin3 y in3
+yin4 y in4
+yin5 y in5
+ying1 y ing1
+ying2 y ing2
+ying3 y ing3
+ying4 y ing4
+ying5 y ing5
+yo1 y iou1
+yo2 y iou2
+yo3 y iou3
+yo4 y iou4
+yo5 y iou5
+yong1 y iong1
+yong2 y iong2
+yong3 y iong3
+yong4 y iong4
+yong5 y iong5
+you1 y iou1
+you2 y iou2
+you3 y iou3
+you4 y iou4
+you5 y iou5
+yu1 y v1
+yu2 y v2
+yu3 y v3
+yu4 y v4
+yu5 y v5
+yuan1 y van1
+yuan2 y van2
+yuan3 y van3
+yuan4 y van4
+yuan5 y van5
+yue1 y ve1
+yue2 y ve2
+yue3 y ve3
+yue4 y ve4
+yue5 y ve5
+yun1 y vn1
+yun2 y vn2
+yun3 y vn3
+yun4 y vn4
+yun5 y vn5
+za1 z a1
+za2 z a2
+za3 z a3
+za4 z a4
+za5 z a5
+zai1 z ai1
+zai2 z ai2
+zai3 z ai3
+zai4 z ai4
+zai5 z ai5
+zan1 z an1
+zan2 z an2
+zan3 z an3
+zan4 z an4
+zan5 z an5
+zang1 z ang1
+zang2 z ang2
+zang3 z ang3
+zang4 z ang4
+zang5 z ang5
+zao1 z ao1
+zao2 z ao2
+zao3 z ao3
+zao4 z ao4
+zao5 z ao5
+ze1 z e1
+ze2 z e2
+ze3 z e3
+ze4 z e4
+ze5 z e5
+zei1 z ei1
+zei2 z ei2
+zei3 z ei3
+zei4 z ei4
+zei5 z ei5
+zen1 z en1
+zen2 z en2
+zen3 z en3
+zen4 z en4
+zen5 z en5
+zeng1 z eng1
+zeng2 z eng2
+zeng3 z eng3
+zeng4 z eng4
+zeng5 z eng5
+zha1 zh a1
+zha2 zh a2
+zha3 zh a3
+zha4 zh a4
+zha5 zh a5
+zhai1 zh ai1
+zhai2 zh ai2
+zhai3 zh ai3
+zhai4 zh ai4
+zhai5 zh ai5
+zhan1 zh an1
+zhan2 zh an2
+zhan3 zh an3
+zhan4 zh an4
+zhan5 zh an5
+zhang1 zh ang1
+zhang2 zh ang2
+zhang3 zh ang3
+zhang4 zh ang4
+zhang5 zh ang5
+zhao1 zh ao1
+zhao2 zh ao2
+zhao3 zh ao3
+zhao4 zh ao4
+zhao5 zh ao5
+zhe1 zh e1
+zhe2 zh e2
+zhe3 zh e3
+zhe4 zh e4
+zhe5 zh e5
+zhei1 zh ei1
+zhei2 zh ei2
+zhei3 zh ei3
+zhei4 zh ei4
+zhei5 zh ei5
+zhen1 zh en1
+zhen2 zh en2
+zhen3 zh en3
+zhen4 zh en4
+zhen5 zh en5
+zheng1 zh eng1
+zheng2 zh eng2
+zheng3 zh eng3
+zheng4 zh eng4
+zheng5 zh eng5
+zhi1 zh iii1
+zhi2 zh iii2
+zhi3 zh iii3
+zhi4 zh iii4
+zhi5 zh iii5
+zhong1 zh ong1
+zhong2 zh ong2
+zhong3 zh ong3
+zhong4 zh ong4
+zhong5 zh ong5
+zhou1 zh ou1
+zhou2 zh ou2
+zhou3 zh ou3
+zhou4 zh ou4
+zhou5 zh ou5
+zhu1 zh u1
+zhu2 zh u2
+zhu3 zh u3
+zhu4 zh u4
+zhu5 zh u5
+zhua1 zh ua1
+zhua2 zh ua2
+zhua3 zh ua3
+zhua4 zh ua4
+zhua5 zh ua5
+zhuai1 zh uai1
+zhuai2 zh uai2
+zhuai3 zh uai3
+zhuai4 zh uai4
+zhuai5 zh uai5
+zhuan1 zh uan1
+zhuan2 zh uan2
+zhuan3 zh uan3
+zhuan4 zh uan4
+zhuan5 zh uan5
+zhuang1 zh uang1
+zhuang2 zh uang2
+zhuang3 zh uang3
+zhuang4 zh uang4
+zhuang5 zh uang5
+zhui1 zh uei1
+zhui2 zh uei2
+zhui3 zh uei3
+zhui4 zh uei4
+zhui5 zh uei5
+zhun1 zh uen1
+zhun2 zh uen2
+zhun3 zh uen3
+zhun4 zh uen4
+zhun5 zh uen5
+zhuo1 zh uo1
+zhuo2 zh uo2
+zhuo3 zh uo3
+zhuo4 zh uo4
+zhuo5 zh uo5
+zi1 z ii1
+zi2 z ii2
+zi3 z ii3
+zi4 z ii4
+zi5 z ii5
+zong1 z ong1
+zong2 z ong2
+zong3 z ong3
+zong4 z ong4
+zong5 z ong5
+zou1 z ou1
+zou2 z ou2
+zou3 z ou3
+zou4 z ou4
+zou5 z ou5
+zu1 z u1
+zu2 z u2
+zu3 z u3
+zu4 z u4
+zu5 z u5
+zuan1 z uan1
+zuan2 z uan2
+zuan3 z uan3
+zuan4 z uan4
+zuan5 z uan5
+zui1 z uei1
+zui2 z uei2
+zui3 z uei3
+zui4 z uei4
+zui5 z uei5
+zun1 z uen1
+zun2 z uen2
+zun3 z uen3
+zun4 z uen4
+zun5 z uen5
+zuo1 z uo1
+zuo2 z uo2
+zuo3 z uo3
+zuo4 z uo4
+zuo5 z uo5
+ar1 a1 rr
+ar2 a2 rr
+ar3 a3 rr
+ar4 a4 rr
+ar5 a5 rr
+air1 ai1 rr
+air2 ai2 rr
+air3 ai3 rr
+air4 ai4 rr
+air5 ai5 rr
+anr1 an1 rr
+anr2 an2 rr
+anr3 an3 rr
+anr4 an4 rr
+anr5 an5 rr
+angr1 ang1 rr
+angr2 ang2 rr
+angr3 ang3 rr
+angr4 ang4 rr
+angr5 ang5 rr
+aor1 ao1 rr
+aor2 ao2 rr
+aor3 ao3 rr
+aor4 ao4 rr
+aor5 ao5 rr
+bar1 b a1 rr
+bar2 b a2 rr
+bar3 b a3 rr
+bar4 b a4 rr
+bar5 b a5 rr
+bair1 b ai1 rr
+bair2 b ai2 rr
+bair3 b ai3 rr
+bair4 b ai4 rr
+bair5 b ai5 rr
+banr1 b an1 rr
+banr2 b an2 rr
+banr3 b an3 rr
+banr4 b an4 rr
+banr5 b an5 rr
+bangr1 b ang1 rr
+bangr2 b ang2 rr
+bangr3 b ang3 rr
+bangr4 b ang4 rr
+bangr5 b ang5 rr
+baor1 b ao1 rr
+baor2 b ao2 rr
+baor3 b ao3 rr
+baor4 b ao4 rr
+baor5 b ao5 rr
+beir1 b ei1 rr
+beir2 b ei2 rr
+beir3 b ei3 rr
+beir4 b ei4 rr
+beir5 b ei5 rr
+benr1 b en1 rr
+benr2 b en2 rr
+benr3 b en3 rr
+benr4 b en4 rr
+benr5 b en5 rr
+bengr1 b eng1 rr
+bengr2 b eng2 rr
+bengr3 b eng3 rr
+bengr4 b eng4 rr
+bengr5 b eng5 rr
+bir1 b i1 rr
+bir2 b i2 rr
+bir3 b i3 rr
+bir4 b i4 rr
+bir5 b i5 rr
+bianr1 b ian1 rr
+bianr2 b ian2 rr
+bianr3 b ian3 rr
+bianr4 b ian4 rr
+bianr5 b ian5 rr
+biaor1 b iao1 rr
+biaor2 b iao2 rr
+biaor3 b iao3 rr
+biaor4 b iao4 rr
+biaor5 b iao5 rr
+bier1 b ie1 rr
+bier2 b ie2 rr
+bier3 b ie3 rr
+bier4 b ie4 rr
+bier5 b ie5 rr
+binr1 b in1 rr
+binr2 b in2 rr
+binr3 b in3 rr
+binr4 b in4 rr
+binr5 b in5 rr
+bingr1 b ing1 rr
+bingr2 b ing2 rr
+bingr3 b ing3 rr
+bingr4 b ing4 rr
+bingr5 b ing5 rr
+bor1 b o1 rr
+bor2 b o2 rr
+bor3 b o3 rr
+bor4 b o4 rr
+bor5 b o5 rr
+bur1 b u1 rr
+bur2 b u2 rr
+bur3 b u3 rr
+bur4 b u4 rr
+bur5 b u5 rr
+car1 c a1 rr
+car2 c a2 rr
+car3 c a3 rr
+car4 c a4 rr
+car5 c a5 rr
+cair1 c ai1 rr
+cair2 c ai2 rr
+cair3 c ai3 rr
+cair4 c ai4 rr
+cair5 c ai5 rr
+canr1 c an1 rr
+canr2 c an2 rr
+canr3 c an3 rr
+canr4 c an4 rr
+canr5 c an5 rr
+cangr1 c ang1 rr
+cangr2 c ang2 rr
+cangr3 c ang3 rr
+cangr4 c ang4 rr
+cangr5 c ang5 rr
+caor1 c ao1 rr
+caor2 c ao2 rr
+caor3 c ao3 rr
+caor4 c ao4 rr
+caor5 c ao5 rr
+cer1 c e1 rr
+cer2 c e2 rr
+cer3 c e3 rr
+cer4 c e4 rr
+cer5 c e5 rr
+cenr1 c en1 rr
+cenr2 c en2 rr
+cenr3 c en3 rr
+cenr4 c en4 rr
+cenr5 c en5 rr
+cengr1 c eng1 rr
+cengr2 c eng2 rr
+cengr3 c eng3 rr
+cengr4 c eng4 rr
+cengr5 c eng5 rr
+char1 ch a1 rr
+char2 ch a2 rr
+char3 ch a3 rr
+char4 ch a4 rr
+char5 ch a5 rr
+chair1 ch ai1 rr
+chair2 ch ai2 rr
+chair3 ch ai3 rr
+chair4 ch ai4 rr
+chair5 ch ai5 rr
+chanr1 ch an1 rr
+chanr2 ch an2 rr
+chanr3 ch an3 rr
+chanr4 ch an4 rr
+chanr5 ch an5 rr
+changr1 ch ang1 rr
+changr2 ch ang2 rr
+changr3 ch ang3 rr
+changr4 ch ang4 rr
+changr5 ch ang5 rr
+chaor1 ch ao1 rr
+chaor2 ch ao2 rr
+chaor3 ch ao3 rr
+chaor4 ch ao4 rr
+chaor5 ch ao5 rr
+cher1 ch e1 rr
+cher2 ch e2 rr
+cher3 ch e3 rr
+cher4 ch e4 rr
+cher5 ch e5 rr
+chenr1 ch en1 rr
+chenr2 ch en2 rr
+chenr3 ch en3 rr
+chenr4 ch en4 rr
+chenr5 ch en5 rr
+chengr1 ch eng1 rr
+chengr2 ch eng2 rr
+chengr3 ch eng3 rr
+chengr4 ch eng4 rr
+chengr5 ch eng5 rr
+chir1 ch iii1 rr
+chir2 ch iii2 rr
+chir3 ch iii3 rr
+chir4 ch iii4 rr
+chir5 ch iii5 rr
+chongr1 ch ong1 rr
+chongr2 ch ong2 rr
+chongr3 ch ong3 rr
+chongr4 ch ong4 rr
+chongr5 ch ong5 rr
+chour1 ch ou1 rr
+chour2 ch ou2 rr
+chour3 ch ou3 rr
+chour4 ch ou4 rr
+chour5 ch ou5 rr
+chur1 ch u1 rr
+chur2 ch u2 rr
+chur3 ch u3 rr
+chur4 ch u4 rr
+chur5 ch u5 rr
+chuair1 ch uai1 rr
+chuair2 ch uai2 rr
+chuair3 ch uai3 rr
+chuair4 ch uai4 rr
+chuair5 ch uai5 rr
+chuanr1 ch uan1 rr
+chuanr2 ch uan2 rr
+chuanr3 ch uan3 rr
+chuanr4 ch uan4 rr
+chuanr5 ch uan5 rr
+chuangr1 ch uang1 rr
+chuangr2 ch uang2 rr
+chuangr3 ch uang3 rr
+chuangr4 ch uang4 rr
+chuangr5 ch uang5 rr
+chuir1 ch uei1 rr
+chuir2 ch uei2 rr
+chuir3 ch uei3 rr
+chuir4 ch uei4 rr
+chuir5 ch uei5 rr
+chunr1 ch uen1 rr
+chunr2 ch uen2 rr
+chunr3 ch uen3 rr
+chunr4 ch uen4 rr
+chunr5 ch uen5 rr
+chuor1 ch uo1 rr
+chuor2 ch uo2 rr
+chuor3 ch uo3 rr
+chuor4 ch uo4 rr
+chuor5 ch uo5 rr
+cir1 c ii1 rr
+cir2 c ii2 rr
+cir3 c ii3 rr
+cir4 c ii4 rr
+cir5 c ii5 rr
+congr1 c ong1 rr
+congr2 c ong2 rr
+congr3 c ong3 rr
+congr4 c ong4 rr
+congr5 c ong5 rr
+cour1 c ou1 rr
+cour2 c ou2 rr
+cour3 c ou3 rr
+cour4 c ou4 rr
+cour5 c ou5 rr
+cur1 c u1 rr
+cur2 c u2 rr
+cur3 c u3 rr
+cur4 c u4 rr
+cur5 c u5 rr
+cuanr1 c uan1 rr
+cuanr2 c uan2 rr
+cuanr3 c uan3 rr
+cuanr4 c uan4 rr
+cuanr5 c uan5 rr
+cuir1 c uei1 rr
+cuir2 c uei2 rr
+cuir3 c uei3 rr
+cuir4 c uei4 rr
+cuir5 c uei5 rr
+cunr1 c uen1 rr
+cunr2 c uen2 rr
+cunr3 c uen3 rr
+cunr4 c uen4 rr
+cunr5 c uen5 rr
+cuor1 c uo1 rr
+cuor2 c uo2 rr
+cuor3 c uo3 rr
+cuor4 c uo4 rr
+cuor5 c uo5 rr
+dar1 d a1 rr
+dar2 d a2 rr
+dar3 d a3 rr
+dar4 d a4 rr
+dar5 d a5 rr
+dair1 d ai1 rr
+dair2 d ai2 rr
+dair3 d ai3 rr
+dair4 d ai4 rr
+dair5 d ai5 rr
+danr1 d an1 rr
+danr2 d an2 rr
+danr3 d an3 rr
+danr4 d an4 rr
+danr5 d an5 rr
+dangr1 d ang1 rr
+dangr2 d ang2 rr
+dangr3 d ang3 rr
+dangr4 d ang4 rr
+dangr5 d ang5 rr
+daor1 d ao1 rr
+daor2 d ao2 rr
+daor3 d ao3 rr
+daor4 d ao4 rr
+daor5 d ao5 rr
+der1 d e1 rr
+der2 d e2 rr
+der3 d e3 rr
+der4 d e4 rr
+der5 d e5 rr
+deir1 d ei1 rr
+deir2 d ei2 rr
+deir3 d ei3 rr
+deir4 d ei4 rr
+deir5 d ei5 rr
+denr1 d en1 rr
+denr2 d en2 rr
+denr3 d en3 rr
+denr4 d en4 rr
+denr5 d en5 rr
+dengr1 d eng1 rr
+dengr2 d eng2 rr
+dengr3 d eng3 rr
+dengr4 d eng4 rr
+dengr5 d eng5 rr
+dir1 d i1 rr
+dir2 d i2 rr
+dir3 d i3 rr
+dir4 d i4 rr
+dir5 d i5 rr
+diar1 d ia1 rr
+diar2 d ia2 rr
+diar3 d ia3 rr
+diar4 d ia4 rr
+diar5 d ia5 rr
+dianr1 d ian1 rr
+dianr2 d ian2 rr
+dianr3 d ian3 rr
+dianr4 d ian4 rr
+dianr5 d ian5 rr
+diaor1 d iao1 rr
+diaor2 d iao2 rr
+diaor3 d iao3 rr
+diaor4 d iao4 rr
+diaor5 d iao5 rr
+dier1 d ie1 rr
+dier2 d ie2 rr
+dier3 d ie3 rr
+dier4 d ie4 rr
+dier5 d ie5 rr
+dingr1 d ing1 rr
+dingr2 d ing2 rr
+dingr3 d ing3 rr
+dingr4 d ing4 rr
+dingr5 d ing5 rr
+diur1 d iou1 rr
+diur2 d iou2 rr
+diur3 d iou3 rr
+diur4 d iou4 rr
+diur5 d iou5 rr
+dongr1 d ong1 rr
+dongr2 d ong2 rr
+dongr3 d ong3 rr
+dongr4 d ong4 rr
+dongr5 d ong5 rr
+dour1 d ou1 rr
+dour2 d ou2 rr
+dour3 d ou3 rr
+dour4 d ou4 rr
+dour5 d ou5 rr
+dur1 d u1 rr
+dur2 d u2 rr
+dur3 d u3 rr
+dur4 d u4 rr
+dur5 d u5 rr
+duanr1 d uan1 rr
+duanr2 d uan2 rr
+duanr3 d uan3 rr
+duanr4 d uan4 rr
+duanr5 d uan5 rr
+duir1 d uei1 rr
+duir2 d uei2 rr
+duir3 d uei3 rr
+duir4 d uei4 rr
+duir5 d uei5 rr
+dunr1 d uen1 rr
+dunr2 d uen2 rr
+dunr3 d uen3 rr
+dunr4 d uen4 rr
+dunr5 d uen5 rr
+duor1 d uo1 rr
+duor2 d uo2 rr
+duor3 d uo3 rr
+duor4 d uo4 rr
+duor5 d uo5 rr
+er1 e1 rr
+er2 e2 rr
+er3 e3 rr
+er4 e4 rr
+er5 e5 rr
+eir1 ei1 rr
+eir2 ei2 rr
+eir3 ei3 rr
+eir4 ei4 rr
+eir5 ei5 rr
+enr1 en1 rr
+enr2 en2 rr
+enr3 en3 rr
+enr4 en4 rr
+enr5 en5 rr
+engr1 eng1 rr
+engr2 eng2 rr
+engr3 eng3 rr
+engr4 eng4 rr
+engr5 eng5 rr
+far1 f a1 rr
+far2 f a2 rr
+far3 f a3 rr
+far4 f a4 rr
+far5 f a5 rr
+fanr1 f an1 rr
+fanr2 f an2 rr
+fanr3 f an3 rr
+fanr4 f an4 rr
+fanr5 f an5 rr
+fangr1 f ang1 rr
+fangr2 f ang2 rr
+fangr3 f ang3 rr
+fangr4 f ang4 rr
+fangr5 f ang5 rr
+feir1 f ei1 rr
+feir2 f ei2 rr
+feir3 f ei3 rr
+feir4 f ei4 rr
+feir5 f ei5 rr
+fenr1 f en1 rr
+fenr2 f en2 rr
+fenr3 f en3 rr
+fenr4 f en4 rr
+fenr5 f en5 rr
+fengr1 f eng1 rr
+fengr2 f eng2 rr
+fengr3 f eng3 rr
+fengr4 f eng4 rr
+fengr5 f eng5 rr
+for1 f o1 rr
+for2 f o2 rr
+for3 f o3 rr
+for4 f o4 rr
+for5 f o5 rr
+four1 f ou1 rr
+four2 f ou2 rr
+four3 f ou3 rr
+four4 f ou4 rr
+four5 f ou5 rr
+fur1 f u1 rr
+fur2 f u2 rr
+fur3 f u3 rr
+fur4 f u4 rr
+fur5 f u5 rr
+gar1 g a1 rr
+gar2 g a2 rr
+gar3 g a3 rr
+gar4 g a4 rr
+gar5 g a5 rr
+gair1 g ai1 rr
+gair2 g ai2 rr
+gair3 g ai3 rr
+gair4 g ai4 rr
+gair5 g ai5 rr
+ganr1 g an1 rr
+ganr2 g an2 rr
+ganr3 g an3 rr
+ganr4 g an4 rr
+ganr5 g an5 rr
+gangr1 g ang1 rr
+gangr2 g ang2 rr
+gangr3 g ang3 rr
+gangr4 g ang4 rr
+gangr5 g ang5 rr
+gaor1 g ao1 rr
+gaor2 g ao2 rr
+gaor3 g ao3 rr
+gaor4 g ao4 rr
+gaor5 g ao5 rr
+ger1 g e1 rr
+ger2 g e2 rr
+ger3 g e3 rr
+ger4 g e4 rr
+ger5 g e5 rr
+geir1 g ei1 rr
+geir2 g ei2 rr
+geir3 g ei3 rr
+geir4 g ei4 rr
+geir5 g ei5 rr
+genr1 g en1 rr
+genr2 g en2 rr
+genr3 g en3 rr
+genr4 g en4 rr
+genr5 g en5 rr
+gengr1 g eng1 rr
+gengr2 g eng2 rr
+gengr3 g eng3 rr
+gengr4 g eng4 rr
+gengr5 g eng5 rr
+gongr1 g ong1 rr
+gongr2 g ong2 rr
+gongr3 g ong3 rr
+gongr4 g ong4 rr
+gongr5 g ong5 rr
+gour1 g ou1 rr
+gour2 g ou2 rr
+gour3 g ou3 rr
+gour4 g ou4 rr
+gour5 g ou5 rr
+gur1 g u1 rr
+gur2 g u2 rr
+gur3 g u3 rr
+gur4 g u4 rr
+gur5 g u5 rr
+guar1 g ua1 rr
+guar2 g ua2 rr
+guar3 g ua3 rr
+guar4 g ua4 rr
+guar5 g ua5 rr
+guair1 g uai1 rr
+guair2 g uai2 rr
+guair3 g uai3 rr
+guair4 g uai4 rr
+guair5 g uai5 rr
+guanr1 g uan1 rr
+guanr2 g uan2 rr
+guanr3 g uan3 rr
+guanr4 g uan4 rr
+guanr5 g uan5 rr
+guangr1 g uang1 rr
+guangr2 g uang2 rr
+guangr3 g uang3 rr
+guangr4 g uang4 rr
+guangr5 g uang5 rr
+guir1 g uei1 rr
+guir2 g uei2 rr
+guir3 g uei3 rr
+guir4 g uei4 rr
+guir5 g uei5 rr
+gunr1 g uen1 rr
+gunr2 g uen2 rr
+gunr3 g uen3 rr
+gunr4 g uen4 rr
+gunr5 g uen5 rr
+guor1 g uo1 rr
+guor2 g uo2 rr
+guor3 g uo3 rr
+guor4 g uo4 rr
+guor5 g uo5 rr
+har1 h a1 rr
+har2 h a2 rr
+har3 h a3 rr
+har4 h a4 rr
+har5 h a5 rr
+hair1 h ai1 rr
+hair2 h ai2 rr
+hair3 h ai3 rr
+hair4 h ai4 rr
+hair5 h ai5 rr
+hanr1 h an1 rr
+hanr2 h an2 rr
+hanr3 h an3 rr
+hanr4 h an4 rr
+hanr5 h an5 rr
+hangr1 h ang1 rr
+hangr2 h ang2 rr
+hangr3 h ang3 rr
+hangr4 h ang4 rr
+hangr5 h ang5 rr
+haor1 h ao1 rr
+haor2 h ao2 rr
+haor3 h ao3 rr
+haor4 h ao4 rr
+haor5 h ao5 rr
+her1 h e1 rr
+her2 h e2 rr
+her3 h e3 rr
+her4 h e4 rr
+her5 h e5 rr
+heir1 h ei1 rr
+heir2 h ei2 rr
+heir3 h ei3 rr
+heir4 h ei4 rr
+heir5 h ei5 rr
+henr1 h en1 rr
+henr2 h en2 rr
+henr3 h en3 rr
+henr4 h en4 rr
+henr5 h en5 rr
+hengr1 h eng1 rr
+hengr2 h eng2 rr
+hengr3 h eng3 rr
+hengr4 h eng4 rr
+hengr5 h eng5 rr
+hongr1 h ong1 rr
+hongr2 h ong2 rr
+hongr3 h ong3 rr
+hongr4 h ong4 rr
+hongr5 h ong5 rr
+hour1 h ou1 rr
+hour2 h ou2 rr
+hour3 h ou3 rr
+hour4 h ou4 rr
+hour5 h ou5 rr
+hur1 h u1 rr
+hur2 h u2 rr
+hur3 h u3 rr
+hur4 h u4 rr
+hur5 h u5 rr
+huar1 h ua1 rr
+huar2 h ua2 rr
+huar3 h ua3 rr
+huar4 h ua4 rr
+huar5 h ua5 rr
+huair1 h uai1 rr
+huair2 h uai2 rr
+huair3 h uai3 rr
+huair4 h uai4 rr
+huair5 h uai5 rr
+huanr1 h uan1 rr
+huanr2 h uan2 rr
+huanr3 h uan3 rr
+huanr4 h uan4 rr
+huanr5 h uan5 rr
+huangr1 h uang1 rr
+huangr2 h uang2 rr
+huangr3 h uang3 rr
+huangr4 h uang4 rr
+huangr5 h uang5 rr
+huir1 h uei1 rr
+huir2 h uei2 rr
+huir3 h uei3 rr
+huir4 h uei4 rr
+huir5 h uei5 rr
+hunr1 h uen1 rr
+hunr2 h uen2 rr
+hunr3 h uen3 rr
+hunr4 h uen4 rr
+hunr5 h uen5 rr
+huor1 h uo1 rr
+huor2 h uo2 rr
+huor3 h uo3 rr
+huor4 h uo4 rr
+huor5 h uo5 rr
+jir1 j i1 rr
+jir2 j i2 rr
+jir3 j i3 rr
+jir4 j i4 rr
+jir5 j i5 rr
+jiar1 j ia1 rr
+jiar2 j ia2 rr
+jiar3 j ia3 rr
+jiar4 j ia4 rr
+jiar5 j ia5 rr
2721 |
+
jianr1 j ian1 rr
|
2722 |
+
jianr2 j ian2 rr
|
2723 |
+
jianr3 j ian3 rr
|
2724 |
+
jianr4 j ian4 rr
|
2725 |
+
jianr5 j ian5 rr
|
2726 |
+
jiangr1 j iang1 rr
|
2727 |
+
jiangr2 j iang2 rr
|
2728 |
+
jiangr3 j iang3 rr
|
2729 |
+
jiangr4 j iang4 rr
|
2730 |
+
jiangr5 j iang5 rr
|
2731 |
+
jiaor1 j iao1 rr
|
2732 |
+
jiaor2 j iao2 rr
|
2733 |
+
jiaor3 j iao3 rr
|
2734 |
+
jiaor4 j iao4 rr
|
2735 |
+
jiaor5 j iao5 rr
|
2736 |
+
jier1 j ie1 rr
|
2737 |
+
jier2 j ie2 rr
|
2738 |
+
jier3 j ie3 rr
|
2739 |
+
jier4 j ie4 rr
|
2740 |
+
jier5 j ie5 rr
|
2741 |
+
jinr1 j in1 rr
|
2742 |
+
jinr2 j in2 rr
|
2743 |
+
jinr3 j in3 rr
|
2744 |
+
jinr4 j in4 rr
|
2745 |
+
jinr5 j in5 rr
|
2746 |
+
jingr1 j ing1 rr
|
2747 |
+
jingr2 j ing2 rr
|
2748 |
+
jingr3 j ing3 rr
|
2749 |
+
jingr4 j ing4 rr
|
2750 |
+
jingr5 j ing5 rr
|
2751 |
+
jiongr1 j iong1 rr
|
2752 |
+
jiongr2 j iong2 rr
|
2753 |
+
jiongr3 j iong3 rr
|
2754 |
+
jiongr4 j iong4 rr
|
2755 |
+
jiongr5 j iong5 rr
|
2756 |
+
jiur1 j iou1 rr
|
2757 |
+
jiur2 j iou2 rr
|
2758 |
+
jiur3 j iou3 rr
|
2759 |
+
jiur4 j iou4 rr
|
2760 |
+
jiur5 j iou5 rr
|
2761 |
+
jur1 j v1 rr
|
2762 |
+
jur2 j v2 rr
|
2763 |
+
jur3 j v3 rr
|
2764 |
+
jur4 j v4 rr
|
2765 |
+
jur5 j v5 rr
|
2766 |
+
juanr1 j van1 rr
|
2767 |
+
juanr2 j van2 rr
|
2768 |
+
juanr3 j van3 rr
|
2769 |
+
juanr4 j van4 rr
|
2770 |
+
juanr5 j van5 rr
|
2771 |
+
juer1 j ve1 rr
|
2772 |
+
juer2 j ve2 rr
|
2773 |
+
juer3 j ve3 rr
|
2774 |
+
juer4 j ve4 rr
|
2775 |
+
juer5 j ve5 rr
|
2776 |
+
junr1 j vn1 rr
|
2777 |
+
junr2 j vn2 rr
|
2778 |
+
junr3 j vn3 rr
|
2779 |
+
junr4 j vn4 rr
|
2780 |
+
junr5 j vn5 rr
|
2781 |
+
kar1 k a1 rr
|
2782 |
+
kar2 k a2 rr
|
2783 |
+
kar3 k a3 rr
|
2784 |
+
kar4 k a4 rr
|
2785 |
+
kar5 k a5 rr
|
2786 |
+
kair1 k ai1 rr
|
2787 |
+
kair2 k ai2 rr
|
2788 |
+
kair3 k ai3 rr
|
2789 |
+
kair4 k ai4 rr
|
2790 |
+
kair5 k ai5 rr
|
2791 |
+
kanr1 k an1 rr
|
2792 |
+
kanr2 k an2 rr
|
2793 |
+
kanr3 k an3 rr
|
2794 |
+
kanr4 k an4 rr
|
2795 |
+
kanr5 k an5 rr
|
2796 |
+
kangr1 k ang1 rr
|
2797 |
+
kangr2 k ang2 rr
|
2798 |
+
kangr3 k ang3 rr
|
2799 |
+
kangr4 k ang4 rr
|
2800 |
+
kangr5 k ang5 rr
|
2801 |
+
kaor1 k ao1 rr
|
2802 |
+
kaor2 k ao2 rr
|
2803 |
+
kaor3 k ao3 rr
|
2804 |
+
kaor4 k ao4 rr
|
2805 |
+
kaor5 k ao5 rr
|
2806 |
+
ker1 k e1 rr
|
2807 |
+
ker2 k e2 rr
|
2808 |
+
ker3 k e3 rr
|
2809 |
+
ker4 k e4 rr
|
2810 |
+
ker5 k e5 rr
|
2811 |
+
keir1 k ei1 rr
|
2812 |
+
keir2 k ei2 rr
|
2813 |
+
keir3 k ei3 rr
|
2814 |
+
keir4 k ei4 rr
|
2815 |
+
keir5 k ei5 rr
|
2816 |
+
kenr1 k en1 rr
|
2817 |
+
kenr2 k en2 rr
|
2818 |
+
kenr3 k en3 rr
|
2819 |
+
kenr4 k en4 rr
|
2820 |
+
kenr5 k en5 rr
|
2821 |
+
kengr1 k eng1 rr
|
2822 |
+
kengr2 k eng2 rr
|
2823 |
+
kengr3 k eng3 rr
|
2824 |
+
kengr4 k eng4 rr
|
2825 |
+
kengr5 k eng5 rr
|
2826 |
+
kongr1 k ong1 rr
|
2827 |
+
kongr2 k ong2 rr
|
2828 |
+
kongr3 k ong3 rr
|
2829 |
+
kongr4 k ong4 rr
|
2830 |
+
kongr5 k ong5 rr
|
2831 |
+
kour1 k ou1 rr
|
2832 |
+
kour2 k ou2 rr
|
2833 |
+
kour3 k ou3 rr
|
2834 |
+
kour4 k ou4 rr
|
2835 |
+
kour5 k ou5 rr
|
2836 |
+
kur1 k u1 rr
|
2837 |
+
kur2 k u2 rr
|
2838 |
+
kur3 k u3 rr
|
2839 |
+
kur4 k u4 rr
|
2840 |
+
kur5 k u5 rr
|
2841 |
+
kuar1 k ua1 rr
|
2842 |
+
kuar2 k ua2 rr
|
2843 |
+
kuar3 k ua3 rr
|
2844 |
+
kuar4 k ua4 rr
|
2845 |
+
kuar5 k ua5 rr
|
2846 |
+
kuair1 k uai1 rr
|
2847 |
+
kuair2 k uai2 rr
|
2848 |
+
kuair3 k uai3 rr
|
2849 |
+
kuair4 k uai4 rr
|
2850 |
+
kuair5 k uai5 rr
|
2851 |
+
kuanr1 k uan1 rr
|
2852 |
+
kuanr2 k uan2 rr
|
2853 |
+
kuanr3 k uan3 rr
|
2854 |
+
kuanr4 k uan4 rr
|
2855 |
+
kuanr5 k uan5 rr
|
2856 |
+
kuangr1 k uang1 rr
|
2857 |
+
kuangr2 k uang2 rr
|
2858 |
+
kuangr3 k uang3 rr
|
2859 |
+
kuangr4 k uang4 rr
|
2860 |
+
kuangr5 k uang5 rr
|
2861 |
+
kuir1 k uei1 rr
|
2862 |
+
kuir2 k uei2 rr
|
2863 |
+
kuir3 k uei3 rr
|
2864 |
+
kuir4 k uei4 rr
|
2865 |
+
kuir5 k uei5 rr
|
2866 |
+
kunr1 k uen1 rr
|
2867 |
+
kunr2 k uen2 rr
|
2868 |
+
kunr3 k uen3 rr
|
2869 |
+
kunr4 k uen4 rr
|
2870 |
+
kunr5 k uen5 rr
|
2871 |
+
kuor1 k uo1 rr
|
2872 |
+
kuor2 k uo2 rr
|
2873 |
+
kuor3 k uo3 rr
|
2874 |
+
kuor4 k uo4 rr
|
2875 |
+
kuor5 k uo5 rr
|
2876 |
+
lar1 l a1 rr
|
2877 |
+
lar2 l a2 rr
|
2878 |
+
lar3 l a3 rr
|
2879 |
+
lar4 l a4 rr
|
2880 |
+
lar5 l a5 rr
|
2881 |
+
lair1 l ai1 rr
|
2882 |
+
lair2 l ai2 rr
|
2883 |
+
lair3 l ai3 rr
|
2884 |
+
lair4 l ai4 rr
|
2885 |
+
lair5 l ai5 rr
|
2886 |
+
lanr1 l an1 rr
|
2887 |
+
lanr2 l an2 rr
|
2888 |
+
lanr3 l an3 rr
|
2889 |
+
lanr4 l an4 rr
|
2890 |
+
lanr5 l an5 rr
|
2891 |
+
langr1 l ang1 rr
|
2892 |
+
langr2 l ang2 rr
|
2893 |
+
langr3 l ang3 rr
|
2894 |
+
langr4 l ang4 rr
|
2895 |
+
langr5 l ang5 rr
|
2896 |
+
laor1 l ao1 rr
|
2897 |
+
laor2 l ao2 rr
|
2898 |
+
laor3 l ao3 rr
|
2899 |
+
laor4 l ao4 rr
|
2900 |
+
laor5 l ao5 rr
|
2901 |
+
ler1 l e1 rr
|
2902 |
+
ler2 l e2 rr
|
2903 |
+
ler3 l e3 rr
|
2904 |
+
ler4 l e4 rr
|
2905 |
+
ler5 l e5 rr
|
2906 |
+
leir1 l ei1 rr
|
2907 |
+
leir2 l ei2 rr
|
2908 |
+
leir3 l ei3 rr
|
2909 |
+
leir4 l ei4 rr
|
2910 |
+
leir5 l ei5 rr
|
2911 |
+
lengr1 l eng1 rr
|
2912 |
+
lengr2 l eng2 rr
|
2913 |
+
lengr3 l eng3 rr
|
2914 |
+
lengr4 l eng4 rr
|
2915 |
+
lengr5 l eng5 rr
|
2916 |
+
lir1 l i1 rr
|
2917 |
+
lir2 l i2 rr
|
2918 |
+
lir3 l i3 rr
|
2919 |
+
lir4 l i4 rr
|
2920 |
+
lir5 l i5 rr
|
2921 |
+
liar1 l ia1 rr
|
2922 |
+
liar2 l ia2 rr
|
2923 |
+
liar3 l ia3 rr
|
2924 |
+
liar4 l ia4 rr
|
2925 |
+
liar5 l ia5 rr
|
2926 |
+
lianr1 l ian1 rr
|
2927 |
+
lianr2 l ian2 rr
|
2928 |
+
lianr3 l ian3 rr
|
2929 |
+
lianr4 l ian4 rr
|
2930 |
+
lianr5 l ian5 rr
|
2931 |
+
liangr1 l iang1 rr
|
2932 |
+
liangr2 l iang2 rr
|
2933 |
+
liangr3 l iang3 rr
|
2934 |
+
liangr4 l iang4 rr
|
2935 |
+
liangr5 l iang5 rr
|
2936 |
+
liaor1 l iao1 rr
|
2937 |
+
liaor2 l iao2 rr
|
2938 |
+
liaor3 l iao3 rr
|
2939 |
+
liaor4 l iao4 rr
|
2940 |
+
liaor5 l iao5 rr
|
2941 |
+
lier1 l ie1 rr
|
2942 |
+
lier2 l ie2 rr
|
2943 |
+
lier3 l ie3 rr
|
2944 |
+
lier4 l ie4 rr
|
2945 |
+
lier5 l ie5 rr
|
2946 |
+
linr1 l in1 rr
|
2947 |
+
linr2 l in2 rr
|
2948 |
+
linr3 l in3 rr
|
2949 |
+
linr4 l in4 rr
|
2950 |
+
linr5 l in5 rr
|
2951 |
+
lingr1 l ing1 rr
|
2952 |
+
lingr2 l ing2 rr
|
2953 |
+
lingr3 l ing3 rr
|
2954 |
+
lingr4 l ing4 rr
|
2955 |
+
lingr5 l ing5 rr
|
2956 |
+
liur1 l iou1 rr
|
2957 |
+
liur2 l iou2 rr
|
2958 |
+
liur3 l iou3 rr
|
2959 |
+
liur4 l iou4 rr
|
2960 |
+
liur5 l iou5 rr
|
2961 |
+
lor1 l o1 rr
|
2962 |
+
lor2 l o2 rr
|
2963 |
+
lor3 l o3 rr
|
2964 |
+
lor4 l o4 rr
|
2965 |
+
lor5 l o5 rr
|
2966 |
+
longr1 l ong1 rr
|
2967 |
+
longr2 l ong2 rr
|
2968 |
+
longr3 l ong3 rr
|
2969 |
+
longr4 l ong4 rr
|
2970 |
+
longr5 l ong5 rr
|
2971 |
+
lour1 l ou1 rr
|
2972 |
+
lour2 l ou2 rr
|
2973 |
+
lour3 l ou3 rr
|
2974 |
+
lour4 l ou4 rr
|
2975 |
+
lour5 l ou5 rr
|
2976 |
+
lur1 l u1 rr
|
2977 |
+
lur2 l u2 rr
|
2978 |
+
lur3 l u3 rr
|
2979 |
+
lur4 l u4 rr
|
2980 |
+
lur5 l u5 rr
|
2981 |
+
luanr1 l uan1 rr
|
2982 |
+
luanr2 l uan2 rr
|
2983 |
+
luanr3 l uan3 rr
|
2984 |
+
luanr4 l uan4 rr
|
2985 |
+
luanr5 l uan5 rr
|
2986 |
+
luer1 l ve1 rr
|
2987 |
+
luer2 l ve2 rr
|
2988 |
+
luer3 l ve3 rr
|
2989 |
+
luer4 l ve4 rr
|
2990 |
+
luer5 l ve5 rr
|
2991 |
+
lver1 l ve1 rr
|
2992 |
+
lver2 l ve2 rr
|
2993 |
+
lver3 l ve3 rr
|
2994 |
+
lver4 l ve4 rr
|
2995 |
+
lver5 l ve5 rr
|
2996 |
+
lunr1 l uen1 rr
|
2997 |
+
lunr2 l uen2 rr
|
2998 |
+
lunr3 l uen3 rr
|
2999 |
+
lunr4 l uen4 rr
|
3000 |
+
lunr5 l uen5 rr
|
3001 |
+
luor1 l uo1 rr
|
3002 |
+
luor2 l uo2 rr
|
3003 |
+
luor3 l uo3 rr
|
3004 |
+
luor4 l uo4 rr
|
3005 |
+
luor5 l uo5 rr
|
3006 |
+
lvr1 l v1 rr
|
3007 |
+
lvr2 l v2 rr
|
3008 |
+
lvr3 l v3 rr
|
3009 |
+
lvr4 l v4 rr
|
3010 |
+
lvr5 l v5 rr
|
3011 |
+
mar1 m a1 rr
|
3012 |
+
mar2 m a2 rr
|
3013 |
+
mar3 m a3 rr
|
3014 |
+
mar4 m a4 rr
|
3015 |
+
mar5 m a5 rr
|
3016 |
+
mair1 m ai1 rr
|
3017 |
+
mair2 m ai2 rr
|
3018 |
+
mair3 m ai3 rr
|
3019 |
+
mair4 m ai4 rr
|
3020 |
+
mair5 m ai5 rr
|
3021 |
+
manr1 m an1 rr
|
3022 |
+
manr2 m an2 rr
|
3023 |
+
manr3 m an3 rr
|
3024 |
+
manr4 m an4 rr
|
3025 |
+
manr5 m an5 rr
|
3026 |
+
mangr1 m ang1 rr
|
3027 |
+
mangr2 m ang2 rr
|
3028 |
+
mangr3 m ang3 rr
|
3029 |
+
mangr4 m ang4 rr
|
3030 |
+
mangr5 m ang5 rr
|
3031 |
+
maor1 m ao1 rr
|
3032 |
+
maor2 m ao2 rr
|
3033 |
+
maor3 m ao3 rr
|
3034 |
+
maor4 m ao4 rr
|
3035 |
+
maor5 m ao5 rr
|
3036 |
+
mer1 m e1 rr
|
3037 |
+
mer2 m e2 rr
|
3038 |
+
mer3 m e3 rr
|
3039 |
+
mer4 m e4 rr
|
3040 |
+
mer5 m e5 rr
|
3041 |
+
meir1 m ei1 rr
|
3042 |
+
meir2 m ei2 rr
|
3043 |
+
meir3 m ei3 rr
|
3044 |
+
meir4 m ei4 rr
|
3045 |
+
meir5 m ei5 rr
|
3046 |
+
menr1 m en1 rr
|
3047 |
+
menr2 m en2 rr
|
3048 |
+
menr3 m en3 rr
|
3049 |
+
menr4 m en4 rr
|
3050 |
+
menr5 m en5 rr
|
3051 |
+
mengr1 m eng1 rr
|
3052 |
+
mengr2 m eng2 rr
|
3053 |
+
mengr3 m eng3 rr
|
3054 |
+
mengr4 m eng4 rr
|
3055 |
+
mengr5 m eng5 rr
|
3056 |
+
mir1 m i1 rr
|
3057 |
+
mir2 m i2 rr
|
3058 |
+
mir3 m i3 rr
|
3059 |
+
mir4 m i4 rr
|
3060 |
+
mir5 m i5 rr
|
3061 |
+
mianr1 m ian1 rr
|
3062 |
+
mianr2 m ian2 rr
|
3063 |
+
mianr3 m ian3 rr
|
3064 |
+
mianr4 m ian4 rr
|
3065 |
+
mianr5 m ian5 rr
|
3066 |
+
miaor1 m iao1 rr
|
3067 |
+
miaor2 m iao2 rr
|
3068 |
+
miaor3 m iao3 rr
|
3069 |
+
miaor4 m iao4 rr
|
3070 |
+
miaor5 m iao5 rr
|
3071 |
+
mier1 m ie1 rr
|
3072 |
+
mier2 m ie2 rr
|
3073 |
+
mier3 m ie3 rr
|
3074 |
+
mier4 m ie4 rr
|
3075 |
+
mier5 m ie5 rr
|
3076 |
+
minr1 m in1 rr
|
3077 |
+
minr2 m in2 rr
|
3078 |
+
minr3 m in3 rr
|
3079 |
+
minr4 m in4 rr
|
3080 |
+
minr5 m in5 rr
|
3081 |
+
mingr1 m ing1 rr
|
3082 |
+
mingr2 m ing2 rr
|
3083 |
+
mingr3 m ing3 rr
|
3084 |
+
mingr4 m ing4 rr
|
3085 |
+
mingr5 m ing5 rr
|
3086 |
+
miur1 m iou1 rr
|
3087 |
+
miur2 m iou2 rr
|
3088 |
+
miur3 m iou3 rr
|
3089 |
+
miur4 m iou4 rr
|
3090 |
+
miur5 m iou5 rr
|
3091 |
+
mor1 m o1 rr
|
3092 |
+
mor2 m o2 rr
|
3093 |
+
mor3 m o3 rr
|
3094 |
+
mor4 m o4 rr
|
3095 |
+
mor5 m o5 rr
|
3096 |
+
mour1 m ou1 rr
|
3097 |
+
mour2 m ou2 rr
|
3098 |
+
mour3 m ou3 rr
|
3099 |
+
mour4 m ou4 rr
|
3100 |
+
mour5 m ou5 rr
|
3101 |
+
mur1 m u1 rr
|
3102 |
+
mur2 m u2 rr
|
3103 |
+
mur3 m u3 rr
|
3104 |
+
mur4 m u4 rr
|
3105 |
+
mur5 m u5 rr
|
3106 |
+
nar1 n a1 rr
|
3107 |
+
nar2 n a2 rr
|
3108 |
+
nar3 n a3 rr
|
3109 |
+
nar4 n a4 rr
|
3110 |
+
nar5 n a5 rr
|
3111 |
+
nair1 n ai1 rr
|
3112 |
+
nair2 n ai2 rr
|
3113 |
+
nair3 n ai3 rr
|
3114 |
+
nair4 n ai4 rr
|
3115 |
+
nair5 n ai5 rr
|
3116 |
+
nanr1 n an1 rr
|
3117 |
+
nanr2 n an2 rr
|
3118 |
+
nanr3 n an3 rr
|
3119 |
+
nanr4 n an4 rr
|
3120 |
+
nanr5 n an5 rr
|
3121 |
+
nangr1 n ang1 rr
|
3122 |
+
nangr2 n ang2 rr
|
3123 |
+
nangr3 n ang3 rr
|
3124 |
+
nangr4 n ang4 rr
|
3125 |
+
nangr5 n ang5 rr
|
3126 |
+
naor1 n ao1 rr
|
3127 |
+
naor2 n ao2 rr
|
3128 |
+
naor3 n ao3 rr
|
3129 |
+
naor4 n ao4 rr
|
3130 |
+
naor5 n ao5 rr
|
3131 |
+
ner1 n e1 rr
|
3132 |
+
ner2 n e2 rr
|
3133 |
+
ner3 n e3 rr
|
3134 |
+
ner4 n e4 rr
|
3135 |
+
ner5 n e5 rr
|
3136 |
+
neir1 n ei1 rr
|
3137 |
+
neir2 n ei2 rr
|
3138 |
+
neir3 n ei3 rr
|
3139 |
+
neir4 n ei4 rr
|
3140 |
+
neir5 n ei5 rr
|
3141 |
+
nenr1 n en1 rr
|
3142 |
+
nenr2 n en2 rr
|
3143 |
+
nenr3 n en3 rr
|
3144 |
+
nenr4 n en4 rr
|
3145 |
+
nenr5 n en5 rr
|
3146 |
+
nengr1 n eng1 rr
|
3147 |
+
nengr2 n eng2 rr
|
3148 |
+
nengr3 n eng3 rr
|
3149 |
+
nengr4 n eng4 rr
|
3150 |
+
nengr5 n eng5 rr
|
3151 |
+
nir1 n i1 rr
|
3152 |
+
nir2 n i2 rr
|
3153 |
+
nir3 n i3 rr
|
3154 |
+
nir4 n i4 rr
|
3155 |
+
nir5 n i5 rr
|
3156 |
+
nianr1 n ian1 rr
|
3157 |
+
nianr2 n ian2 rr
|
3158 |
+
nianr3 n ian3 rr
|
3159 |
+
nianr4 n ian4 rr
|
3160 |
+
nianr5 n ian5 rr
|
3161 |
+
niangr1 n iang1 rr
|
3162 |
+
niangr2 n iang2 rr
|
3163 |
+
niangr3 n iang3 rr
|
3164 |
+
niangr4 n iang4 rr
|
3165 |
+
niangr5 n iang5 rr
|
3166 |
+
niaor1 n iao1 rr
|
3167 |
+
niaor2 n iao2 rr
|
3168 |
+
niaor3 n iao3 rr
|
3169 |
+
niaor4 n iao4 rr
|
3170 |
+
niaor5 n iao5 rr
|
3171 |
+
nier1 n ie1 rr
|
3172 |
+
nier2 n ie2 rr
|
3173 |
+
nier3 n ie3 rr
|
3174 |
+
nier4 n ie4 rr
|
3175 |
+
nier5 n ie5 rr
|
3176 |
+
ninr1 n in1 rr
|
3177 |
+
ninr2 n in2 rr
|
3178 |
+
ninr3 n in3 rr
|
3179 |
+
ninr4 n in4 rr
|
3180 |
+
ninr5 n in5 rr
|
3181 |
+
ningr1 n ing1 rr
|
3182 |
+
ningr2 n ing2 rr
|
3183 |
+
ningr3 n ing3 rr
|
3184 |
+
ningr4 n ing4 rr
|
3185 |
+
ningr5 n ing5 rr
|
3186 |
+
niur1 n iou1 rr
|
3187 |
+
niur2 n iou2 rr
|
3188 |
+
niur3 n iou3 rr
|
3189 |
+
niur4 n iou4 rr
|
3190 |
+
niur5 n iou5 rr
|
3191 |
+
nongr1 n ong1 rr
|
3192 |
+
nongr2 n ong2 rr
|
3193 |
+
nongr3 n ong3 rr
|
3194 |
+
nongr4 n ong4 rr
|
3195 |
+
nongr5 n ong5 rr
|
3196 |
+
nour1 n ou1 rr
|
3197 |
+
nour2 n ou2 rr
|
3198 |
+
nour3 n ou3 rr
|
3199 |
+
nour4 n ou4 rr
|
3200 |
+
nour5 n ou5 rr
|
3201 |
+
nur1 n u1 rr
|
3202 |
+
nur2 n u2 rr
|
3203 |
+
nur3 n u3 rr
|
3204 |
+
nur4 n u4 rr
|
3205 |
+
nur5 n u5 rr
|
3206 |
+
nuanr1 n uan1 rr
|
3207 |
+
nuanr2 n uan2 rr
|
3208 |
+
nuanr3 n uan3 rr
|
3209 |
+
nuanr4 n uan4 rr
|
3210 |
+
nuanr5 n uan5 rr
|
3211 |
+
nuer1 n ve1 rr
|
3212 |
+
nuer2 n ve2 rr
|
3213 |
+
nuer3 n ve3 rr
|
3214 |
+
nuer4 n ve4 rr
|
3215 |
+
nuer5 n ve5 rr
|
3216 |
+
nver1 n ve1 rr
|
3217 |
+
nver2 n ve2 rr
|
3218 |
+
nver3 n ve3 rr
|
3219 |
+
nver4 n ve4 rr
|
3220 |
+
nver5 n ve5 rr
|
3221 |
+
nuor1 n uo1 rr
|
3222 |
+
nuor2 n uo2 rr
|
3223 |
+
nuor3 n uo3 rr
|
3224 |
+
nuor4 n uo4 rr
|
3225 |
+
nuor5 n uo5 rr
|
3226 |
+
nvr1 n v1 rr
|
3227 |
+
nvr2 n v2 rr
|
3228 |
+
nvr3 n v3 rr
|
3229 |
+
nvr4 n v4 rr
|
3230 |
+
nvr5 n v5 rr
|
3231 |
+
or1 o1 rr
|
3232 |
+
or2 o2 rr
|
3233 |
+
or3 o3 rr
|
3234 |
+
or4 o4 rr
|
3235 |
+
or5 o5 rr
|
3236 |
+
our1 ou1 rr
|
3237 |
+
our2 ou2 rr
|
3238 |
+
our3 ou3 rr
|
3239 |
+
our4 ou4 rr
|
3240 |
+
our5 ou5 rr
|
3241 |
+
par1 p a1 rr
|
3242 |
+
par2 p a2 rr
|
3243 |
+
par3 p a3 rr
|
3244 |
+
par4 p a4 rr
|
3245 |
+
par5 p a5 rr
|
3246 |
+
pair1 p ai1 rr
|
3247 |
+
pair2 p ai2 rr
|
3248 |
+
pair3 p ai3 rr
|
3249 |
+
pair4 p ai4 rr
|
3250 |
+
pair5 p ai5 rr
|
3251 |
+
panr1 p an1 rr
|
3252 |
+
panr2 p an2 rr
|
3253 |
+
panr3 p an3 rr
|
3254 |
+
panr4 p an4 rr
|
3255 |
+
panr5 p an5 rr
|
3256 |
+
pangr1 p ang1 rr
|
3257 |
+
pangr2 p ang2 rr
|
3258 |
+
pangr3 p ang3 rr
|
3259 |
+
pangr4 p ang4 rr
|
3260 |
+
pangr5 p ang5 rr
|
3261 |
+
paor1 p ao1 rr
|
3262 |
+
paor2 p ao2 rr
|
3263 |
+
paor3 p ao3 rr
|
3264 |
+
paor4 p ao4 rr
|
3265 |
+
paor5 p ao5 rr
|
3266 |
+
peir1 p ei1 rr
|
3267 |
+
peir2 p ei2 rr
|
3268 |
+
peir3 p ei3 rr
|
3269 |
+
peir4 p ei4 rr
|
3270 |
+
peir5 p ei5 rr
|
3271 |
+
penr1 p en1 rr
|
3272 |
+
penr2 p en2 rr
|
3273 |
+
penr3 p en3 rr
|
3274 |
+
penr4 p en4 rr
|
3275 |
+
penr5 p en5 rr
|
3276 |
+
pengr1 p eng1 rr
|
3277 |
+
pengr2 p eng2 rr
|
3278 |
+
pengr3 p eng3 rr
|
3279 |
+
pengr4 p eng4 rr
|
3280 |
+
pengr5 p eng5 rr
|
3281 |
+
pir1 p i1 rr
|
3282 |
+
pir2 p i2 rr
|
3283 |
+
pir3 p i3 rr
|
3284 |
+
pir4 p i4 rr
|
3285 |
+
pir5 p i5 rr
|
3286 |
+
pianr1 p ian1 rr
|
3287 |
+
pianr2 p ian2 rr
|
3288 |
+
pianr3 p ian3 rr
|
3289 |
+
pianr4 p ian4 rr
|
3290 |
+
pianr5 p ian5 rr
|
3291 |
+
piaor1 p iao1 rr
|
3292 |
+
piaor2 p iao2 rr
|
3293 |
+
piaor3 p iao3 rr
|
3294 |
+
piaor4 p iao4 rr
|
3295 |
+
piaor5 p iao5 rr
|
3296 |
+
pier1 p ie1 rr
|
3297 |
+
pier2 p ie2 rr
|
3298 |
+
pier3 p ie3 rr
|
3299 |
+
pier4 p ie4 rr
|
3300 |
+
pier5 p ie5 rr
|
3301 |
+
pinr1 p in1 rr
|
3302 |
+
pinr2 p in2 rr
|
3303 |
+
pinr3 p in3 rr
|
3304 |
+
pinr4 p in4 rr
|
3305 |
+
pinr5 p in5 rr
|
3306 |
+
pingr1 p ing1 rr
|
3307 |
+
pingr2 p ing2 rr
|
3308 |
+
pingr3 p ing3 rr
|
3309 |
+
pingr4 p ing4 rr
|
3310 |
+
pingr5 p ing5 rr
|
3311 |
+
por1 p o1 rr
|
3312 |
+
por2 p o2 rr
|
3313 |
+
por3 p o3 rr
|
3314 |
+
por4 p o4 rr
|
3315 |
+
por5 p o5 rr
|
3316 |
+
pour1 p ou1 rr
|
3317 |
+
pour2 p ou2 rr
|
3318 |
+
pour3 p ou3 rr
|
3319 |
+
pour4 p ou4 rr
|
3320 |
+
pour5 p ou5 rr
|
3321 |
+
pur1 p u1 rr
|
3322 |
+
pur2 p u2 rr
|
3323 |
+
pur3 p u3 rr
|
3324 |
+
pur4 p u4 rr
|
3325 |
+
pur5 p u5 rr
|
3326 |
+
qir1 q i1 rr
|
3327 |
+
qir2 q i2 rr
|
3328 |
+
qir3 q i3 rr
|
3329 |
+
qir4 q i4 rr
|
3330 |
+
qir5 q i5 rr
|
3331 |
+
qiar1 q ia1 rr
|
3332 |
+
qiar2 q ia2 rr
|
3333 |
+
qiar3 q ia3 rr
|
3334 |
+
qiar4 q ia4 rr
|
3335 |
+
qiar5 q ia5 rr
|
3336 |
+
qianr1 q ian1 rr
|
3337 |
+
qianr2 q ian2 rr
|
3338 |
+
qianr3 q ian3 rr
|
3339 |
+
qianr4 q ian4 rr
|
3340 |
+
qianr5 q ian5 rr
|
3341 |
+
qiangr1 q iang1 rr
|
3342 |
+
qiangr2 q iang2 rr
|
3343 |
+
qiangr3 q iang3 rr
|
3344 |
+
qiangr4 q iang4 rr
|
3345 |
+
qiangr5 q iang5 rr
|
3346 |
+
qiaor1 q iao1 rr
|
3347 |
+
qiaor2 q iao2 rr
|
3348 |
+
qiaor3 q iao3 rr
|
3349 |
+
qiaor4 q iao4 rr
|
3350 |
+
qiaor5 q iao5 rr
|
3351 |
+
qier1 q ie1 rr
|
3352 |
+
qier2 q ie2 rr
|
3353 |
+
qier3 q ie3 rr
|
3354 |
+
qier4 q ie4 rr
|
3355 |
+
qier5 q ie5 rr
|
3356 |
+
qinr1 q in1 rr
|
3357 |
+
qinr2 q in2 rr
|
3358 |
+
qinr3 q in3 rr
|
3359 |
+
qinr4 q in4 rr
|
3360 |
+
qinr5 q in5 rr
|
3361 |
+
qingr1 q ing1 rr
|
3362 |
+
qingr2 q ing2 rr
|
3363 |
+
qingr3 q ing3 rr
|
3364 |
+
qingr4 q ing4 rr
|
3365 |
+
qingr5 q ing5 rr
|
3366 |
+
qiongr1 q iong1 rr
|
3367 |
+
qiongr2 q iong2 rr
|
3368 |
+
qiongr3 q iong3 rr
|
3369 |
+
qiongr4 q iong4 rr
|
3370 |
+
qiongr5 q iong5 rr
|
3371 |
+
qiur1 q iou1 rr
|
3372 |
+
qiur2 q iou2 rr
|
3373 |
+
qiur3 q iou3 rr
|
3374 |
+
qiur4 q iou4 rr
|
3375 |
+
qiur5 q iou5 rr
|
3376 |
+
qur1 q v1 rr
|
3377 |
+
qur2 q v2 rr
|
3378 |
+
qur3 q v3 rr
|
3379 |
+
qur4 q v4 rr
|
3380 |
+
qur5 q v5 rr
|
3381 |
+
quanr1 q van1 rr
|
3382 |
+
quanr2 q van2 rr
|
3383 |
+
quanr3 q van3 rr
|
3384 |
+
quanr4 q van4 rr
|
3385 |
+
quanr5 q van5 rr
|
3386 |
+
quer1 q ve1 rr
|
3387 |
+
quer2 q ve2 rr
|
3388 |
+
quer3 q ve3 rr
|
3389 |
+
quer4 q ve4 rr
|
3390 |
+
quer5 q ve5 rr
|
3391 |
+
qunr1 q vn1 rr
|
3392 |
+
qunr2 q vn2 rr
|
3393 |
+
qunr3 q vn3 rr
|
3394 |
+
qunr4 q vn4 rr
|
3395 |
+
qunr5 q vn5 rr
|
3396 |
+
ranr1 r an1 rr
|
3397 |
+
ranr2 r an2 rr
|
3398 |
+
ranr3 r an3 rr
|
3399 |
+
ranr4 r an4 rr
|
3400 |
+
ranr5 r an5 rr
|
3401 |
+
rangr1 r ang1 rr
|
3402 |
+
rangr2 r ang2 rr
|
3403 |
+
rangr3 r ang3 rr
|
3404 |
+
rangr4 r ang4 rr
|
3405 |
+
rangr5 r ang5 rr
|
3406 |
+
raor1 r ao1 rr
|
3407 |
+
raor2 r ao2 rr
|
3408 |
+
raor3 r ao3 rr
|
3409 |
+
raor4 r ao4 rr
|
3410 |
+
raor5 r ao5 rr
|
3411 |
+
rer1 r e1 rr
|
3412 |
+
rer2 r e2 rr
|
3413 |
+
rer3 r e3 rr
|
3414 |
+
rer4 r e4 rr
|
3415 |
+
rer5 r e5 rr
|
3416 |
+
renr1 r en1 rr
|
3417 |
+
renr2 r en2 rr
|
3418 |
+
renr3 r en3 rr
|
3419 |
+
renr4 r en4 rr
|
3420 |
+
renr5 r en5 rr
|
3421 |
+
rengr1 r eng1 rr
|
3422 |
+
rengr2 r eng2 rr
|
3423 |
+
rengr3 r eng3 rr
|
3424 |
+
rengr4 r eng4 rr
|
3425 |
+
rengr5 r eng5 rr
|
3426 |
+
rir1 r iii1 rr
|
3427 |
+
rir2 r iii2 rr
|
3428 |
+
rir3 r iii3 rr
|
3429 |
+
rir4 r iii4 rr
|
3430 |
+
rir5 r iii5 rr
|
3431 |
+
rongr1 r ong1 rr
|
3432 |
+
rongr2 r ong2 rr
|
3433 |
+
rongr3 r ong3 rr
|
3434 |
+
rongr4 r ong4 rr
|
3435 |
+
rongr5 r ong5 rr
|
3436 |
+
rour1 r ou1 rr
|
3437 |
+
rour2 r ou2 rr
|
3438 |
+
rour3 r ou3 rr
|
3439 |
+
rour4 r ou4 rr
|
3440 |
+
rour5 r ou5 rr
|
3441 |
+
rur1 r u1 rr
|
3442 |
+
rur2 r u2 rr
|
3443 |
+
rur3 r u3 rr
|
3444 |
+
rur4 r u4 rr
|
3445 |
+
rur5 r u5 rr
|
3446 |
+
ruar1 r ua1 rr
|
3447 |
+
ruar2 r ua2 rr
|
3448 |
+
ruar3 r ua3 rr
|
3449 |
+
ruar4 r ua4 rr
|
3450 |
+
ruar5 r ua5 rr
|
3451 |
+
ruanr1 r uan1 rr
|
3452 |
+
ruanr2 r uan2 rr
|
3453 |
+
ruanr3 r uan3 rr
|
3454 |
+
ruanr4 r uan4 rr
|
3455 |
+
ruanr5 r uan5 rr
|
3456 |
+
ruir1 r uei1 rr
|
3457 |
+
ruir2 r uei2 rr
|
3458 |
+
ruir3 r uei3 rr
|
3459 |
+
ruir4 r uei4 rr
|
3460 |
+
ruir5 r uei5 rr
|
3461 |
+
runr1 r uen1 rr
|
3462 |
+
runr2 r uen2 rr
|
3463 |
+
runr3 r uen3 rr
|
3464 |
+
runr4 r uen4 rr
|
3465 |
+
runr5 r uen5 rr
|
3466 |
+
ruor1 r uo1 rr
|
3467 |
+
ruor2 r uo2 rr
|
3468 |
+
ruor3 r uo3 rr
|
3469 |
+
ruor4 r uo4 rr
|
3470 |
+
ruor5 r uo5 rr
|
3471 |
+
sar1 s a1 rr
|
3472 |
+
sar2 s a2 rr
|
3473 |
+
sar3 s a3 rr
|
3474 |
+
sar4 s a4 rr
|
3475 |
+
sar5 s a5 rr
|
3476 |
+
sair1 s ai1 rr
|
3477 |
+
sair2 s ai2 rr
|
3478 |
+
sair3 s ai3 rr
|
3479 |
+
sair4 s ai4 rr
|
3480 |
+
sair5 s ai5 rr
|
3481 |
+
sanr1 s an1 rr
|
3482 |
+
sanr2 s an2 rr
|
3483 |
+
sanr3 s an3 rr
|
3484 |
+
sanr4 s an4 rr
|
3485 |
+
sanr5 s an5 rr
|
3486 |
+
sangr1 s ang1 rr
|
3487 |
+
sangr2 s ang2 rr
|
3488 |
+
sangr3 s ang3 rr
|
3489 |
+
sangr4 s ang4 rr
|
3490 |
+
sangr5 s ang5 rr
|
3491 |
+
saor1 s ao1 rr
|
3492 |
+
saor2 s ao2 rr
|
3493 |
+
saor3 s ao3 rr
|
3494 |
+
saor4 s ao4 rr
|
3495 |
+
saor5 s ao5 rr
|
3496 |
+
ser1 s e1 rr
|
3497 |
+
ser2 s e2 rr
|
3498 |
+
ser3 s e3 rr
|
3499 |
+
ser4 s e4 rr
|
3500 |
+
ser5 s e5 rr
|
3501 |
+
senr1 s en1 rr
|
3502 |
+
senr2 s en2 rr
|
3503 |
+
senr3 s en3 rr
|
3504 |
+
senr4 s en4 rr
|
3505 |
+
senr5 s en5 rr
|
3506 |
+
sengr1 s eng1 rr
|
3507 |
+
sengr2 s eng2 rr
|
3508 |
+
sengr3 s eng3 rr
|
3509 |
+
sengr4 s eng4 rr
|
3510 |
+
sengr5 s eng5 rr
|
3511 |
+
shar1 sh a1 rr
|
3512 |
+
shar2 sh a2 rr
|
3513 |
+
shar3 sh a3 rr
|
3514 |
+
shar4 sh a4 rr
|
3515 |
+
shar5 sh a5 rr
|
3516 |
+
shair1 sh ai1 rr
|
3517 |
+
shair2 sh ai2 rr
|
3518 |
+
shair3 sh ai3 rr
|
3519 |
+
shair4 sh ai4 rr
|
3520 |
+
shair5 sh ai5 rr
|
3521 |
+
shanr1 sh an1 rr
|
3522 |
+
shanr2 sh an2 rr
|
3523 |
+
shanr3 sh an3 rr
|
3524 |
+
shanr4 sh an4 rr
|
3525 |
+
shanr5 sh an5 rr
|
3526 |
+
shangr1 sh ang1 rr
|
3527 |
+
shangr2 sh ang2 rr
|
3528 |
+
shangr3 sh ang3 rr
|
3529 |
+
shangr4 sh ang4 rr
|
3530 |
+
shangr5 sh ang5 rr
|
3531 |
+
shaor1 sh ao1 rr
|
3532 |
+
shaor2 sh ao2 rr
|
3533 |
+
shaor3 sh ao3 rr
|
3534 |
+
shaor4 sh ao4 rr
|
3535 |
+
shaor5 sh ao5 rr
|
3536 |
+
sher1 sh e1 rr
|
3537 |
+
sher2 sh e2 rr
|
3538 |
+
sher3 sh e3 rr
|
3539 |
+
sher4 sh e4 rr
|
3540 |
+
sher5 sh e5 rr
|
3541 |
+
sheir1 sh ei1 rr
|
3542 |
+
sheir2 sh ei2 rr
|
3543 |
+
sheir3 sh ei3 rr
|
3544 |
+
sheir4 sh ei4 rr
|
3545 |
+
sheir5 sh ei5 rr
|
3546 |
+
shenr1 sh en1 rr
|
3547 |
+
shenr2 sh en2 rr
|
3548 |
+
shenr3 sh en3 rr
|
3549 |
+
shenr4 sh en4 rr
|
3550 |
+
shenr5 sh en5 rr
|
3551 |
+
shengr1 sh eng1 rr
|
3552 |
+
shengr2 sh eng2 rr
|
3553 |
+
shengr3 sh eng3 rr
|
3554 |
+
shengr4 sh eng4 rr
|
3555 |
+
shengr5 sh eng5 rr
|
3556 |
+
shir1 sh iii1 rr
|
3557 |
+
shir2 sh iii2 rr
|
3558 |
+
shir3 sh iii3 rr
|
3559 |
+
shir4 sh iii4 rr
|
3560 |
+
shir5 sh iii5 rr
|
3561 |
+
shour1 sh ou1 rr
|
3562 |
+
shour2 sh ou2 rr
|
3563 |
+
shour3 sh ou3 rr
|
3564 |
+
shour4 sh ou4 rr
|
3565 |
+
shour5 sh ou5 rr
|
3566 |
+
shur1 sh u1 rr
|
3567 |
+
shur2 sh u2 rr
|
3568 |
+
shur3 sh u3 rr
|
3569 |
+
shur4 sh u4 rr
|
3570 |
+
shur5 sh u5 rr
|
3571 |
+
shuar1 sh ua1 rr
|
3572 |
+
shuar2 sh ua2 rr
|
3573 |
+
shuar3 sh ua3 rr
|
3574 |
+
shuar4 sh ua4 rr
|
3575 |
+
shuar5 sh ua5 rr
|
3576 |
+
shuair1 sh uai1 rr
|
3577 |
+
shuair2 sh uai2 rr
|
3578 |
+
shuair3 sh uai3 rr
|
3579 |
+
shuair4 sh uai4 rr
|
3580 |
+
shuair5 sh uai5 rr
|
3581 |
+
shuanr1 sh uan1 rr
|
3582 |
+
shuanr2 sh uan2 rr
|
3583 |
+
shuanr3 sh uan3 rr
|
3584 |
+
shuanr4 sh uan4 rr
|
3585 |
+
shuanr5 sh uan5 rr
|
3586 |
+
shuangr1 sh uang1 rr
|
3587 |
+
shuangr2 sh uang2 rr
|
3588 |
+
shuangr3 sh uang3 rr
|
3589 |
+
shuangr4 sh uang4 rr
|
3590 |
+
shuangr5 sh uang5 rr
|
3591 |
+
shuir1 sh uei1 rr
|
3592 |
+
shuir2 sh uei2 rr
|
3593 |
+
shuir3 sh uei3 rr
|
3594 |
+
shuir4 sh uei4 rr
|
3595 |
+
shuir5 sh uei5 rr
|
3596 |
+
shunr1 sh uen1 rr
|
3597 |
+
shunr2 sh uen2 rr
|
3598 |
+
shunr3 sh uen3 rr
|
3599 |
+
shunr4 sh uen4 rr
|
3600 |
+
shunr5 sh uen5 rr
|
3601 |
+
shuor1 sh uo1 rr
|
3602 |
+
shuor2 sh uo2 rr
|
3603 |
+
shuor3 sh uo3 rr
|
3604 |
+
shuor4 sh uo4 rr
|
3605 |
+
shuor5 sh uo5 rr
|
3606 |
+
sir1 s ii1 rr
|
3607 |
+
sir2 s ii2 rr
|
3608 |
+
sir3 s ii3 rr
|
3609 |
+
sir4 s ii4 rr
|
3610 |
+
sir5 s ii5 rr
|
3611 |
+
songr1 s ong1 rr
|
3612 |
+
songr2 s ong2 rr
|
3613 |
+
songr3 s ong3 rr
|
3614 |
+
songr4 s ong4 rr
|
3615 |
+
songr5 s ong5 rr
|
3616 |
+
sour1 s ou1 rr
|
3617 |
+
sour2 s ou2 rr
|
3618 |
+
sour3 s ou3 rr
|
3619 |
+
sour4 s ou4 rr
|
3620 |
+
sour5 s ou5 rr
|
3621 |
+
sur1 s u1 rr
|
3622 |
+
sur2 s u2 rr
|
3623 |
+
sur3 s u3 rr
|
3624 |
+
sur4 s u4 rr
|
3625 |
+
sur5 s u5 rr
|
3626 |
+
suanr1 s uan1 rr
|
3627 |
+
suanr2 s uan2 rr
|
3628 |
+
suanr3 s uan3 rr
|
3629 |
+
suanr4 s uan4 rr
|
3630 |
+
suanr5 s uan5 rr
|
3631 |
+
suir1 s uei1 rr
|
3632 |
+
suir2 s uei2 rr
|
3633 |
+
suir3 s uei3 rr
|
3634 |
+
suir4 s uei4 rr
|
3635 |
+
suir5 s uei5 rr
|
3636 |
+
sunr1 s uen1 rr
|
3637 |
+
sunr2 s uen2 rr
|
3638 |
+
sunr3 s uen3 rr
|
3639 |
+
sunr4 s uen4 rr
|
3640 |
+
sunr5 s uen5 rr
|
3641 |
+
suor1 s uo1 rr
|
3642 |
+
suor2 s uo2 rr
|
3643 |
+
suor3 s uo3 rr
|
3644 |
+
suor4 s uo4 rr
|
3645 |
+
suor5 s uo5 rr
|
3646 |
+
tar1 t a1 rr
|
3647 |
+
tar2 t a2 rr
|
3648 |
+
tar3 t a3 rr
|
3649 |
+
tar4 t a4 rr
|
3650 |
+
tar5 t a5 rr
|
3651 |
+
tair1 t ai1 rr
|
3652 |
+
tair2 t ai2 rr
|
3653 |
+
tair3 t ai3 rr
|
3654 |
+
tair4 t ai4 rr
|
3655 |
+
tair5 t ai5 rr
|
3656 |
+
tanr1 t an1 rr
|
3657 |
+
tanr2 t an2 rr
|
3658 |
+
tanr3 t an3 rr
|
3659 |
+
tanr4 t an4 rr
|
3660 |
+
tanr5 t an5 rr
|
3661 |
+
tangr1 t ang1 rr
|
3662 |
+
tangr2 t ang2 rr
|
3663 |
+
tangr3 t ang3 rr
|
3664 |
+
tangr4 t ang4 rr
|
3665 |
+
tangr5 t ang5 rr
|
3666 |
+
taor1 t ao1 rr
|
3667 |
+
taor2 t ao2 rr
|
3668 |
+
taor3 t ao3 rr
|
3669 |
+
taor4 t ao4 rr
|
3670 |
+
taor5 t ao5 rr
|
3671 |
+
ter1 t e1 rr
|
3672 |
+
ter2 t e2 rr
|
3673 |
+
ter3 t e3 rr
|
3674 |
+
ter4 t e4 rr
|
3675 |
+
ter5 t e5 rr
|
3676 |
+
teir1 t ei1 rr
|
3677 |
+
teir2 t ei2 rr
|
3678 |
+
teir3 t ei3 rr
|
3679 |
+
teir4 t ei4 rr
|
3680 |
+
teir5 t ei5 rr
|
3681 |
+
tengr1 t eng1 rr
|
3682 |
+
tengr2 t eng2 rr
|
3683 |
+
tengr3 t eng3 rr
|
3684 |
+
tengr4 t eng4 rr
|
3685 |
+
tengr5 t eng5 rr
|
3686 |
+
tir1 t i1 rr
|
3687 |
+
tir2 t i2 rr
|
3688 |
+
tir3 t i3 rr
|
3689 |
+
tir4 t i4 rr
|
3690 |
+
tir5 t i5 rr
|
3691 |
+
tianr1 t ian1 rr
|
3692 |
+
tianr2 t ian2 rr
|
3693 |
+
tianr3 t ian3 rr
|
3694 |
+
tianr4 t ian4 rr
|
3695 |
+
tianr5 t ian5 rr
|
3696 |
+
tiaor1 t iao1 rr
|
3697 |
+
tiaor2 t iao2 rr
|
3698 |
+
tiaor3 t iao3 rr
|
3699 |
+
tiaor4 t iao4 rr
|
3700 |
+
tiaor5 t iao5 rr
|
3701 |
+
tier1 t ie1 rr
|
3702 |
+
tier2 t ie2 rr
|
3703 |
+
tier3 t ie3 rr
|
3704 |
+
tier4 t ie4 rr
|
3705 |
+
tier5 t ie5 rr
|
3706 |
+
tingr1 t ing1 rr
|
3707 |
+
tingr2 t ing2 rr
|
3708 |
+
tingr3 t ing3 rr
|
3709 |
+
tingr4 t ing4 rr
|
3710 |
+
tingr5 t ing5 rr
|
3711 |
+
tongr1 t ong1 rr
|
3712 |
+
tongr2 t ong2 rr
|
3713 |
+
tongr3 t ong3 rr
|
3714 |
+
tongr4 t ong4 rr
|
3715 |
+
tongr5 t ong5 rr
|
3716 |
+
tour1 t ou1 rr
|
3717 |
+
tour2 t ou2 rr
|
3718 |
+
tour3 t ou3 rr
|
3719 |
+
tour4 t ou4 rr
|
3720 |
+
tour5 t ou5 rr
|
3721 |
+
tur1 t u1 rr
|
3722 |
+
tur2 t u2 rr
|
3723 |
+
tur3 t u3 rr
|
3724 |
+
tur4 t u4 rr
|
3725 |
+
tur5 t u5 rr
|
3726 |
+
tuanr1 t uan1 rr
|
3727 |
+
tuanr2 t uan2 rr
|
3728 |
+
tuanr3 t uan3 rr
|
3729 |
+
tuanr4 t uan4 rr
|
3730 |
+
tuanr5 t uan5 rr
|
3731 |
+
tuir1 t uei1 rr
|
3732 |
+
tuir2 t uei2 rr
|
3733 |
+
tuir3 t uei3 rr
|
3734 |
+
tuir4 t uei4 rr
|
3735 |
+
tuir5 t uei5 rr
|
3736 |
+
tunr1 t uen1 rr
|
3737 |
+
tunr2 t uen2 rr
|
3738 |
+
tunr3 t uen3 rr
|
3739 |
+
tunr4 t uen4 rr
|
3740 |
+
tunr5 t uen5 rr
|
3741 |
+
tuor1 t uo1 rr
|
3742 |
+
tuor2 t uo2 rr
|
3743 |
+
tuor3 t uo3 rr
|
3744 |
+
tuor4 t uo4 rr
|
3745 |
+
tuor5 t uo5 rr
|
3746 |
+
war1 w ua1 rr
|
3747 |
+
war2 w ua2 rr
|
3748 |
+
war3 w ua3 rr
|
3749 |
+
war4 w ua4 rr
|
3750 |
+
war5 w ua5 rr
|
3751 |
+
wair1 w uai1 rr
|
3752 |
+
wair2 w uai2 rr
|
3753 |
+
wair3 w uai3 rr
|
3754 |
+
wair4 w uai4 rr
|
3755 |
+
wair5 w uai5 rr
|
3756 |
+
wanr1 w uan1 rr
|
3757 |
+
wanr2 w uan2 rr
|
3758 |
+
wanr3 w uan3 rr
|
3759 |
+
wanr4 w uan4 rr
|
3760 |
+
wanr5 w uan5 rr
|
3761 |
+
wangr1 w uang1 rr
|
3762 |
+
wangr2 w uang2 rr
|
3763 |
+
wangr3 w uang3 rr
|
3764 |
+
wangr4 w uang4 rr
|
3765 |
+
wangr5 w uang5 rr
|
3766 |
+
weir1 w uei1 rr
|
3767 |
+
weir2 w uei2 rr
|
3768 |
+
weir3 w uei3 rr
|
3769 |
+
weir4 w uei4 rr
|
3770 |
+
weir5 w uei5 rr
|
3771 |
+
wenr1 w uen1 rr
|
3772 |
+
wenr2 w uen2 rr
|
3773 |
+
wenr3 w uen3 rr
|
3774 |
+
wenr4 w uen4 rr
|
3775 |
+
wenr5 w uen5 rr
|
3776 |
+
wengr1 w uen1 rr
|
3777 |
+
wengr2 w uen2 rr
|
3778 |
+
wengr3 w uen3 rr
|
3779 |
+
wengr4 w uen4 rr
|
3780 |
+
wengr5 w uen5 rr
|
3781 |
+
wor1 w uo1 rr
|
3782 |
+
wor2 w uo2 rr
|
3783 |
+
wor3 w uo3 rr
|
3784 |
+
wor4 w uo4 rr
|
3785 |
+
wor5 w uo5 rr
|
3786 |
+
wur1 w u1 rr
|
3787 |
+
wur2 w u2 rr
|
3788 |
+
wur3 w u3 rr
|
3789 |
+
wur4 w u4 rr
|
3790 |
+
wur5 w u5 rr
|
3791 |
+
xir1 x i1 rr
|
3792 |
+
xir2 x i2 rr
|
3793 |
+
xir3 x i3 rr
|
3794 |
+
xir4 x i4 rr
|
3795 |
+
xir5 x i5 rr
|
3796 |
+
xiar1 x ia1 rr
|
3797 |
+
xiar2 x ia2 rr
|
3798 |
+
xiar3 x ia3 rr
|
3799 |
+
xiar4 x ia4 rr
|
3800 |
+
xiar5 x ia5 rr
|
3801 |
+
xianr1 x ian1 rr
|
3802 |
+
xianr2 x ian2 rr
|
3803 |
+
xianr3 x ian3 rr
|
3804 |
+
xianr4 x ian4 rr
|
3805 |
+
xianr5 x ian5 rr
|
3806 |
+
xiangr1 x iang1 rr
|
3807 |
+
xiangr2 x iang2 rr
|
3808 |
+
xiangr3 x iang3 rr
|
3809 |
+
xiangr4 x iang4 rr
|
3810 |
+
xiangr5 x iang5 rr
|
3811 |
+
xiaor1 x iao1 rr
|
3812 |
+
xiaor2 x iao2 rr
|
3813 |
+
xiaor3 x iao3 rr
|
3814 |
+
xiaor4 x iao4 rr
|
3815 |
+
xiaor5 x iao5 rr
|
3816 |
+
xier1 x ie1 rr
|
3817 |
+
xier2 x ie2 rr
|
3818 |
+
xier3 x ie3 rr
|
3819 |
+
xier4 x ie4 rr
|
3820 |
+
xier5 x ie5 rr
|
3821 |
+
xinr1 x in1 rr
|
3822 |
+
xinr2 x in2 rr
|
3823 |
+
xinr3 x in3 rr
|
3824 |
+
xinr4 x in4 rr
|
3825 |
+
xinr5 x in5 rr
|
3826 |
+
xingr1 x ing1 rr
|
3827 |
+
xingr2 x ing2 rr
|
3828 |
+
xingr3 x ing3 rr
|
3829 |
+
xingr4 x ing4 rr
|
3830 |
+
xingr5 x ing5 rr
|
3831 |
+
xiongr1 x iong1 rr
|
3832 |
+
xiongr2 x iong2 rr
|
3833 |
+
xiongr3 x iong3 rr
|
3834 |
+
xiongr4 x iong4 rr
|
3835 |
+
xiongr5 x iong5 rr
|
3836 |
+
xiur1 x iou1 rr
|
3837 |
+
xiur2 x iou2 rr
|
3838 |
+
xiur3 x iou3 rr
|
3839 |
+
xiur4 x iou4 rr
|
3840 |
+
xiur5 x iou5 rr
|
3841 |
+
xur1 x v1 rr
|
3842 |
+
xur2 x v2 rr
|
3843 |
+
xur3 x v3 rr
|
3844 |
+
xur4 x v4 rr
|
3845 |
+
xur5 x v5 rr
|
3846 |
+
xuanr1 x van1 rr
|
3847 |
+
xuanr2 x van2 rr
|
3848 |
+
xuanr3 x van3 rr
|
3849 |
+
xuanr4 x van4 rr
|
3850 |
+
xuanr5 x van5 rr
|
3851 |
+
xuer1 x ve1 rr
|
3852 |
+
xuer2 x ve2 rr
|
3853 |
+
xuer3 x ve3 rr
|
3854 |
+
xuer4 x ve4 rr
|
3855 |
+
xuer5 x ve5 rr
|
3856 |
+
xunr1 x vn1 rr
|
3857 |
+
xunr2 x vn2 rr
|
3858 |
+
xunr3 x vn3 rr
|
3859 |
+
xunr4 x vn4 rr
|
3860 |
+
xunr5 x vn5 rr
|
3861 |
+
yar1 y ia1 rr
|
3862 |
+
yar2 y ia2 rr
|
3863 |
+
yar3 y ia3 rr
|
3864 |
+
yar4 y ia4 rr
|
3865 |
+
yar5 y ia5 rr
|
3866 |
+
yanr1 y ian1 rr
|
3867 |
+
yanr2 y ian2 rr
|
3868 |
+
yanr3 y ian3 rr
|
3869 |
+
yanr4 y ian4 rr
|
3870 |
+
yanr5 y ian5 rr
|
3871 |
+
yangr1 y iang1 rr
|
3872 |
+
yangr2 y iang2 rr
|
3873 |
+
yangr3 y iang3 rr
|
3874 |
+
yangr4 y iang4 rr
|
3875 |
+
yangr5 y iang5 rr
|
3876 |
+
yaor1 y iao1 rr
|
3877 |
+
yaor2 y iao2 rr
|
3878 |
+
yaor3 y iao3 rr
|
3879 |
+
yaor4 y iao4 rr
|
3880 |
+
yaor5 y iao5 rr
|
3881 |
+
yer1 y ie1 rr
|
3882 |
+
yer2 y ie2 rr
|
3883 |
+
yer3 y ie3 rr
|
3884 |
+
yer4 y ie4 rr
|
3885 |
+
yer5 y ie5 rr
|
3886 |
+
yir1 y i1 rr
|
3887 |
+
yir2 y i2 rr
|
3888 |
+
yir3 y i3 rr
|
3889 |
+
yir4 y i4 rr
|
3890 |
+
yir5 y i5 rr
|
3891 |
+
yinr1 y in1 rr
|
3892 |
+
yinr2 y in2 rr
|
3893 |
+
yinr3 y in3 rr
|
3894 |
+
yinr4 y in4 rr
|
3895 |
+
yinr5 y in5 rr
|
3896 |
+
yingr1 y ing1 rr
|
3897 |
+
yingr2 y ing2 rr
|
3898 |
+
yingr3 y ing3 rr
|
3899 |
+
yingr4 y ing4 rr
|
3900 |
+
yingr5 y ing5 rr
|
3901 |
+
yor1 y iou1 rr
|
3902 |
+
yor2 y iou2 rr
|
3903 |
+
yor3 y iou3 rr
|
3904 |
+
yor4 y iou4 rr
|
3905 |
+
yor5 y iou5 rr
|
3906 |
+
yongr1 y iong1 rr
|
3907 |
+
yongr2 y iong2 rr
|
3908 |
+
yongr3 y iong3 rr
|
3909 |
+
yongr4 y iong4 rr
|
3910 |
+
yongr5 y iong5 rr
|
3911 |
+
your1 y iou1 rr
|
3912 |
+
your2 y iou2 rr
|
3913 |
+
your3 y iou3 rr
|
3914 |
+
your4 y iou4 rr
|
3915 |
+
your5 y iou5 rr
|
3916 |
+
yur1 y v1 rr
|
3917 |
+
yur2 y v2 rr
|
3918 |
+
yur3 y v3 rr
|
3919 |
+
yur4 y v4 rr
|
3920 |
+
yur5 y v5 rr
|
3921 |
+
yuanr1 y van1 rr
|
3922 |
+
yuanr2 y van2 rr
|
3923 |
+
yuanr3 y van3 rr
|
3924 |
+
yuanr4 y van4 rr
|
3925 |
+
yuanr5 y van5 rr
|
3926 |
+
yuer1 y ve1 rr
|
3927 |
+
yuer2 y ve2 rr
|
3928 |
+
yuer3 y ve3 rr
|
3929 |
+
yuer4 y ve4 rr
|
3930 |
+
yuer5 y ve5 rr
|
3931 |
+
yunr1 y vn1 rr
|
3932 |
+
yunr2 y vn2 rr
|
3933 |
+
yunr3 y vn3 rr
|
3934 |
+
yunr4 y vn4 rr
|
3935 |
+
yunr5 y vn5 rr
|
3936 |
+
zar1 z a1 rr
|
3937 |
+
zar2 z a2 rr
|
3938 |
+
zar3 z a3 rr
|
3939 |
+
zar4 z a4 rr
|
3940 |
+
zar5 z a5 rr
|
3941 |
+
zair1 z ai1 rr
|
3942 |
+
zair2 z ai2 rr
|
3943 |
+
zair3 z ai3 rr
|
3944 |
+
zair4 z ai4 rr
|
3945 |
+
zair5 z ai5 rr
|
3946 |
+
zanr1 z an1 rr
|
3947 |
+
zanr2 z an2 rr
|
3948 |
+
zanr3 z an3 rr
|
3949 |
+
zanr4 z an4 rr
|
3950 |
+
zanr5 z an5 rr
|
3951 |
+
zangr1 z ang1 rr
|
3952 |
+
zangr2 z ang2 rr
|
3953 |
+
zangr3 z ang3 rr
|
3954 |
+
zangr4 z ang4 rr
|
3955 |
+
zangr5 z ang5 rr
|
3956 |
+
zaor1 z ao1 rr
|
3957 |
+
zaor2 z ao2 rr
|
3958 |
+
zaor3 z ao3 rr
|
3959 |
+
zaor4 z ao4 rr
|
3960 |
+
zaor5 z ao5 rr
|
3961 |
+
zer1 z e1 rr
|
3962 |
+
zer2 z e2 rr
|
3963 |
+
zer3 z e3 rr
|
3964 |
+
zer4 z e4 rr
|
3965 |
+
zer5 z e5 rr
|
3966 |
+
zeir1 z ei1 rr
|
3967 |
+
zeir2 z ei2 rr
|
3968 |
+
zeir3 z ei3 rr
|
3969 |
+
zeir4 z ei4 rr
|
3970 |
+
zeir5 z ei5 rr
|
3971 |
+
zenr1 z en1 rr
|
3972 |
+
zenr2 z en2 rr
|
3973 |
+
zenr3 z en3 rr
|
3974 |
+
zenr4 z en4 rr
|
3975 |
+
zenr5 z en5 rr
|
3976 |
+
zengr1 z eng1 rr
|
3977 |
+
zengr2 z eng2 rr
|
3978 |
+
zengr3 z eng3 rr
|
3979 |
+
zengr4 z eng4 rr
|
3980 |
+
zengr5 z eng5 rr
|
3981 |
+
zhar1 zh a1 rr
|
3982 |
+
zhar2 zh a2 rr
|
3983 |
+
zhar3 zh a3 rr
|
3984 |
+
zhar4 zh a4 rr
|
3985 |
+
zhar5 zh a5 rr
|
3986 |
+
zhair1 zh ai1 rr
|
3987 |
+
zhair2 zh ai2 rr
|
3988 |
+
zhair3 zh ai3 rr
|
3989 |
+
zhair4 zh ai4 rr
|
3990 |
+
zhair5 zh ai5 rr
|
3991 |
+
zhanr1 zh an1 rr
|
3992 |
+
zhanr2 zh an2 rr
|
3993 |
+
zhanr3 zh an3 rr
|
3994 |
+
zhanr4 zh an4 rr
|
3995 |
+
zhanr5 zh an5 rr
|
3996 |
+
zhangr1 zh ang1 rr
|
3997 |
+
zhangr2 zh ang2 rr
|
3998 |
+
zhangr3 zh ang3 rr
|
3999 |
+
zhangr4 zh ang4 rr
|
4000 |
+
zhangr5 zh ang5 rr
|
4001 |
+
zhaor1 zh ao1 rr
|
4002 |
+
zhaor2 zh ao2 rr
|
4003 |
+
zhaor3 zh ao3 rr
|
4004 |
+
zhaor4 zh ao4 rr
|
4005 |
+
zhaor5 zh ao5 rr
|
4006 |
+
zher1 zh e1 rr
|
4007 |
+
zher2 zh e2 rr
|
4008 |
+
zher3 zh e3 rr
|
4009 |
+
zher4 zh e4 rr
|
4010 |
+
zher5 zh e5 rr
|
4011 |
+
zheir1 zh ei1 rr
|
4012 |
+
zheir2 zh ei2 rr
|
4013 |
+
zheir3 zh ei3 rr
|
4014 |
+
zheir4 zh ei4 rr
|
4015 |
+
zheir5 zh ei5 rr
|
4016 |
+
zhenr1 zh en1 rr
|
4017 |
+
zhenr2 zh en2 rr
|
4018 |
+
zhenr3 zh en3 rr
|
4019 |
+
zhenr4 zh en4 rr
|
4020 |
+
zhenr5 zh en5 rr
|
4021 |
+
zhengr1 zh eng1 rr
|
4022 |
+
zhengr2 zh eng2 rr
|
4023 |
+
zhengr3 zh eng3 rr
|
4024 |
+
zhengr4 zh eng4 rr
|
4025 |
+
zhengr5 zh eng5 rr
|
4026 |
+
zhir1 zh iii1 rr
|
4027 |
+
zhir2 zh iii2 rr
|
4028 |
+
zhir3 zh iii3 rr
|
4029 |
+
zhir4 zh iii4 rr
|
4030 |
+
zhir5 zh iii5 rr
|
4031 |
+
zhongr1 zh ong1 rr
|
4032 |
+
zhongr2 zh ong2 rr
|
4033 |
+
zhongr3 zh ong3 rr
|
4034 |
+
zhongr4 zh ong4 rr
|
4035 |
+
zhongr5 zh ong5 rr
|
4036 |
+
zhour1 zh ou1 rr
|
4037 |
+
zhour2 zh ou2 rr
|
4038 |
+
zhour3 zh ou3 rr
|
4039 |
+
zhour4 zh ou4 rr
|
4040 |
+
zhour5 zh ou5 rr
|
4041 |
+
zhur1 zh u1 rr
|
4042 |
+
zhur2 zh u2 rr
|
4043 |
+
zhur3 zh u3 rr
|
4044 |
+
zhur4 zh u4 rr
|
4045 |
+
zhur5 zh u5 rr
|
4046 |
+
zhuar1 zh ua1 rr
|
4047 |
+
zhuar2 zh ua2 rr
|
4048 |
+
zhuar3 zh ua3 rr
|
4049 |
+
zhuar4 zh ua4 rr
|
4050 |
+
zhuar5 zh ua5 rr
|
4051 |
+
zhuair1 zh uai1 rr
|
4052 |
+
zhuair2 zh uai2 rr
|
4053 |
+
zhuair3 zh uai3 rr
|
4054 |
+
zhuair4 zh uai4 rr
|
4055 |
+
zhuair5 zh uai5 rr
|
4056 |
+
zhuanr1 zh uan1 rr
|
4057 |
+
zhuanr2 zh uan2 rr
|
4058 |
+
zhuanr3 zh uan3 rr
|
4059 |
+
zhuanr4 zh uan4 rr
|
4060 |
+
zhuanr5 zh uan5 rr
|
4061 |
+
zhuangr1 zh uang1 rr
|
4062 |
+
zhuangr2 zh uang2 rr
|
4063 |
+
zhuangr3 zh uang3 rr
|
4064 |
+
zhuangr4 zh uang4 rr
|
4065 |
+
zhuangr5 zh uang5 rr
|
4066 |
+
zhuir1 zh uei1 rr
|
4067 |
+
zhuir2 zh uei2 rr
|
4068 |
+
zhuir3 zh uei3 rr
|
4069 |
+
zhuir4 zh uei4 rr
|
4070 |
+
zhuir5 zh uei5 rr
|
4071 |
+
zhunr1 zh uen1 rr
|
4072 |
+
zhunr2 zh uen2 rr
|
4073 |
+
zhunr3 zh uen3 rr
|
4074 |
+
zhunr4 zh uen4 rr
|
4075 |
+
zhunr5 zh uen5 rr
|
4076 |
+
zhuor1 zh uo1 rr
|
4077 |
+
zhuor2 zh uo2 rr
|
4078 |
+
zhuor3 zh uo3 rr
|
4079 |
+
zhuor4 zh uo4 rr
|
4080 |
+
zhuor5 zh uo5 rr
|
4081 |
+
zir1 z ii1 rr
|
4082 |
+
zir2 z ii2 rr
|
4083 |
+
zir3 z ii3 rr
|
4084 |
+
zir4 z ii4 rr
|
4085 |
+
zir5 z ii5 rr
|
4086 |
+
zongr1 z ong1 rr
|
4087 |
+
zongr2 z ong2 rr
|
4088 |
+
zongr3 z ong3 rr
|
4089 |
+
zongr4 z ong4 rr
|
4090 |
+
zongr5 z ong5 rr
|
4091 |
+
zour1 z ou1 rr
|
4092 |
+
zour2 z ou2 rr
|
4093 |
+
zour3 z ou3 rr
|
4094 |
+
zour4 z ou4 rr
|
4095 |
+
zour5 z ou5 rr
|
4096 |
+
zur1 z u1 rr
|
4097 |
+
zur2 z u2 rr
|
4098 |
+
zur3 z u3 rr
|
4099 |
+
zur4 z u4 rr
|
4100 |
+
zur5 z u5 rr
|
4101 |
+
zuanr1 z uan1 rr
|
4102 |
+
zuanr2 z uan2 rr
|
4103 |
+
zuanr3 z uan3 rr
|
4104 |
+
zuanr4 z uan4 rr
|
4105 |
+
zuanr5 z uan5 rr
|
4106 |
+
zuir1 z uei1 rr
|
4107 |
+
zuir2 z uei2 rr
|
4108 |
+
zuir3 z uei3 rr
|
4109 |
+
zuir4 z uei4 rr
|
4110 |
+
zuir5 z uei5 rr
|
4111 |
+
zunr1 z uen1 rr
|
4112 |
+
zunr2 z uen2 rr
|
4113 |
+
zunr3 z uen3 rr
|
4114 |
+
zunr4 z uen4 rr
|
4115 |
+
zunr5 z uen5 rr
|
4116 |
+
zuor1 z uo1 rr
|
4117 |
+
zuor2 z uo2 rr
|
4118 |
+
zuor3 z uo3 rr
|
4119 |
+
zuor4 z uo4 rr
|
4120 |
+
zuor5 z uo5 rr
|
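Each lexicon line above maps one erhua-suffixed, tone-numbered syllable (e.g. guor3) to its phone sequence: an optional initial, a toned final, and the retroflex marker rr. A minimal parsing sketch (the variable names and the single example line are illustrative only, not code from this commit):

# Illustrative sketch: split one pinyin-lexicon-r.txt line into word and phones.
line = "guor3 g uo3 rr"
word, *phones = line.split()   # -> "guor3", ["g", "uo3", "rr"]
lexicon = {word: phones}       # a dict like this could be built over the whole file
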
text/numbers.py
ADDED
@@ -0,0 +1,77 @@
# Copyright (c) 2023 Amphion.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

""" This code is modified from https://github.com/keithito/tacotron """

import inflect
import re

_inflect = inflect.engine()
_comma_number_re = re.compile(r"([0-9][0-9\,]+[0-9])")
_decimal_number_re = re.compile(r"([0-9]+\.[0-9]+)")
_pounds_re = re.compile(r"£([0-9\,]*[0-9]+)")
_dollars_re = re.compile(r"\$([0-9\.\,]*[0-9]+)")
_ordinal_re = re.compile(r"[0-9]+(st|nd|rd|th)")
_number_re = re.compile(r"[0-9]+")


def _remove_commas(m):
    return m.group(1).replace(",", "")


def _expand_decimal_point(m):
    return m.group(1).replace(".", " point ")


def _expand_dollars(m):
    match = m.group(1)
    parts = match.split(".")
    if len(parts) > 2:
        return match + " dollars"  # Unexpected format
    dollars = int(parts[0]) if parts[0] else 0
    cents = int(parts[1]) if len(parts) > 1 and parts[1] else 0
    if dollars and cents:
        dollar_unit = "dollar" if dollars == 1 else "dollars"
        cent_unit = "cent" if cents == 1 else "cents"
        return "%s %s, %s %s" % (dollars, dollar_unit, cents, cent_unit)
    elif dollars:
        dollar_unit = "dollar" if dollars == 1 else "dollars"
        return "%s %s" % (dollars, dollar_unit)
    elif cents:
        cent_unit = "cent" if cents == 1 else "cents"
        return "%s %s" % (cents, cent_unit)
    else:
        return "zero dollars"


def _expand_ordinal(m):
    return _inflect.number_to_words(m.group(0))


def _expand_number(m):
    num = int(m.group(0))
    if num > 1000 and num < 3000:
        if num == 2000:
            return "two thousand"
        elif num > 2000 and num < 2010:
            return "two thousand " + _inflect.number_to_words(num % 100)
        elif num % 100 == 0:
            return _inflect.number_to_words(num // 100) + " hundred"
        else:
            return _inflect.number_to_words(
                num, andword="", zero="oh", group=2
            ).replace(", ", " ")
    else:
        return _inflect.number_to_words(num, andword="")


def normalize_numbers(text):
    text = re.sub(_comma_number_re, _remove_commas, text)
    text = re.sub(_pounds_re, r"\1 pounds", text)
    text = re.sub(_dollars_re, _expand_dollars, text)
    text = re.sub(_decimal_number_re, _expand_decimal_point, text)
    text = re.sub(_ordinal_re, _expand_ordinal, text)
    text = re.sub(_number_re, _expand_number, text)
    return text
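A usage sketch for normalize_numbers (the input sentence is invented, and the exact wording of the expansions comes from the inflect package, so the expected string below is an assumption rather than captured output):

# Illustrative only; assumes `inflect` is installed and the text/ package is importable.
from text.numbers import normalize_numbers

print(normalize_numbers("I paid $3.50 on the 2nd of May 1999."))
# Roughly: "I paid three dollars, fifty cents on the second of May nineteen ninety-nine."
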
text/pinyin.py
ADDED
@@ -0,0 +1,218 @@
# Copyright (c) 2023 Amphion.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

initials = [
    "b",
    "c",
    "ch",
    "d",
    "f",
    "g",
    "h",
    "j",
    "k",
    "l",
    "m",
    "n",
    "p",
    "q",
    "r",
    "s",
    "sh",
    "t",
    "w",
    "x",
    "y",
    "z",
    "zh",
]
finals = [
    "a1",
    "a2",
    "a3",
    "a4",
    "a5",
    "ai1",
    "ai2",
    "ai3",
    "ai4",
    "ai5",
    "an1",
    "an2",
    "an3",
    "an4",
    "an5",
    "ang1",
    "ang2",
    "ang3",
    "ang4",
    "ang5",
    "ao1",
    "ao2",
    "ao3",
    "ao4",
    "ao5",
    "e1",
    "e2",
    "e3",
    "e4",
    "e5",
    "ei1",
    "ei2",
    "ei3",
    "ei4",
    "ei5",
    "en1",
    "en2",
    "en3",
    "en4",
    "en5",
    "eng1",
    "eng2",
    "eng3",
    "eng4",
    "eng5",
    "er1",
    "er2",
    "er3",
    "er4",
    "er5",
    "i1",
    "i2",
    "i3",
    "i4",
    "i5",
    "ia1",
    "ia2",
    "ia3",
    "ia4",
    "ia5",
    "ian1",
    "ian2",
    "ian3",
    "ian4",
    "ian5",
    "iang1",
    "iang2",
    "iang3",
    "iang4",
    "iang5",
    "iao1",
    "iao2",
    "iao3",
    "iao4",
    "iao5",
    "ie1",
    "ie2",
    "ie3",
    "ie4",
    "ie5",
    "ii1",
    "ii2",
    "ii3",
    "ii4",
    "ii5",
    "iii1",
    "iii2",
    "iii3",
    "iii4",
    "iii5",
    "in1",
    "in2",
    "in3",
    "in4",
    "in5",
    "ing1",
    "ing2",
    "ing3",
    "ing4",
    "ing5",
    "iong1",
    "iong2",
    "iong3",
    "iong4",
    "iong5",
    "iou1",
    "iou2",
    "iou3",
    "iou4",
    "iou5",
    "o1",
    "o2",
    "o3",
    "o4",
    "o5",
    "ong1",
    "ong2",
    "ong3",
    "ong4",
    "ong5",
    "ou1",
    "ou2",
    "ou3",
    "ou4",
    "ou5",
    "u1",
    "u2",
    "u3",
    "u4",
    "u5",
    "ua1",
    "ua2",
    "ua3",
    "ua4",
    "ua5",
    "uai1",
    "uai2",
    "uai3",
    "uai4",
    "uai5",
    "uan1",
    "uan2",
    "uan3",
    "uan4",
    "uan5",
    "uang1",
    "uang2",
    "uang3",
    "uang4",
    "uang5",
    "uei1",
    "uei2",
    "uei3",
    "uei4",
    "uei5",
    "uen1",
    "uen2",
    "uen3",
    "uen4",
    "uen5",
    "uo1",
    "uo2",
    "uo3",
    "uo4",
    "uo5",
    "v1",
    "v2",
    "v3",
    "v4",
    "v5",
    "van1",
    "van2",
    "van3",
    "van4",
    "van5",
    "ve1",
    "ve2",
    "ve3",
    "ve4",
    "ve5",
    "vn1",
    "vn2",
    "vn3",
    "vn4",
    "vn5",
]
valid_symbols = initials + finals + ["rr"]
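valid_symbols is the full phone inventory the lexicon above draws from, so every phone field in pinyin-lexicon-r.txt should be a member. A quick consistency check (a sketch; the import path assumes the repository layout shown in this commit):

# Illustrative only: verify one erhua entry against the symbol inventory.
from text.pinyin import valid_symbols

entry = "zhuangr4 zh uang4 rr"  # taken from the lexicon above
word, *phones = entry.split()
assert all(p in valid_symbols for p in phones)  # "zh", "uang4", "rr" are all valid
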
text/symbol_table.py
ADDED
@@ -0,0 +1,292 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright 2020 Mobvoi Inc. (authors: Fangjun Kuang)
|
2 |
+
#
|
3 |
+
# See ../../../LICENSE for clarification regarding multiple authors
|
4 |
+
#
|
5 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
6 |
+
# you may not use this file except in compliance with the License.
|
7 |
+
# You may obtain a copy of the License at
|
8 |
+
#
|
9 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
10 |
+
#
|
11 |
+
# Unless required by applicable law or agreed to in writing, software
|
12 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
13 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
14 |
+
# See the License for the specific language governing permissions and
|
15 |
+
# limitations under the License.
|
16 |
+
|
17 |
+
from dataclasses import dataclass
|
18 |
+
from dataclasses import field
|
19 |
+
from typing import Dict
|
20 |
+
from typing import Generic
|
21 |
+
from typing import List
|
22 |
+
from typing import Optional
|
23 |
+
from typing import TypeVar
|
24 |
+
from typing import Union
|
25 |
+
|
26 |
+
Symbol = TypeVar("Symbol")
|
27 |
+
|
28 |
+
# SymbolTable is copied from
|
29 |
+
# https://github.com/k2-fsa/k2/blob/master/k2/python/k2/symbol_table.py
|
30 |
+
|
31 |
+
"""
|
32 |
+
SymbolTable: map symbol to id
|
33 |
+
"""
|
34 |
+
|
35 |
+
|
36 |
+
@dataclass(repr=False)
|
37 |
+
class SymbolTable(Generic[Symbol]):
|
38 |
+
"""SymbolTable that maps symbol IDs, found on the FSA arcs to
|
39 |
+
actual objects. These objects can be arbitrary Python objects
|
40 |
+
that can serve as keys in a dictionary (i.e. they need to be
|
41 |
+
hashable and immutable).
|
42 |
+
|
43 |
+
The SymbolTable can only be read to/written from disk if the
|
44 |
+
symbols are strings.
|
45 |
+
"""
|
46 |
+
|
47 |
+
_id2sym: Dict[int, Symbol] = field(default_factory=dict)
|
48 |
+
"""Map an integer to a symbol.
|
49 |
+
"""
|
50 |
+
|
51 |
+
_sym2id: Dict[Symbol, int] = field(default_factory=dict)
|
52 |
+
"""Map a symbol to an integer.
|
53 |
+
"""
|
54 |
+
|
55 |
+
_next_available_id: int = 1
|
56 |
+
"""A helper internal field that helps adding new symbols
|
57 |
+
to the table efficiently.
|
58 |
+
"""
|
59 |
+
|
60 |
+
eps: Symbol = "<eps>"
|
61 |
+
"""Null symbol, always mapped to index 0.
|
62 |
+
"""
|
63 |
+
|
64 |
+
def __post_init__(self):
|
65 |
+
assert all(self._sym2id[sym] == idx for idx, sym in self._id2sym.items())
|
66 |
+
assert all(self._id2sym[idx] == sym for sym, idx in self._sym2id.items())
|
67 |
+
assert 0 not in self._id2sym or self._id2sym[0] == self.eps
|
68 |
+
|
69 |
+
self._next_available_id = max(self._id2sym, default=0) + 1
|
70 |
+
self._id2sym.setdefault(0, self.eps)
|
71 |
+
self._sym2id.setdefault(self.eps, 0)
|
72 |
+
|
73 |
+
@staticmethod
|
74 |
+
def from_str(s: str) -> "SymbolTable":
|
75 |
+
"""Build a symbol table from a string.
|
76 |
+
|
77 |
+
The string consists of lines. Every line has two fields separated
|
78 |
+
by space(s), tab(s) or both. The first field is the symbol and the
|
79 |
+
second the integer id of the symbol.
|
80 |
+
|
81 |
+
Args:
|
82 |
+
s:
|
83 |
+
The input string with the format described above.
|
84 |
+
Returns:
|
85 |
+
An instance of :class:`SymbolTable`.
|
86 |
+
"""
|
        id2sym: Dict[int, str] = dict()
        sym2id: Dict[str, int] = dict()

        for line in s.split("\n"):
            fields = line.split()
            if len(fields) == 0:
                continue  # skip empty lines
            assert (
                len(fields) == 2
            ), f"Expect a line with 2 fields. Given: {len(fields)}"
            sym, idx = fields[0], int(fields[1])
            assert sym not in sym2id, f"Duplicated symbol {sym}"
            assert idx not in id2sym, f"Duplicated id {idx}"
            id2sym[idx] = sym
            sym2id[sym] = idx

        eps = id2sym.get(0, "<eps>")

        return SymbolTable(_id2sym=id2sym, _sym2id=sym2id, eps=eps)

    @staticmethod
    def from_file(filename: str) -> "SymbolTable":
        """Build a symbol table from file.

        Every line in the symbol table file has two fields separated by
        space(s), tab(s) or both. The following is an example file:

        .. code-block::

            <eps> 0
            a 1
            b 2
            c 3

        Args:
          filename:
            Name of the symbol table file. Its format is documented above.

        Returns:
          An instance of :class:`SymbolTable`.

        """
        with open(filename, "r", encoding="utf-8") as f:
            return SymbolTable.from_str(f.read().strip())

    def to_str(self) -> str:
        """
        Returns:
          Return a string representation of this object. You can pass
          it to the method ``from_str`` to recreate an identical object.
        """
        s = ""
        for idx, symbol in sorted(self._id2sym.items()):
            s += f"{symbol} {idx}\n"
        return s

    def to_file(self, filename: str):
        """Serialize the SymbolTable to a file.

        Every line in the symbol table file has two fields separated by
        space(s), tab(s) or both. The following is an example file:

        .. code-block::

            <eps> 0
            a 1
            b 2
            c 3

        Args:
          filename:
            Name of the symbol table file. Its format is documented above.
        """
        with open(filename, "w") as f:
            for idx, symbol in sorted(self._id2sym.items()):
                print(symbol, idx, file=f)

    def add(self, symbol: Symbol, index: Optional[int] = None) -> int:
        """Add a new symbol to the SymbolTable.

        Args:
          symbol:
            The symbol to be added.
          index:
            Optional int id to which the symbol should be assigned.
            If it is not available, a ValueError will be raised.

        Returns:
          The int id to which the symbol has been assigned.
        """
        # Already in the table? Return its ID.
        if symbol in self._sym2id:
            return self._sym2id[symbol]
        # Specific ID not provided - use next available.
        if index is None:
            index = self._next_available_id
        # Specific ID provided but not available.
        if index in self._id2sym:
            raise ValueError(
                f"Cannot assign id '{index}' to '{symbol}' - "
                f"already occupied by {self._id2sym[index]}"
            )
        self._sym2id[symbol] = index
        self._id2sym[index] = symbol

        # Update next available ID if needed
        if self._next_available_id <= index:
            self._next_available_id = index + 1

        return index

    def get(self, k: Union[int, Symbol]) -> Union[Symbol, int]:
        """Get a symbol for an id or get an id for a symbol

        Args:
          k:
            If it is an id, it tries to find the symbol corresponding
            to the id; if it is a symbol, it tries to find the id
            corresponding to the symbol.

        Returns:
          An id or a symbol depending on the given `k`.
        """
        if isinstance(k, int):
            return self._id2sym[k]
        else:
            return self._sym2id[k]

    def merge(self, other: "SymbolTable") -> "SymbolTable":
        """Create a union of two SymbolTables.
        Raises an AssertionError if the same IDs are occupied by
        different symbols.

        Args:
          other:
            A symbol table to merge with ``self``.

        Returns:
          A new symbol table.
        """
        self._check_compatible(other)
        return SymbolTable(
            _id2sym={**self._id2sym, **other._id2sym},
            _sym2id={**self._sym2id, **other._sym2id},
            eps=self.eps,
        )

    def _check_compatible(self, other: "SymbolTable") -> None:
        # Epsilon compatibility
        assert self.eps == other.eps, (
            f"Mismatched epsilon symbol: {self.eps} != {other.eps}"
        )
        # IDs compatibility
        common_ids = set(self._id2sym).intersection(other._id2sym)
        for idx in common_ids:
            assert self[idx] == other[idx], (
                f"ID conflict for id: {idx}, "
                f'self[idx] = "{self[idx]}", '
                f'other[idx] = "{other[idx]}"'
            )
        # Symbols compatibility
        common_symbols = set(self._sym2id).intersection(other._sym2id)
        for sym in common_symbols:
            assert self[sym] == other[sym], (
                f"ID conflict for symbol: {sym}, "
                f'self[sym] = "{self[sym]}", '
                f'other[sym] = "{other[sym]}"'
            )

    def __getitem__(self, item: Union[int, Symbol]) -> Union[Symbol, int]:
        return self.get(item)

    def __contains__(self, item: Union[int, Symbol]) -> bool:
        if isinstance(item, int):
            return item in self._id2sym
        else:
            return item in self._sym2id

    def __len__(self) -> int:
        return len(self._id2sym)

    def __eq__(self, other: "SymbolTable") -> bool:
        if len(self) != len(other):
            return False

        for s in self.symbols:
            if self[s] != other[s]:
                return False

        return True

    @property
    def ids(self) -> List[int]:
        """Returns a list of integer IDs corresponding to the symbols."""
        ans = list(self._id2sym.keys())
        ans.sort()
        return ans

    @property
    def symbols(self) -> List[Symbol]:
        """Returns a list of symbols (e.g., strings) corresponding to
        the integer IDs.
        """
        ans = list(self._sym2id.keys())
        ans.sort()
        return ans
text/symbols.py
ADDED
@@ -0,0 +1,34 @@
# Copyright (c) 2023 Amphion.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

""" This code is modified from https://github.com/keithito/tacotron """

"""
Defines the set of symbols used in text input to the model.

The default is a set of ASCII characters that works well for English or text
that has been run through Unidecode. For other data, you can modify _characters.
See TRAINING_DATA.md for details.
"""

from text import cmudict, pinyin

_pad = "_"
_punctuation = "!'(),.:;? "
_special = "-"
_letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
_silences = ["@sp", "@spn", "@sil"]

# Prepend "@" to ARPAbet symbols to ensure uniqueness (some are the same as uppercase letters):
_arpabet = ["@" + s for s in cmudict.valid_symbols]
_pinyin = ["@" + s for s in pinyin.valid_symbols]

# Export all symbols:
symbols = (
    [_pad]
    + list(_special)
    + list(_punctuation)
    + list(_letters)
    + _arpabet
    + _silences
    # + _pinyin  # for Chinese
)
text/text_token_collation.py
ADDED
@@ -0,0 +1,123 @@
# Copyright (c) 2023 Amphion.

# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

from pathlib import Path
from typing import List, Tuple
import os
import numpy as np
import torch
from text.symbol_table import SymbolTable
from text import text_to_sequence


"""
TextToken: map text to id
"""


# TextTokenCollator is modified from
# https://github.com/lifeiteng/vall-e/blob/9c69096d603ce13174fb5cb025f185e2e9b36ac7/valle/data/collation.py
class TextTokenCollator:
    def __init__(
        self,
        text_tokens: List[str],
        add_eos: bool = True,
        add_bos: bool = True,
        pad_symbol: str = "<pad>",
        bos_symbol: str = "<bos>",
        eos_symbol: str = "<eos>",
    ):
        self.pad_symbol = pad_symbol
        self.add_eos = add_eos
        self.add_bos = add_bos
        self.bos_symbol = bos_symbol
        self.eos_symbol = eos_symbol

        unique_tokens = [pad_symbol]
        if add_bos:
            unique_tokens.append(bos_symbol)
        if add_eos:
            unique_tokens.append(eos_symbol)
        unique_tokens.extend(sorted(text_tokens))

        self.token2idx = {token: idx for idx, token in enumerate(unique_tokens)}
        self.idx2token = unique_tokens

    def index(self, tokens_list: List[List[str]]) -> Tuple[torch.Tensor, torch.Tensor]:
        seqs, seq_lens = [], []
        for tokens in tokens_list:
            assert all(s in self.token2idx for s in tokens)
            seq = (
                ([self.bos_symbol] if self.add_bos else [])
                + list(tokens)
                + ([self.eos_symbol] if self.add_eos else [])
            )
            seqs.append(seq)
            seq_lens.append(len(seq))

        max_len = max(seq_lens)
        for k, (seq, seq_len) in enumerate(zip(seqs, seq_lens)):
            seq.extend([self.pad_symbol] * (max_len - seq_len))

        tokens = torch.from_numpy(
            np.array(
                [[self.token2idx[token] for token in seq] for seq in seqs],
                dtype=np.int64,
            )
        )
        tokens_lens = torch.IntTensor(seq_lens)

        return tokens, tokens_lens

    def __call__(self, text):
        tokens_seq = [p for p in text]
        seq = (
            ([self.bos_symbol] if self.add_bos else [])
            + tokens_seq
            + ([self.eos_symbol] if self.add_eos else [])
        )

        token_ids = [self.token2idx[token] for token in seq]
        token_lens = len(tokens_seq) + self.add_eos + self.add_bos

        return token_ids, token_lens


def get_text_token_collater(text_tokens_file: str) -> Tuple[TextTokenCollator, dict]:
    text_tokens_path = Path(text_tokens_file)
    unique_tokens = SymbolTable.from_file(text_tokens_path)
    collater = TextTokenCollator(unique_tokens.symbols, add_bos=True, add_eos=True)
    token2idx = collater.token2idx
    return collater, token2idx


class phoneIDCollation:
    def __init__(self, cfg, dataset=None, symbols_dict_file=None) -> None:
        if cfg.preprocess.phone_extractor != "lexicon":
            ### get text token collator
            if symbols_dict_file is None:
                assert dataset is not None
                symbols_dict_file = os.path.join(
                    cfg.preprocess.processed_dir, dataset, cfg.preprocess.symbols_dict
                )
            self.text_token_collator, token2idx = get_text_token_collater(
                symbols_dict_file
            )
            # # unique_tokens = SymbolTable.from_file(symbols_dict_path)
            # # text_tokenizer = TextToken(unique_tokens.symbols, add_bos=True, add_eos=True)

            # # update phone symbols dict file with pad_symbol or optional tokens (add_bos and add_eos) in TextTokenCollator
            # phone_symbol_dict = SymbolTable()
            # for s in sorted(list(set(token2idx.keys()))):
            #     phone_symbol_dict.add(s)
            # phone_symbol_dict.to_file(symbols_dict_file)

    def get_phone_id_sequence(self, cfg, phones_seq):
        if cfg.preprocess.phone_extractor == "lexicon":
            phones_seq = " ".join(phones_seq)
            sequence = text_to_sequence(phones_seq, cfg.preprocess.text_cleaners)
        else:
            sequence, seq_len = self.text_token_collator(phones_seq)
        return sequence
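
A quick sketch of how the collator behaves (illustrative, not part of this commit):

    from text.text_token_collation import TextTokenCollator

    collator = TextTokenCollator(["a", "b", "c"], add_bos=True, add_eos=True)
    # Single sequence: returns (List[int], int), with <bos>/<eos> wrapped around it.
    ids, length = collator(["a", "c"])
    assert length == 4  # 2 tokens + bos + eos
    # Batch: returns a padded int64 tensor plus the unpadded lengths.
    tokens, lens = collator.index([["a"], ["b", "c"]])
    assert tokens.shape == (2, 4) and lens.tolist() == [3, 4]
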
utils/HyperParams/__init__.py
ADDED
@@ -0,0 +1,6 @@
# Copyright (c) 2023 Amphion.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

from .hps import HyperParams
utils/HyperParams/hps.py
ADDED
@@ -0,0 +1,43 @@
# Copyright (c) 2023 Amphion.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.


class HyperParams:
    """A container for hyperparameters. Nested dicts are converted to
    nested HyperParams recursively.

    Args:
        **kwargs: key-value pairs of hyperparameters.
    """

    def __init__(self, **kwargs):
        for k, v in kwargs.items():
            if type(v) == dict:
                v = HyperParams(**v)
            self[k] = v

    def keys(self):
        return self.__dict__.keys()

    def items(self):
        return self.__dict__.items()

    def values(self):
        return self.__dict__.values()

    def __len__(self):
        return len(self.__dict__)

    def __getitem__(self, key):
        return getattr(self, key)

    def __setitem__(self, key, value):
        return setattr(self, key, value)

    def __contains__(self, key):
        return key in self.__dict__

    def __repr__(self):
        return self.__dict__.__repr__()
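
Usage sketch (illustrative): nested dicts become nested `HyperParams`, and both attribute and item access work:

    from utils.HyperParams import HyperParams

    hps = HyperParams(model={"hidden_size": 256}, lr=2e-4)
    assert hps.model.hidden_size == 256       # dict -> HyperParams, recursively
    assert hps["lr"] == 2e-4 and "lr" in hps  # __getitem__/__contains__ via __dict__
    hps["batch_size"] = 16                    # __setitem__ is just setattr
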
utils/__init__.py
ADDED
File without changes
utils/audio.py
ADDED
@@ -0,0 +1,74 @@
# Copyright (c) 2023 Amphion.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

import torch
import numpy as np
from numpy import linalg as LA
import librosa
import soundfile as sf
import librosa.filters


def load_audio_torch(wave_file, fs):
    """Load audio data into torch tensor

    Args:
        wave_file (str): path to wave file
        fs (int): sample rate

    Returns:
        audio (tensor): audio data in tensor
        fs (int): sample rate
    """

    audio, sample_rate = librosa.load(wave_file, sr=fs, mono=True)
    # audio: (T,)
    assert len(audio) > 2

    # Check the audio type (for soundfile loading backbone) - float, 8bit or 16bit
    if np.issubdtype(audio.dtype, np.integer):
        max_mag = -np.iinfo(audio.dtype).min
    else:
        max_mag = max(np.amax(audio), -np.amin(audio))
        max_mag = (
            (2**31) + 1
            if max_mag > (2**15)
            else ((2**15) + 1 if max_mag > 1.01 else 1.0)
        )

    # Normalize the audio
    audio = torch.FloatTensor(audio.astype(np.float32)) / max_mag

    if (torch.isnan(audio) | torch.isinf(audio)).any():
        return [], sample_rate or fs or 48000

    # Resample the audio to our target samplerate
    if fs is not None and fs != sample_rate:
        audio = torch.from_numpy(
            librosa.core.resample(audio.numpy(), orig_sr=sample_rate, target_sr=fs)
        )
        sample_rate = fs

    return audio, fs


def _stft(y, cfg):
    return librosa.stft(
        y=y, n_fft=cfg.n_fft, hop_length=cfg.hop_size, win_length=cfg.win_size
    )


def energy(wav, cfg):
    D = _stft(wav, cfg)
    magnitudes = np.abs(D).T  # [T, F]
    return LA.norm(magnitudes, axis=1)


def get_energy_from_tacotron(audio, _stft):
    audio = torch.clip(torch.FloatTensor(audio).unsqueeze(0), -1, 1)
    audio = torch.autograd.Variable(audio, requires_grad=False)
    mel, energy = _stft.mel_spectrogram(audio)
    energy = torch.squeeze(energy, 0).numpy().astype(np.float32)
    return mel, energy
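
A small sketch of `energy` above (illustrative; the STFT sizes are placeholder values, and any object exposing `n_fft`/`hop_size`/`win_size` works as `cfg`):

    import numpy as np
    from types import SimpleNamespace
    from utils.audio import energy

    cfg = SimpleNamespace(n_fft=1024, hop_size=256, win_size=1024)
    wav = np.random.randn(16000).astype(np.float32)  # 1 s of noise at 16 kHz
    frame_energy = energy(wav, cfg)                  # L2 norm of each frame's spectrum
    print(frame_energy.shape)                        # (n_frames,)
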
utils/audio_slicer.py
ADDED
@@ -0,0 +1,476 @@
# Copyright (c) 2023 Amphion.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

import os
import json
import numpy as np
from tqdm import tqdm
import torch
import torchaudio

from utils.io import save_audio
from utils.audio import load_audio_torch


# This function is obtained from librosa.
def get_rms(
    y,
    *,
    frame_length=2048,
    hop_length=512,
    pad_mode="constant",
):
    padding = (int(frame_length // 2), int(frame_length // 2))
    y = np.pad(y, padding, mode=pad_mode)

    axis = -1
    # put our new within-frame axis at the end for now
    out_strides = y.strides + tuple([y.strides[axis]])
    # Reduce the shape on the framing axis
    x_shape_trimmed = list(y.shape)
    x_shape_trimmed[axis] -= frame_length - 1
    out_shape = tuple(x_shape_trimmed) + tuple([frame_length])
    xw = np.lib.stride_tricks.as_strided(y, shape=out_shape, strides=out_strides)
    if axis < 0:
        target_axis = axis - 1
    else:
        target_axis = axis + 1
    xw = np.moveaxis(xw, -1, target_axis)
    # Downsample along the target axis
    slices = [slice(None)] * xw.ndim
    slices[axis] = slice(0, None, hop_length)
    x = xw[tuple(slices)]

    # Calculate power
    power = np.mean(np.abs(x) ** 2, axis=-2, keepdims=True)

    return np.sqrt(power)


class Slicer:
    """
    Copy from: https://github.com/openvpi/audio-slicer/blob/main/slicer2.py
    """

    def __init__(
        self,
        sr: int,
        threshold: float = -40.0,
        min_length: int = 5000,
        min_interval: int = 300,
        hop_size: int = 10,
        max_sil_kept: int = 5000,
    ):
        if not min_length >= min_interval >= hop_size:
            raise ValueError(
                "The following condition must be satisfied: min_length >= min_interval >= hop_size"
            )
        if not max_sil_kept >= hop_size:
            raise ValueError(
                "The following condition must be satisfied: max_sil_kept >= hop_size"
            )
        min_interval = sr * min_interval / 1000
        self.threshold = 10 ** (threshold / 20.0)
        self.hop_size = round(sr * hop_size / 1000)
        self.win_size = min(round(min_interval), 4 * self.hop_size)
        self.min_length = round(sr * min_length / 1000 / self.hop_size)
        self.min_interval = round(min_interval / self.hop_size)
        self.max_sil_kept = round(sr * max_sil_kept / 1000 / self.hop_size)

    def _apply_slice(self, waveform, begin, end):
        begin = begin * self.hop_size
        if len(waveform.shape) > 1:
            end = min(waveform.shape[1], end * self.hop_size)
            return waveform[:, begin:end], begin, end
        else:
            end = min(waveform.shape[0], end * self.hop_size)
            return waveform[begin:end], begin, end

    # @timeit
    def slice(self, waveform, return_chunks_positions=False):
        if len(waveform.shape) > 1:
            # (#channel, wave_len) -> (wave_len,)
            samples = waveform.mean(axis=0)
        else:
            samples = waveform
        if samples.shape[0] <= self.min_length:
            return [waveform]
        rms_list = get_rms(
            y=samples, frame_length=self.win_size, hop_length=self.hop_size
        ).squeeze(0)
        sil_tags = []
        silence_start = None
        clip_start = 0
        for i, rms in enumerate(rms_list):
            # Keep looping while frame is silent.
            if rms < self.threshold:
                # Record start of silent frames.
                if silence_start is None:
                    silence_start = i
                continue
            # Keep looping while frame is not silent and silence start has not been recorded.
            if silence_start is None:
                continue
            # Clear recorded silence start if interval is not enough or clip is too short
            is_leading_silence = silence_start == 0 and i > self.max_sil_kept
            need_slice_middle = (
                i - silence_start >= self.min_interval
                and i - clip_start >= self.min_length
            )
            if not is_leading_silence and not need_slice_middle:
                silence_start = None
                continue
            # Need slicing. Record the range of silent frames to be removed.
            if i - silence_start <= self.max_sil_kept:
                pos = rms_list[silence_start : i + 1].argmin() + silence_start
                if silence_start == 0:
                    sil_tags.append((0, pos))
                else:
                    sil_tags.append((pos, pos))
                clip_start = pos
            elif i - silence_start <= self.max_sil_kept * 2:
                pos = rms_list[
                    i - self.max_sil_kept : silence_start + self.max_sil_kept + 1
                ].argmin()
                pos += i - self.max_sil_kept
                pos_l = (
                    rms_list[
                        silence_start : silence_start + self.max_sil_kept + 1
                    ].argmin()
                    + silence_start
                )
                pos_r = (
                    rms_list[i - self.max_sil_kept : i + 1].argmin()
                    + i
                    - self.max_sil_kept
                )
                if silence_start == 0:
                    sil_tags.append((0, pos_r))
                    clip_start = pos_r
                else:
                    sil_tags.append((min(pos_l, pos), max(pos_r, pos)))
                    clip_start = max(pos_r, pos)
            else:
                pos_l = (
                    rms_list[
                        silence_start : silence_start + self.max_sil_kept + 1
                    ].argmin()
                    + silence_start
                )
                pos_r = (
                    rms_list[i - self.max_sil_kept : i + 1].argmin()
                    + i
                    - self.max_sil_kept
                )
                if silence_start == 0:
                    sil_tags.append((0, pos_r))
                else:
                    sil_tags.append((pos_l, pos_r))
                clip_start = pos_r
            silence_start = None
        # Deal with trailing silence.
        total_frames = rms_list.shape[0]
        if (
            silence_start is not None
            and total_frames - silence_start >= self.min_interval
        ):
            silence_end = min(total_frames, silence_start + self.max_sil_kept)
            pos = rms_list[silence_start : silence_end + 1].argmin() + silence_start
            sil_tags.append((pos, total_frames + 1))
        # Apply and return slices.
        if len(sil_tags) == 0:
            return [waveform]
        else:
            chunks = []
            chunks_pos_of_waveform = []

            if sil_tags[0][0] > 0:
                chunk, begin, end = self._apply_slice(waveform, 0, sil_tags[0][0])
                chunks.append(chunk)
                chunks_pos_of_waveform.append((begin, end))

            for i in range(len(sil_tags) - 1):
                chunk, begin, end = self._apply_slice(
                    waveform, sil_tags[i][1], sil_tags[i + 1][0]
                )
                chunks.append(chunk)
                chunks_pos_of_waveform.append((begin, end))

            if sil_tags[-1][1] < total_frames:
                chunk, begin, end = self._apply_slice(
                    waveform, sil_tags[-1][1], total_frames
                )
                chunks.append(chunk)
                chunks_pos_of_waveform.append((begin, end))

            return (
                chunks
                if not return_chunks_positions
                else (
                    chunks,
                    chunks_pos_of_waveform,
                )
            )


def split_utterances_from_audio(
    wav_file,
    output_dir,
    max_duration_of_utterance=10.0,
    min_interval=300,
    db_threshold=-40,
):
    """
    Split a long audio into utterances according to the silence (VAD).

    max_duration_of_utterance (second):
        The maximum duration of every utterance (seconds)
    min_interval (millisecond):
        The smaller min_interval is, the more sliced audio clips this script is likely to generate.
    """
    print("File:", wav_file.split("/")[-1])
    waveform, fs = torchaudio.load(wav_file)

    slicer = Slicer(sr=fs, min_interval=min_interval, threshold=db_threshold)
    chunks, positions = slicer.slice(waveform, return_chunks_positions=True)

    durations = [(end - begin) / fs for begin, end in positions]
    print(
        "Slicer's min silence part is {}ms, min and max duration of sliced utterances is {}s and {}s".format(
            min_interval, min(durations), max(durations)
        )
    )

    res_chunks, res_positions = [], []
    for i, chunk in enumerate(chunks):
        if len(chunk.shape) == 1:
            chunk = chunk[None, :]

        begin, end = positions[i]
        assert end - begin == chunk.shape[-1]

        max_wav_len = max_duration_of_utterance * fs
        if chunk.shape[-1] <= max_wav_len:
            res_chunks.append(chunk)
            res_positions.append(positions[i])
        else:
            # TODO: to reserve overlapping and conduct fade-in, fade-out

            # Get segments number
            number = 2
            while chunk.shape[-1] // number >= max_wav_len:
                number += 1
            seg_len = chunk.shape[-1] // number

            # Split
            for num in range(number):
                s = seg_len * num
                t = min(s + seg_len, chunk.shape[-1])

                seg_begin = begin + s
                seg_end = begin + t

                res_chunks.append(chunk[:, s:t])
                res_positions.append((seg_begin, seg_end))

    # Save utterances
    os.makedirs(output_dir, exist_ok=True)
    res = {"fs": int(fs)}
    for i, chunk in enumerate(res_chunks):
        filename = "{:04d}.wav".format(i)
        res[filename] = [int(p) for p in res_positions[i]]
        save_audio(os.path.join(output_dir, filename), chunk, fs)

    # Save positions
    with open(os.path.join(output_dir, "positions.json"), "w") as f:
        json.dump(res, f, indent=4, ensure_ascii=False)
    return res


def is_silence(
    waveform,
    fs,
    threshold=-40.0,
    min_interval=300,
    hop_size=10,
    min_length=5000,
):
    """
    Detect whether the given waveform is silence

    waveform: (T,)
    """
    threshold = 10 ** (threshold / 20.0)

    hop_size = round(fs * hop_size / 1000)
    win_size = min(round(min_interval), 4 * hop_size)
    min_length = round(fs * min_length / 1000 / hop_size)

    if waveform.shape[0] <= min_length:
        return True

    # (#Frame,)
    rms_array = get_rms(y=waveform, frame_length=win_size, hop_length=hop_size).squeeze(
        0
    )
    return (rms_array < threshold).all()


def split_audio(
    wav_file, target_sr, output_dir, max_duration_of_segment=10.0, overlap_duration=1.0
):
    """
    Split a long audio into segments.

    target_sr:
        The target sampling rate to save the segments.
    max_duration_of_segment (second):
        The maximum duration of every segment (second)
    overlap_duration:
        Each segment has "overlap duration" (second) overlap with its previous and next segment
    """
    # (#channel, T) -> (T,)
    waveform, fs = torchaudio.load(wav_file)
    waveform = torchaudio.functional.resample(
        waveform, orig_freq=fs, new_freq=target_sr
    )
    waveform = torch.mean(waveform, dim=0)

    # waveform, _ = load_audio_torch(wav_file, target_sr)
    assert len(waveform.shape) == 1

    assert overlap_duration < max_duration_of_segment
    length = int(max_duration_of_segment * target_sr)
    stride = int((max_duration_of_segment - overlap_duration) * target_sr)
    chunks = []
    for i in range(0, len(waveform), stride):
        # (length,)
        chunks.append(waveform[i : i + length])
        if i + length >= len(waveform):
            break

    # Save segments
    os.makedirs(output_dir, exist_ok=True)
    results = []
    for i, chunk in enumerate(chunks):
        uid = "{:04d}".format(i)
        filename = os.path.join(output_dir, "{}.wav".format(uid))
        results.append(
            {"Uid": uid, "Path": filename, "Duration": len(chunk) / target_sr}
        )
        save_audio(
            filename,
            chunk,
            target_sr,
            turn_up=not is_silence(chunk, target_sr),
            add_silence=False,
        )

    return results


def merge_segments_torchaudio(wav_files, fs, output_path, overlap_duration=1.0):
    """Merge the given wav_files (may have overlaps) into a long audio

    fs:
        The sampling rate of the wav files.
    output_path:
        The output path to save the merged audio.
    overlap_duration (float, optional):
        Each segment has "overlap duration" (second) overlap with its previous and next segment. Defaults to 1.0.
    """

    waveforms = []
    for file in wav_files:
        # (T,)
        waveform, _ = load_audio_torch(file, fs)
        waveforms.append(waveform)

    if len(waveforms) == 1:
        save_audio(output_path, waveforms[0], fs, add_silence=False, turn_up=False)
        return

    overlap_len = int(overlap_duration * fs)
    fade_out = torchaudio.transforms.Fade(fade_out_len=overlap_len)
    fade_in = torchaudio.transforms.Fade(fade_in_len=overlap_len)
    fade_in_and_out = torchaudio.transforms.Fade(fade_out_len=overlap_len)

    segments_lens = [len(wav) for wav in waveforms]
    merged_waveform_len = sum(segments_lens) - overlap_len * (len(waveforms) - 1)
    merged_waveform = torch.zeros(merged_waveform_len)

    start = 0
    for index, wav in enumerate(
        tqdm(waveforms, desc="Merge for {}".format(output_path))
    ):
        wav_len = len(wav)

        if index == 0:
            wav = fade_out(wav)
        elif index == len(waveforms) - 1:
            wav = fade_in(wav)
        else:
            wav = fade_in_and_out(wav)

        merged_waveform[start : start + wav_len] = wav
        start += wav_len - overlap_len

    save_audio(output_path, merged_waveform, fs, add_silence=False, turn_up=True)


def merge_segments_encodec(wav_files, fs, output_path, overlap_duration=1.0):
    """Merge the given wav_files (may have overlaps) into a long audio

    fs:
        The sampling rate of the wav files.
    output_path:
        The output path to save the merged audio.
    overlap_duration (float, optional):
        Each segment has "overlap duration" (second) overlap with its previous and next segment. Defaults to 1.0.
    """

    waveforms = []
    for file in wav_files:
        # (T,)
        waveform, _ = load_audio_torch(file, fs)
        waveforms.append(waveform)

    if len(waveforms) == 1:
        save_audio(output_path, waveforms[0], fs, add_silence=False, turn_up=False)
        return

    device = waveforms[0].device
    dtype = waveforms[0].dtype
    shape = waveforms[0].shape[:-1]

    overlap_len = int(overlap_duration * fs)
    segments_lens = [len(wav) for wav in waveforms]
    merged_waveform_len = sum(segments_lens) - overlap_len * (len(waveforms) - 1)

    sum_weight = torch.zeros(merged_waveform_len, device=device, dtype=dtype)
    out = torch.zeros(*shape, merged_waveform_len, device=device, dtype=dtype)
    offset = 0

    for frame in waveforms:
        frame_length = frame.size(-1)
        t = torch.linspace(0, 1, frame_length + 2, device=device, dtype=torch.float32)[
            1:-1
        ]
        weight = 0.5 - (t - 0.5).abs()
        weighted_frame = frame * weight

        cur = out[..., offset : offset + frame_length]
        cur += weighted_frame[..., : cur.size(-1)]
        out[..., offset : offset + frame_length] = cur

        cur = sum_weight[offset : offset + frame_length]
        cur += weight[..., : cur.size(-1)]
        sum_weight[offset : offset + frame_length] = cur

        offset += frame_length - overlap_len

    assert sum_weight.min() > 0
    merged_waveform = out / sum_weight
    save_audio(output_path, merged_waveform, fs, add_silence=False, turn_up=True)
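
Usage sketch for `Slicer` (illustrative; `example.wav` is a placeholder path). Note that when no silence is detected, `slice()` returns a bare `[waveform]` list even with `return_chunks_positions=True`, so the unpacking below assumes at least one cut was found:

    import torchaudio
    from utils.audio_slicer import Slicer

    waveform, sr = torchaudio.load("example.wav")
    slicer = Slicer(sr=sr, threshold=-40.0, min_interval=300)
    chunks, positions = slicer.slice(waveform, return_chunks_positions=True)
    for (begin, end), chunk in zip(positions, chunks):
        print(f"{begin / sr:.2f}s - {end / sr:.2f}s -> {tuple(chunk.shape)}")
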
utils/cut_by_vad.py
ADDED
@@ -0,0 +1,105 @@
# Copyright (c) 2023 Amphion.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

""" This code is modified from https://github.com/facebookresearch/libri-light/blob/main/data_preparation/cut_by_vad.py"""
import pathlib
import soundfile as sf
import numpy as np
import json
import multiprocessing
import tqdm


def save(seq, fname, index, extension):
    """save audio sequences to file"""
    output = np.hstack(seq)
    file_name = fname.parent / (fname.stem + f"_{index:04}{extension}")
    fname.parent.mkdir(exist_ok=True, parents=True)
    sf.write(file_name, output, samplerate=16000)


def cut_sequence(path, vad, path_out, target_len_sec, out_extension):
    """cut audio sequences based on VAD"""
    data, samplerate = sf.read(path)

    assert len(data.shape) == 1
    assert samplerate == 16000

    to_stitch = []
    length_accumulated = 0.0

    i = 0
    # Iterate over VAD segments
    for start, end in vad:
        start_index = int(start * samplerate)
        end_index = int(end * samplerate)
        slice = data[start_index:end_index]

        # Save slices that exceed the target length or if there's already accumulated audio
        if (
            length_accumulated + (end - start) > target_len_sec
            and length_accumulated > 0
        ):
            save(to_stitch, path_out, i, out_extension)
            to_stitch = []
            i += 1
            length_accumulated = 0

        # Add the current slice to the list to be stitched
        to_stitch.append(slice)
        length_accumulated += end - start

    # Save any remaining slices
    if to_stitch:
        save(to_stitch, path_out, i, out_extension)


def cut_book(task):
    """process each book in the dataset"""
    path_book, root_out, target_len_sec, extension = task

    speaker = pathlib.Path(path_book.parent.name)

    for i, meta_file_path in enumerate(path_book.glob("*.json")):
        with open(meta_file_path, "r") as f:
            meta = json.loads(f.read())
        book_id = meta["book_meta"]["id"]
        vad = meta["voice_activity"]

        sound_file = meta_file_path.parent / (meta_file_path.stem + ".flac")

        path_out = root_out / speaker / book_id / (meta_file_path.stem)
        cut_sequence(sound_file, vad, path_out, target_len_sec, extension)


def cut_segments(
    input_dir, output_dir, target_len_sec=30, n_process=32, out_extension=".wav"
):
    """Main function to cut segments from audio files"""

    pathlib.Path(output_dir).mkdir(exist_ok=True, parents=True)
    list_dir = pathlib.Path(input_dir).glob("*/*")
    list_dir = [x for x in list_dir if x.is_dir()]

    print(f"{len(list_dir)} directories detected")
    print(f"Launching {n_process} processes")

    # Create tasks for multiprocessing
    # (pass output_dir as a Path so cut_book can join it with "/")
    tasks = [
        (path_book, pathlib.Path(output_dir), target_len_sec, out_extension)
        for path_book in list_dir
    ]

    # Process tasks in parallel using multiprocessing
    with multiprocessing.Pool(processes=n_process) as pool:
        for _ in tqdm.tqdm(pool.imap_unordered(cut_book, tasks), total=len(tasks)):
            pass


if __name__ == "__main__":
    input_dir = "/path/to/input_dir"
    output_dir = "/path/to/output_dir"
    target_len_sec = 10
    n_process = 16
    cut_segments(input_dir, output_dir, target_len_sec, n_process)
|
utils/data_utils.py
ADDED
@@ -0,0 +1,588 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright (c) 2023 Amphion.
|
2 |
+
#
|
3 |
+
# This source code is licensed under the MIT license found in the
|
4 |
+
# LICENSE file in the root directory of this source tree.
|
5 |
+
|
6 |
+
import json
|
7 |
+
import os
|
8 |
+
|
9 |
+
import numpy as np
|
10 |
+
from scipy.interpolate import interp1d
|
11 |
+
from tqdm import tqdm
|
12 |
+
from sklearn.preprocessing import StandardScaler
|
13 |
+
|
14 |
+
|
15 |
+
def intersperse(lst, item):
|
16 |
+
"""
|
17 |
+
Insert an item in between any two consecutive elements of the given list, including beginning and end of list
|
18 |
+
|
19 |
+
Example:
|
20 |
+
>>> intersperse(0, [1, 74, 5, 31])
|
21 |
+
[0, 1, 0, 74, 0, 5, 0, 31, 0]
|
22 |
+
"""
|
23 |
+
result = [item] * (len(lst) * 2 + 1)
|
24 |
+
result[1::2] = lst
|
25 |
+
return result
|
26 |
+
|
27 |
+
|
28 |
+
def load_content_feature_path(meta_data, processed_dir, feat_dir):
|
29 |
+
utt2feat_path = {}
|
30 |
+
for utt_info in meta_data:
|
31 |
+
utt = utt_info["Dataset"] + "_" + utt_info["Uid"]
|
32 |
+
feat_path = os.path.join(
|
33 |
+
processed_dir, utt_info["Dataset"], feat_dir, f'{utt_info["Uid"]}.npy'
|
34 |
+
)
|
35 |
+
utt2feat_path[utt] = feat_path
|
36 |
+
|
37 |
+
return utt2feat_path
|
38 |
+
|
39 |
+
|
40 |
+
def load_source_content_feature_path(meta_data, feat_dir):
|
41 |
+
utt2feat_path = {}
|
42 |
+
for utt in meta_data:
|
43 |
+
feat_path = os.path.join(feat_dir, f"{utt}.npy")
|
44 |
+
utt2feat_path[utt] = feat_path
|
45 |
+
|
46 |
+
return utt2feat_path
|
47 |
+
|
48 |
+
|
49 |
+
def get_spk_map(spk2id_path, utt2spk_path):
|
50 |
+
utt2spk = {}
|
51 |
+
with open(spk2id_path, "r") as spk2id_file:
|
52 |
+
spk2id = json.load(spk2id_file)
|
53 |
+
with open(utt2spk_path, encoding="utf-8") as f:
|
54 |
+
for line in f.readlines():
|
55 |
+
utt, spk = line.strip().split("\t")
|
56 |
+
utt2spk[utt] = spk
|
57 |
+
return spk2id, utt2spk
|
58 |
+
|
59 |
+
|
60 |
+
def get_target_f0_median(f0_dir):
|
61 |
+
total_f0 = []
|
62 |
+
for utt in os.listdir(f0_dir):
|
63 |
+
if not utt.endswith(".npy"):
|
64 |
+
continue
|
65 |
+
f0_feat_path = os.path.join(f0_dir, utt)
|
66 |
+
f0 = np.load(f0_feat_path)
|
67 |
+
total_f0 += f0.tolist()
|
68 |
+
|
69 |
+
total_f0 = np.array(total_f0)
|
70 |
+
voiced_position = np.where(total_f0 != 0)
|
71 |
+
return np.median(total_f0[voiced_position])
|
72 |
+
|
73 |
+
|
74 |
+
def get_conversion_f0_factor(source_f0, target_median, source_median=None):
|
75 |
+
"""Align the median between source f0 and target f0
|
76 |
+
|
77 |
+
Note: Here we use multiplication, whose factor is target_median/source_median
|
78 |
+
|
79 |
+
Reference: Frequency and pitch interval
|
80 |
+
http://blog.ccyg.studio/article/be12c2ee-d47c-4098-9782-ca76da3035e4/
|
81 |
+
"""
|
82 |
+
if source_median is None:
|
83 |
+
voiced_position = np.where(source_f0 != 0)
|
84 |
+
source_median = np.median(source_f0[voiced_position])
|
85 |
+
factor = target_median / source_median
|
86 |
+
return source_median, factor
|
87 |
+
|
88 |
+
|
89 |
+
def transpose_key(frame_pitch, trans_key):
|
90 |
+
# Transpose by user's argument
|
91 |
+
print("Transpose key = {} ...\n".format(trans_key))
|
92 |
+
|
93 |
+
transed_pitch = frame_pitch * 2 ** (trans_key / 12)
|
94 |
+
return transed_pitch
|
95 |
+
|
96 |
+
|
97 |
+
def pitch_shift_to_target(frame_pitch, target_pitch_median, source_pitch_median=None):
|
98 |
+
# Loading F0 Base (median) and shift
|
99 |
+
source_pitch_median, factor = get_conversion_f0_factor(
|
100 |
+
frame_pitch, target_pitch_median, source_pitch_median
|
101 |
+
)
|
102 |
+
print(
|
103 |
+
"Auto transposing: source f0 median = {:.1f}, target f0 median = {:.1f}, factor = {:.2f}".format(
|
104 |
+
source_pitch_median, target_pitch_median, factor
|
105 |
+
)
|
106 |
+
)
|
107 |
+
transed_pitch = frame_pitch * factor
|
108 |
+
return transed_pitch
|
109 |
+
|
110 |
+
|
111 |
+
def load_frame_pitch(
|
112 |
+
meta_data,
|
113 |
+
processed_dir,
|
114 |
+
pitch_dir,
|
115 |
+
use_log_scale=False,
|
116 |
+
return_norm=False,
|
117 |
+
interoperate=False,
|
118 |
+
utt2spk=None,
|
119 |
+
):
|
120 |
+
utt2pitch = {}
|
121 |
+
utt2uv = {}
|
122 |
+
if utt2spk is None:
|
123 |
+
pitch_scaler = StandardScaler()
|
124 |
+
for utt_info in meta_data:
|
125 |
+
utt = utt_info["Dataset"] + "_" + utt_info["Uid"]
|
126 |
+
pitch_path = os.path.join(
|
127 |
+
processed_dir, utt_info["Dataset"], pitch_dir, f'{utt_info["Uid"]}.npy'
|
128 |
+
)
|
129 |
+
pitch = np.load(pitch_path)
|
130 |
+
assert len(pitch) > 0
|
131 |
+
uv = pitch != 0
|
132 |
+
utt2uv[utt] = uv
|
133 |
+
if use_log_scale:
|
134 |
+
nonzero_idxes = np.where(pitch != 0)[0]
|
135 |
+
pitch[nonzero_idxes] = np.log(pitch[nonzero_idxes])
|
136 |
+
utt2pitch[utt] = pitch
|
137 |
+
pitch_scaler.partial_fit(pitch.reshape(-1, 1))
|
138 |
+
|
139 |
+
mean, std = pitch_scaler.mean_[0], pitch_scaler.scale_[0]
|
140 |
+
if return_norm:
|
141 |
+
for utt_info in meta_data:
|
142 |
+
utt = utt_info["Dataset"] + "_" + utt_info["Uid"]
|
143 |
+
pitch = utt2pitch[utt]
|
144 |
+
normalized_pitch = (pitch - mean) / std
|
145 |
+
utt2pitch[utt] = normalized_pitch
|
146 |
+
pitch_statistic = {"mean": mean, "std": std}
|
147 |
+
else:
|
148 |
+
spk2utt = {}
|
149 |
+
pitch_statistic = []
|
150 |
+
for utt_info in meta_data:
|
151 |
+
utt = utt_info["Dataset"] + "_" + utt_info["Uid"]
|
152 |
+
if not utt2spk[utt] in spk2utt:
|
153 |
+
spk2utt[utt2spk[utt]] = []
|
154 |
+
spk2utt[utt2spk[utt]].append(utt)
|
155 |
+
|
156 |
+
for spk in spk2utt:
|
157 |
+
pitch_scaler = StandardScaler()
|
158 |
+
for utt in spk2utt[spk]:
|
159 |
+
dataset = utt.split("_")[0]
|
160 |
+
uid = "_".join(utt.split("_")[1:])
|
161 |
+
pitch_path = os.path.join(
|
162 |
+
processed_dir, dataset, pitch_dir, f"{uid}.npy"
|
163 |
+
)
|
164 |
+
pitch = np.load(pitch_path)
|
165 |
+
assert len(pitch) > 0
|
166 |
+
uv = pitch != 0
|
167 |
+
utt2uv[utt] = uv
|
168 |
+
if use_log_scale:
|
169 |
+
nonzero_idxes = np.where(pitch != 0)[0]
|
170 |
+
pitch[nonzero_idxes] = np.log(pitch[nonzero_idxes])
|
171 |
+
utt2pitch[utt] = pitch
|
172 |
+
pitch_scaler.partial_fit(pitch.reshape(-1, 1))
|
173 |
+
|
174 |
+
mean, std = pitch_scaler.mean_[0], pitch_scaler.scale_[0]
|
175 |
+
if return_norm:
|
176 |
+
for utt in spk2utt[spk]:
|
177 |
+
pitch = utt2pitch[utt]
|
178 |
+
normalized_pitch = (pitch - mean) / std
|
179 |
+
utt2pitch[utt] = normalized_pitch
|
180 |
+
pitch_statistic.append({"spk": spk, "mean": mean, "std": std})
|
181 |
+
|
182 |
+
return utt2pitch, utt2uv, pitch_statistic
|
183 |
+
|
184 |
+
|
185 |
+
# discard
|
186 |
+
def load_phone_pitch(
|
187 |
+
meta_data,
|
188 |
+
processed_dir,
|
189 |
+
pitch_dir,
|
190 |
+
utt2dur,
|
191 |
+
use_log_scale=False,
|
192 |
+
return_norm=False,
|
193 |
+
interoperate=True,
|
194 |
+
utt2spk=None,
|
195 |
+
):
|
196 |
+
print("Load Phone Pitch")
|
197 |
+
utt2pitch = {}
|
198 |
+
utt2uv = {}
|
199 |
+
if utt2spk is None:
|
200 |
+
pitch_scaler = StandardScaler()
|
201 |
+
for utt_info in tqdm(meta_data):
|
202 |
+
utt = utt_info["Dataset"] + "_" + utt_info["Uid"]
|
203 |
+
pitch_path = os.path.join(
|
204 |
+
processed_dir, utt_info["Dataset"], pitch_dir, f'{utt_info["Uid"]}.npy'
|
205 |
+
)
|
206 |
+
frame_pitch = np.load(pitch_path)
|
207 |
+
assert len(frame_pitch) > 0
|
208 |
+
uv = frame_pitch != 0
|
209 |
+
utt2uv[utt] = uv
|
210 |
+
phone_pitch = phone_average_pitch(frame_pitch, utt2dur[utt], interoperate)
|
211 |
+
if use_log_scale:
|
212 |
+
nonzero_idxes = np.where(phone_pitch != 0)[0]
|
213 |
+
phone_pitch[nonzero_idxes] = np.log(phone_pitch[nonzero_idxes])
|
214 |
+
utt2pitch[utt] = phone_pitch
|
215 |
+
pitch_scaler.partial_fit(remove_outlier(phone_pitch).reshape(-1, 1))
|
216 |
+
|
217 |
+
mean, std = pitch_scaler.mean_[0], pitch_scaler.scale_[0]
|
218 |
+
max_value = np.finfo(np.float64).min
|
219 |
+
min_value = np.finfo(np.float64).max
|
220 |
+
if return_norm:
|
221 |
+
for utt_info in meta_data:
|
222 |
+
utt = utt_info["Dataset"] + "_" + utt_info["Uid"]
|
223 |
+
pitch = utt2pitch[utt]
|
224 |
+
normalized_pitch = (pitch - mean) / std
|
225 |
+
max_value = max(max_value, max(normalized_pitch))
|
226 |
+
min_value = min(min_value, min(normalized_pitch))
|
227 |
+
utt2pitch[utt] = normalized_pitch
|
228 |
+
phone_normalized_pitch_path = os.path.join(
|
229 |
+
processed_dir,
|
230 |
+
utt_info["Dataset"],
|
231 |
+
"phone_level_" + pitch_dir,
|
232 |
+
f'{utt_info["Uid"]}.npy',
|
233 |
+
)
|
234 |
+
pitch_statistic = {
|
235 |
+
"mean": mean,
|
236 |
+
"std": std,
|
237 |
+
"min_value": min_value,
|
238 |
+
"max_value": max_value,
|
239 |
+
}
|
240 |
+
else:
|
241 |
+
spk2utt = {}
|
242 |
+
pitch_statistic = []
|
243 |
+
for utt_info in tqdm(meta_data):
|
244 |
+
utt = utt_info["Dataset"] + "_" + utt_info["Uid"]
|
245 |
+
if not utt2spk[utt] in spk2utt:
|
246 |
+
spk2utt[utt2spk[utt]] = []
|
247 |
+
spk2utt[utt2spk[utt]].append(utt)
|
248 |
+
|
249 |
+
for spk in spk2utt:
|
250 |
+
pitch_scaler = StandardScaler()
|
251 |
+
for utt in spk2utt[spk]:
|
252 |
+
dataset = utt.split("_")[0]
|
253 |
+
uid = "_".join(utt.split("_")[1:])
|
254 |
+
pitch_path = os.path.join(
|
255 |
+
processed_dir, dataset, pitch_dir, f"{uid}.npy"
|
256 |
+
)
|
257 |
+
frame_pitch = np.load(pitch_path)
|
258 |
+
assert len(frame_pitch) > 0
|
259 |
+
uv = frame_pitch != 0
|
260 |
+
utt2uv[utt] = uv
|
261 |
+
phone_pitch = phone_average_pitch(
|
262 |
+
frame_pitch, utt2dur[utt], interoperate
|
263 |
+
)
|
264 |
+
if use_log_scale:
|
265 |
+
nonzero_idxes = np.where(phone_pitch != 0)[0]
|
266 |
+
phone_pitch[nonzero_idxes] = np.log(phone_pitch[nonzero_idxes])
|
267 |
+
utt2pitch[utt] = phone_pitch
|
268 |
+
pitch_scaler.partial_fit(remove_outlier(phone_pitch).reshape(-1, 1))
|
269 |
+
|
270 |
+
mean, std = pitch_scaler.mean_[0], pitch_scaler.scale_[0]
|
271 |
+
max_value = np.finfo(np.float64).min
|
272 |
+
min_value = np.finfo(np.float64).max
|
273 |
+
|
274 |
+
if return_norm:
|
275 |
+
for utt in spk2utt[spk]:
|
276 |
+
pitch = utt2pitch[utt]
|
277 |
+
normalized_pitch = (pitch - mean) / std
|
278 |
+
max_value = max(max_value, max(normalized_pitch))
|
279 |
+
min_value = min(min_value, min(normalized_pitch))
|
280 |
+
utt2pitch[utt] = normalized_pitch
|
281 |
+
pitch_statistic.append(
|
282 |
+
{
|
283 |
+
"spk": spk,
|
284 |
+
"mean": mean,
|
285 |
+
"std": std,
|
286 |
+
"min_value": min_value,
|
287 |
+
"max_value": max_value,
|
288 |
+
}
|
289 |
+
)
|
290 |
+
|
291 |
+
return utt2pitch, utt2uv, pitch_statistic
|
292 |
+
|
293 |
+
|
294 |
+
def phone_average_pitch(pitch, dur, interoperate=False):
|
295 |
+
pos = 0
|
296 |
+
|
297 |
+
if interoperate:
|
298 |
+
nonzero_ids = np.where(pitch != 0)[0]
|
299 |
+
interp_fn = interp1d(
|
300 |
+
nonzero_ids,
|
301 |
+
pitch[nonzero_ids],
|
302 |
+
fill_value=(pitch[nonzero_ids[0]], pitch[nonzero_ids[-1]]),
|
303 |
+
bounds_error=False,
|
304 |
+
)
|
305 |
+
pitch = interp_fn(np.arange(0, len(pitch)))
|
306 |
+
phone_pitch = np.zeros(len(dur))
|
307 |
+
|
308 |
+
for i, d in enumerate(dur):
|
309 |
+
d = int(d)
|
310 |
+
if d > 0 and pos < len(pitch):
|
311 |
+
phone_pitch[i] = np.mean(pitch[pos : pos + d])
|
312 |
+
else:
|
313 |
+
phone_pitch[i] = 0
|
314 |
+
pos += d
|
315 |
+
return phone_pitch
|
316 |
+
|
317 |
+
|
318 |
+
def load_energy(
|
319 |
+
meta_data,
|
320 |
+
processed_dir,
|
321 |
+
energy_dir,
|
322 |
+
use_log_scale=False,
|
323 |
+
return_norm=False,
|
324 |
+
utt2spk=None,
|
325 |
+
):
|
326 |
+
utt2energy = {}
|
327 |
+
if utt2spk is None:
|
328 |
+
for utt_info in meta_data:
|
329 |
+
utt = utt_info["Dataset"] + "_" + utt_info["Uid"]
|
330 |
+
energy_path = os.path.join(
|
331 |
+
processed_dir, utt_info["Dataset"], energy_dir, f'{utt_info["Uid"]}.npy'
|
332 |
+
)
|
333 |
+
if not os.path.exists(energy_path):
|
334 |
+
continue
|
335 |
+
energy = np.load(energy_path)
|
336 |
+
assert len(energy) > 0
|
337 |
+
|
338 |
+
if use_log_scale:
|
339 |
+
nonzero_idxes = np.where(energy != 0)[0]
|
340 |
+
energy[nonzero_idxes] = np.log(energy[nonzero_idxes])
|
341 |
+
utt2energy[utt] = energy
|
342 |
+
|
343 |
+
if return_norm:
|
344 |
+
with open(
|
345 |
+
os.path.join(
|
346 |
+
processed_dir, utt_info["Dataset"], energy_dir, "statistics.json"
|
347 |
+
)
|
348 |
+
) as f:
|
349 |
+
stats = json.load(f)
|
350 |
+
mean, std = (
|
351 |
+
stats[utt_info["Dataset"] + "_" + utt_info["Singer"]][
|
352 |
+
"voiced_positions"
|
353 |
+
]["mean"],
|
354 |
+
stats["LJSpeech_LJSpeech"]["voiced_positions"]["std"],
|
355 |
+
)
|
356 |
+
for utt in utt2energy.keys():
|
357 |
+
energy = utt2energy[utt]
|
358 |
+
normalized_energy = (energy - mean) / std
|
359 |
+
utt2energy[utt] = normalized_energy
|
360 |
+
|
361 |
+
energy_statistic = {"mean": mean, "std": std}
|
362 |
+
else:
|
363 |
+
spk2utt = {}
|
364 |
+
energy_statistic = []
|
365 |
+
for utt_info in meta_data:
|
366 |
+
utt = utt_info["Dataset"] + "_" + utt_info["Uid"]
|
367 |
+
if not utt2spk[utt] in spk2utt:
|
368 |
+
spk2utt[utt2spk[utt]] = []
|
369 |
+
spk2utt[utt2spk[utt]].append(utt)
|
370 |
+
|
371 |
+
for spk in spk2utt:
|
372 |
+
energy_scaler = StandardScaler()
|
373 |
+
for utt in spk2utt[spk]:
|
374 |
+
dataset = utt.split("_")[0]
|
375 |
+
uid = "_".join(utt.split("_")[1:])
|
376 |
+
energy_path = os.path.join(
|
377 |
+
processed_dir, dataset, energy_dir, f"{uid}.npy"
|
378 |
+
)
|
379 |
+
if not os.path.exists(energy_path):
|
380 |
+
continue
|
381 |
+
frame_energy = np.load(energy_path)
|
382 |
+
assert len(frame_energy) > 0
|
383 |
+
|
384 |
+
if use_log_scale:
|
385 |
+
nonzero_idxes = np.where(frame_energy != 0)[0]
|
386 |
+
frame_energy[nonzero_idxes] = np.log(frame_energy[nonzero_idxes])
|
387 |
+
utt2energy[utt] = frame_energy
|
388 |
+
energy_scaler.partial_fit(frame_energy.reshape(-1, 1))
|
389 |
+
|
390 |
+
mean, std = energy_scaler.mean_[0], energy_scaler.scale_[0]
|
391 |
+
if return_norm:
|
392 |
+
for utt in spk2utt[spk]:
|
393 |
+
energy = utt2energy[utt]
|
394 |
+
normalized_energy = (energy - mean) / std
|
395 |
+
utt2energy[utt] = normalized_energy
|
396 |
+
energy_statistic.append({"spk": spk, "mean": mean, "std": std})
|
397 |
+
|
398 |
+
return utt2energy, energy_statistic
|
399 |
+
|
400 |
+
|
401 |
+
def load_frame_energy(
|
402 |
+
meta_data,
|
403 |
+
processed_dir,
|
404 |
+
energy_dir,
|
405 |
+
use_log_scale=False,
|
406 |
+
return_norm=False,
|
407 |
+
interoperate=False,
|
408 |
+
utt2spk=None,
|
409 |
+
):
|
410 |
+
utt2energy = {}
|
411 |
+
if utt2spk is None:
|
412 |
+
energy_scaler = StandardScaler()
|
413 |
+
for utt_info in meta_data:
|
414 |
+
utt = utt_info["Dataset"] + "_" + utt_info["Uid"]
|
415 |
+
energy_path = os.path.join(
|
416 |
+
processed_dir, utt_info["Dataset"], energy_dir, f'{utt_info["Uid"]}.npy'
|
417 |
+
)
|
418 |
+
frame_energy = np.load(energy_path)
|
419 |
+
assert len(frame_energy) > 0
|
420 |
+
|
421 |
+
if use_log_scale:
|
422 |
+
nonzero_idxes = np.where(frame_energy != 0)[0]
|
423 |
+
frame_energy[nonzero_idxes] = np.log(frame_energy[nonzero_idxes])
|
424 |
+
utt2energy[utt] = frame_energy
|
425 |
+
energy_scaler.partial_fit(frame_energy.reshape(-1, 1))
|
426 |
+
|
427 |
+
mean, std = energy_scaler.mean_[0], energy_scaler.scale_[0]
|
428 |
+
if return_norm:
|
429 |
+
for utt_info in meta_data:
|
430 |
+
utt = utt_info["Dataset"] + "_" + utt_info["Uid"]
|
431 |
+
energy = utt2energy[utt]
|
432 |
+
normalized_energy = (energy - mean) / std
|
433 |
+
utt2energy[utt] = normalized_energy
|
434 |
+
energy_statistic = {"mean": mean, "std": std}
|
435 |
+
|
436 |
+
else:
|
437 |
+
spk2utt = {}
|
438 |
+
energy_statistic = []
|
439 |
+
for utt_info in meta_data:
|
440 |
+
utt = utt_info["Dataset"] + "_" + utt_info["Uid"]
|
441 |
+
if not utt2spk[utt] in spk2utt:
|
442 |
+
spk2utt[utt2spk[utt]] = []
|
443 |
+
spk2utt[utt2spk[utt]].append(utt)
|
444 |
+
|
445 |
+
for spk in spk2utt:
|
446 |
+
energy_scaler = StandardScaler()
|
447 |
+
for utt in spk2utt[spk]:
|
448 |
+
dataset = utt.split("_")[0]
|
449 |
+
uid = "_".join(utt.split("_")[1:])
|
450 |
+
energy_path = os.path.join(
|
451 |
+
processed_dir, dataset, energy_dir, f"{uid}.npy"
|
452 |
+
)
|
453 |
+
frame_energy = np.load(energy_path)
|
454 |
+
assert len(frame_energy) > 0
|
455 |
+
|
456 |
+
if use_log_scale:
|
457 |
+
nonzero_idxes = np.where(frame_energy != 0)[0]
|
458 |
+
frame_energy[nonzero_idxes] = np.log(frame_energy[nonzero_idxes])
|
459 |
+
utt2energy[utt] = frame_energy
|
460 |
+
energy_scaler.partial_fit(frame_energy.reshape(-1, 1))
|
461 |
+
|
462 |
+
mean, std = energy_scaler.mean_[0], energy_scaler.scale_[0]
|
463 |
+
if return_norm:
|
464 |
+
for utt in spk2utt[spk]:
|
465 |
+
energy = utt2energy[utt]
|
466 |
+
normalized_energy = (energy - mean) / std
|
467 |
+
utt2energy[utt] = normalized_energy
|
468 |
+
energy_statistic.append({"spk": spk, "mean": mean, "std": std})
|
469 |
+
|
470 |
+
return utt2energy, energy_statistic
|
471 |
+
|
472 |
+
|
473 |
+
def align_length(feature, target_len, pad_value=0.0):
|
474 |
+
feature_len = feature.shape[-1]
|
475 |
+
dim = len(feature.shape)
|
476 |
+
# align 1-D data
|
477 |
+
if dim == 2:
|
478 |
+
if target_len > feature_len:
|
479 |
+
feature = np.pad(
|
480 |
+
feature,
|
481 |
+
((0, 0), (0, target_len - feature_len)),
|
482 |
+
constant_values=pad_value,
|
483 |
+
)
|
484 |
+
else:
|
485 |
+
feature = feature[:, :target_len]
|
486 |
+
# align 2-D data
|
487 |
+
elif dim == 1:
|
488 |
+
if target_len > feature_len:
|
489 |
+
feature = np.pad(
|
490 |
+
feature, (0, target_len - feature_len), constant_values=pad_value
|
491 |
+
)
|
492 |
+
else:
|
493 |
+
feature = feature[:target_len]
|
494 |
+
else:
|
495 |
+
raise NotImplementedError
|
496 |
+
return feature
|
497 |
+
|
498 |
+
|
499 |
+
def align_whisper_feauture_length(
|
500 |
+
feature, target_len, fast_mapping=True, source_hop=320, target_hop=256
|
501 |
+
):
|
502 |
+
factor = np.gcd(source_hop, target_hop)
|
503 |
+
source_hop //= factor
|
504 |
+
target_hop //= factor
|
505 |
+
# print(
|
506 |
+
# "Mapping source's {} frames => target's {} frames".format(
|
507 |
+
# target_hop, source_hop
|
508 |
+
# )
|
509 |
+
# )
|
510 |
+
|
511 |
+
max_source_len = 1500
|
512 |
+
target_len = min(target_len, max_source_len * source_hop // target_hop)
|
513 |
+
|
514 |
+
width = feature.shape[-1]
|
515 |
+
|
516 |
+
if fast_mapping:
|
517 |
+
source_len = target_len * target_hop // source_hop + 1
|
518 |
+
feature = feature[:source_len]
|
519 |
+
|
520 |
+
else:
|
521 |
+
source_len = max_source_len
|
522 |
+
|
523 |
+
# const ~= target_len * target_hop
|
524 |
+
const = source_len * source_hop // target_hop * target_hop
|
525 |
+
|
526 |
+
# (source_len * source_hop, dim)
|
527 |
+
up_sampling_feats = np.repeat(feature, source_hop, axis=0)
|
528 |
+
# (const, dim) -> (const/target_hop, target_hop, dim) -> (const/target_hop, dim)
|
529 |
+
down_sampling_feats = np.average(
|
530 |
+
up_sampling_feats[:const].reshape(-1, target_hop, width), axis=1
|
531 |
+
)
|
532 |
+
assert len(down_sampling_feats) >= target_len
|
533 |
+
|
534 |
+
# (target_len, dim)
|
535 |
+
feat = down_sampling_feats[:target_len]
|
536 |
+
|
537 |
+
return feat
|
538 |
+
|
539 |
+
|
540 |
+
def align_content_feature_length(feature, target_len, source_hop=320, target_hop=256):
|
541 |
+
factor = np.gcd(source_hop, target_hop)
|
542 |
+
source_hop //= factor
|
543 |
+
target_hop //= factor
|
544 |
+
# print(
|
545 |
+
# "Mapping source's {} frames => target's {} frames".format(
|
546 |
+
# target_hop, source_hop
|
547 |
+
# )
|
548 |
+
# )
|
549 |
+
|
550 |
+
# (source_len, 256)
|
551 |
+
source_len, width = feature.shape
|
552 |
+
|
553 |
+
# const ~= target_len * target_hop
|
554 |
+
const = source_len * source_hop // target_hop * target_hop
|
555 |
+
|
556 |
+
# (source_len * source_hop, dim)
|
557 |
+
up_sampling_feats = np.repeat(feature, source_hop, axis=0)
|
558 |
+
# (const, dim) -> (const/target_hop, target_hop, dim) -> (const/target_hop, dim)
|
559 |
+
down_sampling_feats = np.average(
|
560 |
+
up_sampling_feats[:const].reshape(-1, target_hop, width), axis=1
|
561 |
+
)
|
562 |
+
|
563 |
+
err = abs(target_len - len(down_sampling_feats))
|
564 |
+
if err > 4: ## why 4 not 3?
|
565 |
+
print("target_len:", target_len)
|
566 |
+
print("raw feature:", feature.shape)
|
567 |
+
print("up_sampling:", up_sampling_feats.shape)
|
568 |
+
print("down_sampling_feats:", down_sampling_feats.shape)
|
569 |
+
exit()
|
570 |
+
if len(down_sampling_feats) < target_len:
|
571 |
+
# (1, dim) -> (err, dim)
|
572 |
+
end = down_sampling_feats[-1][None, :].repeat(err, axis=0)
|
573 |
+
down_sampling_feats = np.concatenate([down_sampling_feats, end], axis=0)
|
574 |
+
|
575 |
+
# (target_len, dim)
|
576 |
+
feat = down_sampling_feats[:target_len]
|
577 |
+
|
578 |
+
return feat
|
579 |
+
|
580 |
+
|
581 |
+
def remove_outlier(values):
|
582 |
+
values = np.array(values)
|
583 |
+
p25 = np.percentile(values, 25)
|
584 |
+
p75 = np.percentile(values, 75)
|
585 |
+
lower = p25 - 1.5 * (p75 - p25)
|
586 |
+
upper = p75 + 1.5 * (p75 - p25)
|
587 |
+
normal_indices = np.logical_and(values > lower, values < upper)
|
588 |
+
return values[normal_indices]
|
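A minimal usage sketch for the alignment helpers above. This snippet is not part of the uploaded files: the import path assumes the module is used as `utils.data_utils`, and the arrays are random stand-ins chosen only to make the hop-size arithmetic visible (100 frames at hop 320 map to 100 * 320 / 256 = 125 frames at hop 256).

import numpy as np
from utils.data_utils import align_content_feature_length, align_length

content = np.random.randn(100, 256)          # 100 frames of 320-hop content features
aligned = align_content_feature_length(content, target_len=125)
print(aligned.shape)                         # (125, 256)

mel = np.random.randn(80, 120)               # (n_mels, frame_len)
print(align_length(mel, 130).shape)          # 2-D padding along time -> (80, 130)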
utils/distribution.py
ADDED
@@ -0,0 +1,270 @@
# Copyright (c) 2023 Amphion.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

import numpy as np
import torch
import torch.nn.functional as F

from torch.distributions import Normal


def log_sum_exp(x):
    """numerically stable log_sum_exp implementation that prevents overflow"""
    # TF ordering
    axis = len(x.size()) - 1
    m, _ = torch.max(x, dim=axis)
    m2, _ = torch.max(x, dim=axis, keepdim=True)
    return m + torch.log(torch.sum(torch.exp(x - m2), dim=axis))


def discretized_mix_logistic_loss(
    y_hat, y, num_classes=256, log_scale_min=-7.0, reduce=True
):
    """Discretized mixture of logistic distributions loss

    Note that it is assumed that input is scaled to [-1, 1].

    Args:
        y_hat (Tensor): Predicted output (B x C x T)
        y (Tensor): Target (B x T x 1).
        num_classes (int): Number of classes
        log_scale_min (float): Log scale minimum value
        reduce (bool): If True, the losses are averaged or summed for each
            minibatch.

    Returns
        Tensor: loss
    """
    assert y_hat.dim() == 3
    assert y_hat.size(1) % 3 == 0
    nr_mix = y_hat.size(1) // 3

    # (B x T x C)
    y_hat = y_hat.transpose(1, 2)

    # unpack parameters. (B, T, num_mixtures) x 3
    logit_probs = y_hat[:, :, :nr_mix]
    means = y_hat[:, :, nr_mix : 2 * nr_mix]
    log_scales = torch.clamp(y_hat[:, :, 2 * nr_mix : 3 * nr_mix], min=log_scale_min)

    # B x T x 1 -> B x T x num_mixtures
    y = y.expand_as(means)

    centered_y = y - means
    inv_stdv = torch.exp(-log_scales)
    plus_in = inv_stdv * (centered_y + 1.0 / (num_classes - 1))
    cdf_plus = torch.sigmoid(plus_in)
    min_in = inv_stdv * (centered_y - 1.0 / (num_classes - 1))
    cdf_min = torch.sigmoid(min_in)

    # log probability for edge case of 0 (before scaling)
    # equivalent: torch.log(torch.sigmoid(plus_in))
    log_cdf_plus = plus_in - F.softplus(plus_in)

    # log probability for edge case of 255 (before scaling)
    # equivalent: (1 - torch.sigmoid(min_in)).log()
    log_one_minus_cdf_min = -F.softplus(min_in)

    # probability for all other cases
    cdf_delta = cdf_plus - cdf_min

    mid_in = inv_stdv * centered_y
    # log probability in the center of the bin, to be used in extreme cases
    # (not actually used in our code)
    log_pdf_mid = mid_in - log_scales - 2.0 * F.softplus(mid_in)

    # tf equivalent
    """
    log_probs = tf.where(x < -0.999, log_cdf_plus,
                         tf.where(x > 0.999, log_one_minus_cdf_min,
                                  tf.where(cdf_delta > 1e-5,
                                           tf.log(tf.maximum(cdf_delta, 1e-12)),
                                           log_pdf_mid - np.log(127.5))))
    """
    # TODO: cdf_delta <= 1e-5 actually can happen. How can we choose the value
    # for num_classes=65536 case? 1e-7? not sure..
    inner_inner_cond = (cdf_delta > 1e-5).float()

    inner_inner_out = inner_inner_cond * torch.log(
        torch.clamp(cdf_delta, min=1e-12)
    ) + (1.0 - inner_inner_cond) * (log_pdf_mid - np.log((num_classes - 1) / 2))
    inner_cond = (y > 0.999).float()
    inner_out = (
        inner_cond * log_one_minus_cdf_min + (1.0 - inner_cond) * inner_inner_out
    )
    cond = (y < -0.999).float()
    log_probs = cond * log_cdf_plus + (1.0 - cond) * inner_out

    log_probs = log_probs + F.log_softmax(logit_probs, -1)

    if reduce:
        return -torch.sum(log_sum_exp(log_probs))
    else:
        return -log_sum_exp(log_probs).unsqueeze(-1)


def to_one_hot(tensor, n, fill_with=1.0):
    # we perform one-hot encoding with respect to the last axis
    one_hot = torch.FloatTensor(tensor.size() + (n,)).zero_()
    if tensor.is_cuda:
        one_hot = one_hot.cuda()
    one_hot.scatter_(len(tensor.size()), tensor.unsqueeze(-1), fill_with)
    return one_hot


def sample_from_discretized_mix_logistic(y, log_scale_min=-7.0, clamp_log_scale=False):
    """
    Sample from discretized mixture of logistic distributions

    Args:
        y (Tensor): B x C x T
        log_scale_min (float): Log scale minimum value

    Returns:
        Tensor: sample in range of [-1, 1].
    """
    assert y.size(1) % 3 == 0
    nr_mix = y.size(1) // 3

    # B x T x C
    y = y.transpose(1, 2)
    logit_probs = y[:, :, :nr_mix]

    # sample mixture indicator from softmax
    temp = logit_probs.data.new(logit_probs.size()).uniform_(1e-5, 1.0 - 1e-5)
    temp = logit_probs.data - torch.log(-torch.log(temp))
    _, argmax = temp.max(dim=-1)

    # (B, T) -> (B, T, nr_mix)
    one_hot = to_one_hot(argmax, nr_mix)
    # select logistic parameters
    means = torch.sum(y[:, :, nr_mix : 2 * nr_mix] * one_hot, dim=-1)
    log_scales = torch.sum(y[:, :, 2 * nr_mix : 3 * nr_mix] * one_hot, dim=-1)
    if clamp_log_scale:
        log_scales = torch.clamp(log_scales, min=log_scale_min)
    # sample from logistic & clip to interval
    # we don't actually round to the nearest 8bit value when sampling
    u = means.data.new(means.size()).uniform_(1e-5, 1.0 - 1e-5)
    x = means + torch.exp(log_scales) * (torch.log(u) - torch.log(1.0 - u))

    x = torch.clamp(torch.clamp(x, min=-1.0), max=1.0)

    return x


# we can easily define discretized version of the gaussian loss, however,
# use continuous version as same as the https://clarinet-demo.github.io/
def mix_gaussian_loss(y_hat, y, log_scale_min=-7.0, reduce=True):
    """Mixture of continuous gaussian distributions loss

    Note that it is assumed that input is scaled to [-1, 1].

    Args:
        y_hat (Tensor): Predicted output (B x C x T)
        y (Tensor): Target (B x T x 1).
        log_scale_min (float): Log scale minimum value
        reduce (bool): If True, the losses are averaged or summed for each
            minibatch.
    Returns
        Tensor: loss
    """
    assert y_hat.dim() == 3
    C = y_hat.size(1)
    if C == 2:
        nr_mix = 1
    else:
        assert y_hat.size(1) % 3 == 0
        nr_mix = y_hat.size(1) // 3

    # (B x T x C)
    y_hat = y_hat.transpose(1, 2)

    # unpack parameters.
    if C == 2:
        # special case for C == 2, just for compatibility
        logit_probs = None
        means = y_hat[:, :, 0:1]
        log_scales = torch.clamp(y_hat[:, :, 1:2], min=log_scale_min)
    else:
        # (B, T, num_mixtures) x 3
        logit_probs = y_hat[:, :, :nr_mix]
        means = y_hat[:, :, nr_mix : 2 * nr_mix]
        log_scales = torch.clamp(
            y_hat[:, :, 2 * nr_mix : 3 * nr_mix], min=log_scale_min
        )

    # B x T x 1 -> B x T x num_mixtures
    y = y.expand_as(means)

    centered_y = y - means
    dist = Normal(loc=0.0, scale=torch.exp(log_scales))
    # do we need to add a trick to avoid log(0)?
    log_probs = dist.log_prob(centered_y)

    if nr_mix > 1:
        log_probs = log_probs + F.log_softmax(logit_probs, -1)

    if reduce:
        if nr_mix == 1:
            return -torch.sum(log_probs)
        else:
            return -torch.sum(log_sum_exp(log_probs))
    else:
        if nr_mix == 1:
            return -log_probs
        else:
            return -log_sum_exp(log_probs).unsqueeze(-1)


def sample_from_mix_gaussian(y, log_scale_min=-7.0):
    """
    Sample from (discretized) mixture of gaussian distributions
    Args:
        y (Tensor): B x C x T
        log_scale_min (float): Log scale minimum value
    Returns:
        Tensor: sample in range of [-1, 1].
    """
    C = y.size(1)
    if C == 2:
        nr_mix = 1
    else:
        assert y.size(1) % 3 == 0
        nr_mix = y.size(1) // 3

    # B x T x C
    y = y.transpose(1, 2)

    if C == 2:
        logit_probs = None
    else:
        logit_probs = y[:, :, :nr_mix]

    if nr_mix > 1:
        # sample mixture indicator from softmax
        temp = logit_probs.data.new(logit_probs.size()).uniform_(1e-5, 1.0 - 1e-5)
        temp = logit_probs.data - torch.log(-torch.log(temp))
        _, argmax = temp.max(dim=-1)

        # (B, T) -> (B, T, nr_mix)
        one_hot = to_one_hot(argmax, nr_mix)

        # Select means and log scales
        means = torch.sum(y[:, :, nr_mix : 2 * nr_mix] * one_hot, dim=-1)
        log_scales = torch.sum(y[:, :, 2 * nr_mix : 3 * nr_mix] * one_hot, dim=-1)
    else:
        if C == 2:
            means, log_scales = y[:, :, 0], y[:, :, 1]
        elif C == 3:
            means, log_scales = y[:, :, 1], y[:, :, 2]
        else:
            assert False, "shouldn't happen"

    scales = torch.exp(log_scales)
    dist = Normal(loc=means, scale=scales)
    x = dist.sample()

    x = torch.clamp(x, min=-1.0, max=1.0)
    return x
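A shape-level sketch of how the two mixture-of-logistics helpers above fit together. This is not part of the uploaded files; the 10-component network output is random, so the loss value is meaningless and only the tensor contract (B x C x T in, B x T out) is being illustrated.

import torch
from utils.distribution import (
    discretized_mix_logistic_loss,
    sample_from_discretized_mix_logistic,
)

nr_mix = 10
y_hat = torch.randn(2, 3 * nr_mix, 100)      # (B, C, T): logits, means, log-scales
y = torch.empty(2, 100, 1).uniform_(-1, 1)   # target waveform scaled to [-1, 1]

loss = discretized_mix_logistic_loss(y_hat, y, num_classes=256)
sample = sample_from_discretized_mix_logistic(y_hat)
print(loss.item(), sample.shape)             # scalar loss, torch.Size([2, 100])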
utils/dsp.py
ADDED
@@ -0,0 +1,97 @@
# Copyright (c) 2023 Amphion.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

import numpy as np
import torch

# ZERO = 1e-12


def gaussian_normalize_mel_channel(mel, mu, sigma):
    """
    Shift to standard normal distribution

    Args:
        mel: (n_mels, frame_len)
        mu: (n_mels,), mean value
        sigma: (n_mels,), standard deviation value
    Return:
        Tensor like mel
    """
    mu = np.expand_dims(mu, -1)
    sigma = np.expand_dims(sigma, -1)
    return (mel - mu) / sigma


def de_gaussian_normalize_mel_channel(mel, mu, sigma):
    """

    Args:
        mel: (n_mels, frame_len)
        mu: (n_mels,), mean value
        sigma: (n_mels,), standard deviation value
    Return:
        Tensor like mel
    """
    mu = np.expand_dims(mu, -1)
    sigma = np.expand_dims(sigma, -1)
    return sigma * mel + mu


def decompress(audio_compressed, bits):
    mu = 2**bits - 1
    audio = np.sign(audio_compressed) / mu * ((1 + mu) ** np.abs(audio_compressed) - 1)
    return audio


def compress(audio, bits):
    mu = 2**bits - 1
    audio_compressed = np.sign(audio) * np.log(1 + mu * np.abs(audio)) / np.log(mu + 1)
    return audio_compressed


def label_to_audio(quant, bits):
    classes = 2**bits
    audio = 2 * quant / (classes - 1.0) - 1.0
    return audio


def audio_to_label(audio, bits):
    """Normalized audio data tensor to digit array

    Args:
        audio (tensor): audio data
        bits (int): data bits

    Returns:
        array<int>: digit array of audio data
    """
    classes = 2**bits
    # initialize an increasing array with values from -1 to 1
    bins = np.linspace(-1, 1, classes)
    # change value in audio tensor to digits
    quant = np.digitize(audio, bins) - 1
    return quant


def label_to_onehot(x, bits):
    """Converts a class vector (integers) to binary class matrix.
    Args:
        x: class vector to be converted into a matrix
            (integers from 0 to 2**bits - 1).
        bits: data bits; the number of classes is 2**bits.
    Returns:
        A binary matrix representation of the input. The classes axis
        is placed last.
    """
    classes = 2**bits

    result = torch.zeros((x.shape[0], classes), dtype=torch.float32)
    for i in range(x.shape[0]):
        result[i, x[i]] = 1

    output_shape = x.shape + (classes,)
    output = torch.reshape(result, output_shape)
    return output
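A quick round-trip check for the mu-law helpers above (a sketch, not part of the module): compress to the mu-law domain, quantize to 8-bit labels, then invert both steps. The residual is bounded by the quantization step.

import numpy as np
from utils.dsp import audio_to_label, compress, decompress, label_to_audio

audio = np.sin(np.linspace(0, 8 * np.pi, 16000))           # toy signal in [-1, 1]
labels = audio_to_label(compress(audio, bits=8), bits=8)   # ints in [0, 255]
restored = decompress(label_to_audio(labels, bits=8), bits=8)
print(np.abs(audio - restored).max())                      # small quantization error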
utils/duration.py
ADDED
@@ -0,0 +1,86 @@
# Copyright (c) 2023 Amphion.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

import numpy as np
import os
import tgt


def get_alignment(tier, cfg):
    sample_rate = cfg["sample_rate"]
    hop_size = cfg["hop_size"]

    sil_phones = ["sil", "sp", "spn"]

    phones = []
    durations = []
    start_time = 0
    end_time = 0
    end_idx = 0

    for t in tier._objects:
        s, e, p = t.start_time, t.end_time, t.text

        # Trim leading silences
        if phones == []:
            if p in sil_phones:
                continue
            else:
                start_time = s

        if p not in sil_phones:
            # For ordinary phones
            phones.append(p)
            end_time = e
            end_idx = len(phones)
        else:
            # For silent phones
            phones.append(p)

        durations.append(
            int(
                np.round(e * sample_rate / hop_size)
                - np.round(s * sample_rate / hop_size)
            )
        )

    # Trim trailing silences
    phones = phones[:end_idx]
    durations = durations[:end_idx]

    return phones, durations, start_time, end_time


def get_duration(utt, wav, cfg):
    speaker = utt["Singer"]
    basename = utt["Uid"]
    dataset = utt["Dataset"]
    sample_rate = cfg["sample_rate"]

    # print(cfg.processed_dir, dataset, speaker, basename)
    wav_path = os.path.join(
        cfg.processed_dir, dataset, "raw_data", speaker, "{}.wav".format(basename)
    )
    text_path = os.path.join(
        cfg.processed_dir, dataset, "raw_data", speaker, "{}.lab".format(basename)
    )
    tg_path = os.path.join(
        cfg.processed_dir, dataset, "TextGrid", speaker, "{}.TextGrid".format(basename)
    )

    # Read raw text
    with open(text_path, "r") as f:
        raw_text = f.readline().strip("\n")

    # Get alignments
    textgrid = tgt.io.read_textgrid(tg_path)
    phone, duration, start, end = get_alignment(
        textgrid.get_tier_by_name("phones"), cfg
    )
    text = "{" + " ".join(phone) + "}"
    if start >= end:
        return None

    return duration, text, int(sample_rate * start), int(sample_rate * end)
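A worked instance of the frame-duration arithmetic in get_alignment, under an assumed 24 kHz / 256-sample-hop configuration (the numbers are illustrative, not a project default): a phone spanning 0.10 s to 0.25 s yields round(0.25 * 24000 / 256) - round(0.10 * 24000 / 256) = 23 - 9 = 14 frames. Rounding the two boundaries before subtracting keeps the per-utterance frame total consistent with the acoustic features.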
utils/f0.py
ADDED
@@ -0,0 +1,275 @@
# Copyright (c) 2023 Amphion.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

import librosa
import numpy as np
import torch
import parselmouth
import torchcrepe
import pyworld as pw


def f0_to_coarse(f0, pitch_bin, f0_min, f0_max):
    """
    Convert f0 (Hz) to pitch (mel scale), and then quantize the mel-scale pitch to the
    range from [1, 2, 3, ..., pitch_bin-1]

    Reference: https://en.wikipedia.org/wiki/Mel_scale

    Args:
        f0 (array or Tensor): Hz
        pitch_bin (int): the vocabulary size
        f0_min (int): the minimum f0 (Hz)
        f0_max (int): the maximum f0 (Hz)

    Returns:
        quantized f0 (array or Tensor)
    """
    f0_mel_min = 1127 * np.log(1 + f0_min / 700)
    f0_mel_max = 1127 * np.log(1 + f0_max / 700)

    is_torch = isinstance(f0, torch.Tensor)
    f0_mel = 1127 * (1 + f0 / 700).log() if is_torch else 1127 * np.log(1 + f0 / 700)
    f0_mel[f0_mel > 0] = (f0_mel[f0_mel > 0] - f0_mel_min) * (pitch_bin - 2) / (
        f0_mel_max - f0_mel_min
    ) + 1

    f0_mel[f0_mel <= 1] = 1
    f0_mel[f0_mel > pitch_bin - 1] = pitch_bin - 1
    f0_coarse = (f0_mel + 0.5).long() if is_torch else np.rint(f0_mel).astype(np.int32)
    assert f0_coarse.max() <= 255 and f0_coarse.min() >= 1, (
        f0_coarse.max(),
        f0_coarse.min(),
    )
    return f0_coarse


def interpolate(f0):
    """Interpolate the unvoiced part. Thus the f0 can be passed to a subtractive synthesizer.
    Args:
        f0: A numpy array of shape (seq_len,)
    Returns:
        f0: Interpolated f0 of shape (seq_len,)
        uv: Unvoiced part of shape (seq_len,)
    """
    uv = f0 == 0
    if len(f0[~uv]) > 0:
        # interpolate the unvoiced f0
        f0[uv] = np.interp(np.where(uv)[0], np.where(~uv)[0], f0[~uv])
        uv = uv.astype("float")
        uv = np.min(np.array([uv[:-2], uv[1:-1], uv[2:]]), axis=0)
        uv = np.pad(uv, (1, 1))
    return f0, uv


def get_log_f0(f0):
    f0[np.where(f0 == 0)] = 1
    log_f0 = np.log(f0)
    return log_f0


def get_f0_features_using_pyin(audio, cfg):
    """Using pyin to extract the f0 feature.
    Args:
        audio
        fs
        win_length
        hop_length
        f0_min
        f0_max
    Returns:
        f0: numpy array of shape (frame_len,)
    """
    f0, voiced_flag, voiced_probs = librosa.pyin(
        y=audio,
        fmin=cfg.f0_min,
        fmax=cfg.f0_max,
        sr=cfg.sample_rate,
        win_length=cfg.win_size,
        hop_length=cfg.hop_size,
    )
    # Set nan to 0
    f0[voiced_flag == False] = 0
    return f0


def get_f0_features_using_parselmouth(audio, cfg, speed=1):
    """Using parselmouth to extract the f0 feature.
    Args:
        audio
        mel_len
        hop_length
        fs
        f0_min
        f0_max
        speed(default=1)
    Returns:
        f0: numpy array of shape (frame_len,)
        pitch_coarse: numpy array of shape (frame_len,)
    """
    hop_size = int(np.round(cfg.hop_size * speed))

    # Calculate the time step for pitch extraction
    time_step = hop_size / cfg.sample_rate * 1000

    f0 = (
        parselmouth.Sound(audio, cfg.sample_rate)
        .to_pitch_ac(
            time_step=time_step / 1000,
            voicing_threshold=0.6,
            pitch_floor=cfg.f0_min,
            pitch_ceiling=cfg.f0_max,
        )
        .selected_array["frequency"]
    )
    return f0


def get_f0_features_using_dio(audio, cfg):
    """Using dio to extract the f0 feature.
    Args:
        audio
        mel_len
        fs
        hop_length
        f0_min
        f0_max
    Returns:
        f0: numpy array of shape (frame_len,)
    """
    # Get the raw f0
    _f0, t = pw.dio(
        audio.astype("double"),
        cfg.sample_rate,
        f0_floor=cfg.f0_min,
        f0_ceil=cfg.f0_max,
        channels_in_octave=2,
        frame_period=(1000 * cfg.hop_size / cfg.sample_rate),
    )
    # Get the f0
    f0 = pw.stonemask(audio.astype("double"), _f0, t, cfg.sample_rate)
    return f0


def get_f0_features_using_harvest(audio, mel_len, fs, hop_length, f0_min, f0_max):
    """Using harvest to extract the f0 feature.
    Args:
        audio
        mel_len
        fs
        hop_length
        f0_min
        f0_max
    Returns:
        f0: numpy array of shape (frame_len,)
    """
    f0, _ = pw.harvest(
        audio.astype("double"),
        fs,
        f0_floor=f0_min,
        f0_ceil=f0_max,
        frame_period=(1000 * hop_length / fs),
    )
    f0 = f0.astype("float")[:mel_len]
    return f0


def get_f0_features_using_crepe(
    audio, mel_len, fs, hop_length, hop_length_new, f0_min, f0_max, threshold=0.3
):
    """Using torchcrepe to extract the f0 feature.
    Args:
        audio
        mel_len
        fs
        hop_length
        hop_length_new
        f0_min
        f0_max
        threshold(default=0.3)
    Returns:
        f0: numpy array of shape (frame_len,)
    """
    # Currently, crepe only supports 16khz audio
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    audio_16k = librosa.resample(audio, orig_sr=fs, target_sr=16000)
    audio_16k_torch = torch.FloatTensor(audio_16k).unsqueeze(0).to(device)

    # Get the raw pitch
    f0, pd = torchcrepe.predict(
        audio_16k_torch,
        16000,
        hop_length_new,
        f0_min,
        f0_max,
        pad=True,
        model="full",
        batch_size=1024,
        device=device,
        return_periodicity=True,
    )

    # Filter, de-silence, set up threshold for unvoiced part
    pd = torchcrepe.filter.median(pd, 3)
    pd = torchcrepe.threshold.Silence(-60.0)(pd, audio_16k_torch, 16000, hop_length_new)
    f0 = torchcrepe.threshold.At(threshold)(f0, pd)
    f0 = torchcrepe.filter.mean(f0, 3)

    # Convert unvoiced part to 0hz
    f0 = torch.where(torch.isnan(f0), torch.full_like(f0, 0), f0)

    # Interpolate f0
    nzindex = torch.nonzero(f0[0]).squeeze()
    f0 = torch.index_select(f0[0], dim=0, index=nzindex).cpu().numpy()
    time_org = 0.005 * nzindex.cpu().numpy()
    time_frame = np.arange(mel_len) * hop_length / fs
    f0 = np.interp(time_frame, time_org, f0, left=f0[0], right=f0[-1])
    return f0


def get_f0(audio, cfg, use_interpolate=False, return_uv=False):
    if cfg.pitch_extractor == "dio":
        f0 = get_f0_features_using_dio(audio, cfg)
    elif cfg.pitch_extractor == "pyin":
        f0 = get_f0_features_using_pyin(audio, cfg)
    elif cfg.pitch_extractor == "parselmouth":
        f0 = get_f0_features_using_parselmouth(audio, cfg)

    if use_interpolate:
        f0, uv = interpolate(f0)
    else:
        uv = f0 == 0

    if return_uv:
        return f0, uv

    return f0


def get_cents(f0_hz):
    """
    F_{cent} = 1200 * log2 (F/440)

    Reference:
        APSIPA'17, Perceptual Evaluation of Singing Quality
    """
    voiced_f0 = f0_hz[f0_hz != 0]
    return 1200 * np.log2(voiced_f0 / 440)


def get_pitch_derivatives(f0_hz):
    """
    f0_hz: (,T)
    """
    f0_cent = get_cents(f0_hz)
    return f0_cent[1:] - f0_cent[:-1]


def get_pitch_sub_median(f0_hz):
    """
    f0_hz: (,T)
    """
    f0_cent = get_cents(f0_hz)
    return f0_cent - np.median(f0_cent)
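A hedged end-to-end sketch of the extractors above, not part of the uploaded files. `cfg` is a hypothetical stand-in for the project config (only the fields read by the DIO path are set, with illustrative values), and "example.wav" is a placeholder path.

import librosa
from types import SimpleNamespace
from utils.f0 import f0_to_coarse, get_f0

cfg = SimpleNamespace(
    pitch_extractor="dio", sample_rate=24000, hop_size=256,
    f0_min=50, f0_max=1100,
)
audio, _ = librosa.load("example.wav", sr=cfg.sample_rate)   # placeholder file
f0, uv = get_f0(audio, cfg, use_interpolate=True, return_uv=True)
coarse = f0_to_coarse(f0, pitch_bin=256, f0_min=cfg.f0_min, f0_max=cfg.f0_max)
print(f0.shape, coarse.min(), coarse.max())                  # coarse ids lie in [1, 255]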
utils/hparam.py
ADDED
@@ -0,0 +1,659 @@
# Copyright (c) 2023 Amphion.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

# This code is modified from https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/training/python/training/hparam.py pylint: disable=line-too-long
"""Hyperparameter values."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import json
import numbers
import re
import six

# Define the regular expression for parsing a single clause of the input
# (delimited by commas). A legal clause looks like:
#   <variable name>[<index>]? = <rhs>
# where <rhs> is either a single token or [] enclosed list of tokens.
# For example:  "var[1] = a" or "x = [1,2,3]"
PARAM_RE = re.compile(
    r"""
  (?P<name>[a-zA-Z][\w\.]*)      # variable name: "var" or "x"
  (\[\s*(?P<index>\d+)\s*\])?    # (optional) index: "1" or None
  \s*=\s*
  ((?P<val>[^,\[]*)              # single value: "a" or None
   |
   \[(?P<vals>[^\]]*)\])         # list of values: None or "1,2,3"
  ($|,\s*)""",
    re.VERBOSE,
)


def _parse_fail(name, var_type, value, values):
    """Helper function for raising a value error for bad assignment."""
    raise ValueError(
        "Could not parse hparam '%s' of type '%s' with value '%s' in %s"
        % (name, var_type.__name__, value, values)
    )


def _reuse_fail(name, values):
    """Helper function for raising a value error for reuse of name."""
    raise ValueError("Multiple assignments to variable '%s' in %s" % (name, values))


def _process_scalar_value(name, parse_fn, var_type, m_dict, values, results_dictionary):
    """Update results_dictionary with a scalar value.

    Used to update the results_dictionary to be returned by parse_values when
    encountering a clause with a scalar RHS (e.g. "s=5" or "arr[0]=5".)

    Mutates results_dictionary.

    Args:
      name: Name of variable in assignment ("s" or "arr").
      parse_fn: Function for parsing the actual value.
      var_type: Type of named variable.
      m_dict: Dictionary constructed from regex parsing.
        m_dict['val']: RHS value (scalar)
        m_dict['index']: List index value (or None)
      values: Full expression being parsed
      results_dictionary: The dictionary being updated for return by the parsing
        function.

    Raises:
      ValueError: If the name has already been used.
    """
    try:
        parsed_value = parse_fn(m_dict["val"])
    except ValueError:
        _parse_fail(name, var_type, m_dict["val"], values)

    # If no index is provided
    if not m_dict["index"]:
        if name in results_dictionary:
            _reuse_fail(name, values)
        results_dictionary[name] = parsed_value
    else:
        if name in results_dictionary:
            # If the name has already been used as a scalar, it will be in this
            # dictionary and map to a non-dictionary.
            if not isinstance(results_dictionary.get(name), dict):
                _reuse_fail(name, values)
        else:
            results_dictionary[name] = {}

        index = int(m_dict["index"])
        # Make sure the index position hasn't already been assigned a value.
        if index in results_dictionary[name]:
            _reuse_fail("{}[{}]".format(name, index), values)
        results_dictionary[name][index] = parsed_value


def _process_list_value(name, parse_fn, var_type, m_dict, values, results_dictionary):
    """Update results_dictionary from a list of values.

    Used to update results_dictionary to be returned by parse_values when
    encountering a clause with a list RHS (e.g. "arr=[1,2,3]".)

    Mutates results_dictionary.

    Args:
      name: Name of variable in assignment ("arr").
      parse_fn: Function for parsing individual values.
      var_type: Type of named variable.
      m_dict: Dictionary constructed from regex parsing.
        m_dict['val']: RHS value (scalar)
      values: Full expression being parsed
      results_dictionary: The dictionary being updated for return by the parsing
        function.

    Raises:
      ValueError: If the name has an index or the values cannot be parsed.
    """
    if m_dict["index"] is not None:
        raise ValueError("Assignment of a list to a list index.")
    elements = filter(None, re.split("[ ,]", m_dict["vals"]))
    # Make sure the name hasn't already been assigned a value
    if name in results_dictionary:
        raise _reuse_fail(name, values)
    try:
        results_dictionary[name] = [parse_fn(e) for e in elements]
    except ValueError:
        _parse_fail(name, var_type, m_dict["vals"], values)


def _cast_to_type_if_compatible(name, param_type, value):
    """Cast hparam to the provided type, if compatible.

    Args:
      name: Name of the hparam to be cast.
      param_type: The type of the hparam.
      value: The value to be cast, if compatible.

    Returns:
      The result of casting `value` to `param_type`.

    Raises:
      ValueError: If the type of `value` is not compatible with param_type.
        * If `param_type` is a string type, but `value` is not.
        * If `param_type` is a boolean, but `value` is not, or vice versa.
        * If `param_type` is an integer type, but `value` is not.
        * If `param_type` is a float type, but `value` is not a numeric type.
    """
    fail_msg = "Could not cast hparam '%s' of type '%s' from value %r" % (
        name,
        param_type,
        value,
    )

    # Some callers use None, for which we can't do any casting/checking. :(
    if issubclass(param_type, type(None)):
        return value

    # Avoid converting a non-string type to a string.
    if issubclass(param_type, (six.string_types, six.binary_type)) and not isinstance(
        value, (six.string_types, six.binary_type)
    ):
        raise ValueError(fail_msg)

    # Avoid converting a number or string type to a boolean or vice versa.
    if issubclass(param_type, bool) != isinstance(value, bool):
        raise ValueError(fail_msg)

    # Avoid converting float to an integer (the reverse is fine).
    if issubclass(param_type, numbers.Integral) and not isinstance(
        value, numbers.Integral
    ):
        raise ValueError(fail_msg)

    # Avoid converting a non-numeric type to a numeric type.
    if issubclass(param_type, numbers.Number) and not isinstance(value, numbers.Number):
        raise ValueError(fail_msg)

    return param_type(value)


def parse_values(values, type_map, ignore_unknown=False):
    """Parses hyperparameter values from a string into a python map.

    `values` is a string containing comma-separated `name=value` pairs.
    For each pair, the value of the hyperparameter named `name` is set to
    `value`.

    If a hyperparameter name appears multiple times in `values`, a ValueError
    is raised (e.g. 'a=1,a=2', 'a[1]=1,a[1]=2').

    If a hyperparameter name appears in both an index assignment and a scalar
    assignment, a ValueError is raised. (e.g. 'a=[1,2,3],a[0] = 1').

    The hyperparameter name may contain '.' symbols, which will result in an
    attribute name that is only accessible through the getattr and setattr
    functions. (And must be first explicitly added through add_hparam.)

    WARNING: Use of '.' in your variable names is allowed, but is not well
    supported and not recommended.

    The `value` in `name=value` must follow the syntax according to the
    type of the parameter:

    *  Scalar integer: A Python-parsable integer value. E.g.: 1,
       100, -12.
    *  Scalar float: A Python-parsable floating point value. E.g.: 1.0,
       -.54e89.
    *  Boolean: Either true or false.
    *  Scalar string: A non-empty sequence of characters, excluding comma,
       spaces, and square brackets. E.g.: foo, bar_1.
    *  List: A comma separated list of scalar values of the parameter type
       enclosed in square brackets. E.g.: [1,2,3], [1.0,1e-12], [high,low].

    When index assignment is used, the corresponding type_map key should be the
    list name. E.g. for "arr[1]=0" the type_map must have the key "arr" (not
    "arr[1]").

    Args:
      values: String. Comma separated list of `name=value` pairs where
        'value' must follow the syntax described above.
      type_map: A dictionary mapping hyperparameter names to types. Note every
        parameter name in values must be a key in type_map. The values must
        conform to the types indicated, where a value V is said to conform to a
        type T if either V has type T, or V is a list of elements of type T.
        Hence, for a multidimensional parameter 'x' taking float values,
        'x=[0.1,0.2]' will parse successfully if type_map['x'] = float.
      ignore_unknown: Bool. Whether values that are missing a type in type_map
        should be ignored. If set to True, a ValueError will not be raised for
        unknown hyperparameter type.

    Returns:
      A python map mapping each name to either:
      * A scalar value.
      * A list of scalar values.
      * A dictionary mapping index numbers to scalar values.
      (e.g. "x=5,L=[1,2],arr[1]=3" results in {'x':5,'L':[1,2],'arr':{1:3}}")

    Raises:
      ValueError: If there is a problem with input.
      * If `values` cannot be parsed.
      * If a list is assigned to a list index (e.g. 'a[1] = [1,2,3]').
      * If the same rvalue is assigned two different values (e.g. 'a=1,a=2',
        'a[1]=1,a[1]=2', or 'a=1,a=[1]')
    """
    results_dictionary = {}
    pos = 0
    while pos < len(values):
        m = PARAM_RE.match(values, pos)
        if not m:
            raise ValueError("Malformed hyperparameter value: %s" % values[pos:])
        # Check that there is a comma between parameters and move past it.
        pos = m.end()
        # Parse the values.
        m_dict = m.groupdict()
        name = m_dict["name"]
        if name not in type_map:
            if ignore_unknown:
                continue
            raise ValueError("Unknown hyperparameter type for %s" % name)
        type_ = type_map[name]

        # Set up correct parsing function (depending on whether type_ is a bool)
        if type_ == bool:

            def parse_bool(value):
                if value in ["true", "True"]:
                    return True
                elif value in ["false", "False"]:
                    return False
                else:
                    try:
                        return bool(int(value))
                    except ValueError:
                        _parse_fail(name, type_, value, values)

            parse = parse_bool
        else:
            parse = type_

        # If a single value is provided
        if m_dict["val"] is not None:
            _process_scalar_value(
                name, parse, type_, m_dict, values, results_dictionary
            )

        # If the assigned value is a list:
        elif m_dict["vals"] is not None:
            _process_list_value(name, parse, type_, m_dict, values, results_dictionary)

        else:  # Not assigned a list or value
            _parse_fail(name, type_, "", values)

    return results_dictionary


class HParams(object):
    """Class to hold a set of hyperparameters as name-value pairs.

    A `HParams` object holds hyperparameters used to build and train a model,
    such as the number of hidden units in a neural net layer or the learning rate
    to use when training.

    You first create a `HParams` object by specifying the names and values of the
    hyperparameters.

    To make them easily accessible the parameter names are added as direct
    attributes of the class. A typical usage is as follows:

    ```python
    # Create a HParams object specifying names and values of the model
    # hyperparameters:
    hparams = HParams(learning_rate=0.1, num_hidden_units=100)

    # The hyperparameter are available as attributes of the HParams object:
    hparams.learning_rate ==> 0.1
    hparams.num_hidden_units ==> 100
    ```

    Hyperparameters have type, which is inferred from the type of their value
    passed at construction time. The currently supported types are: integer,
    float, boolean, string, and list of integer, float, boolean, or string.

    You can override hyperparameter values by calling the
    [`parse()`](#HParams.parse) method, passing a string of comma separated
    `name=value` pairs. This is intended to make it possible to override
    any hyperparameter values from a single command-line flag to which
    the user passes 'hyper-param=value' pairs. It avoids having to define
    one flag for each hyperparameter.

    The syntax expected for each value depends on the type of the parameter.
    See `parse()` for a description of the syntax.

    Example:

    ```python
    # Define a command line flag to pass name=value pairs.
    # For example using argparse:
    import argparse
    parser = argparse.ArgumentParser(description='Train my model.')
    parser.add_argument('--hparams', type=str,
                        help='Comma separated list of "name=value" pairs.')
    args = parser.parse_args()
    ...
    def my_program():
      # Create a HParams object specifying the names and values of the
      # model hyperparameters:
      hparams = tf.HParams(learning_rate=0.1, num_hidden_units=100,
                           activations=['relu', 'tanh'])

      # Override hyperparameters values by parsing the command line
      hparams.parse(args.hparams)

      # If the user passed `--hparams=learning_rate=0.3` on the command line
      # then 'hparams' has the following attributes:
      hparams.learning_rate ==> 0.3
      hparams.num_hidden_units ==> 100
      hparams.activations ==> ['relu', 'tanh']

      # If the hyperparameters are in json format use parse_json:
      hparams.parse_json('{"learning_rate": 0.3, "activations": "relu"}')
    ```
    """

    _HAS_DYNAMIC_ATTRIBUTES = True  # Required for pytype checks.

    def __init__(self, model_structure=None, **kwargs):
        """Create an instance of `HParams` from keyword arguments.

        The keyword arguments specify name-values pairs for the hyperparameters.
        The parameter types are inferred from the type of the values passed.

        The parameter names are added as attributes of `HParams` object, so they
        can be accessed directly with the dot notation `hparams._name_`.

        Example:

        ```python
        # Define 3 hyperparameters: 'learning_rate' is a float parameter,
        # 'num_hidden_units' an integer parameter, and 'activation' a string
        # parameter.
        hparams = tf.HParams(
            learning_rate=0.1, num_hidden_units=100, activation='relu')

        hparams.activation ==> 'relu'
        ```

        Note that a few names are reserved and cannot be used as hyperparameter
        names. If you use one of the reserved names the constructor raises a
        `ValueError`.

        Args:
          model_structure: An instance of ModelStructure, defining the feature
            crosses to be used in the Trial.
          **kwargs: Key-value pairs where the key is the hyperparameter name and
            the value is the value for the parameter.

        Raises:
          ValueError: If both `hparam_def` and initialization values are provided,
            or if one of the arguments is invalid.

        """
        # Register the hyperparameters and their type in _hparam_types.
        # This simplifies the implementation of parse().
        # _hparam_types maps the parameter name to a tuple (type, bool).
        # The type value is the type of the parameter for scalar hyperparameters,
        # or the type of the list elements for multidimensional hyperparameters.
        # The bool value is True if the value is a list, False otherwise.
        self._hparam_types = {}
        self._model_structure = model_structure
        for name, value in six.iteritems(kwargs):
            self.add_hparam(name, value)

    def add_hparam(self, name, value):
        """Adds {name, value} pair to hyperparameters.

        Args:
          name: Name of the hyperparameter.
          value: Value of the hyperparameter. Can be one of the following types:
            int, float, string, int list, float list, or string list.

        Raises:
          ValueError: if one of the arguments is invalid.
        """
        # Keys in kwargs are unique, but 'name' could be the name of a
        # pre-existing attribute of this object. In that case we refuse to use
        # it as a hyperparameter name.
        if getattr(self, name, None) is not None:
            raise ValueError("Hyperparameter name is reserved: %s" % name)
        if isinstance(value, (list, tuple)):
            if not value:
                raise ValueError(
                    "Multi-valued hyperparameters cannot be empty: %s" % name
                )
            self._hparam_types[name] = (type(value[0]), True)
        else:
            self._hparam_types[name] = (type(value), False)
        setattr(self, name, value)

    def set_hparam(self, name, value):
        """Set the value of an existing hyperparameter.

        This function verifies that the type of the value matches the type of the
        existing hyperparameter.

        Args:
          name: Name of the hyperparameter.
          value: New value of the hyperparameter.

        Raises:
          KeyError: If the hyperparameter doesn't exist.
          ValueError: If there is a type mismatch.
        """
        param_type, is_list = self._hparam_types[name]
        if isinstance(value, list):
            if not is_list:
                raise ValueError(
                    "Must not pass a list for single-valued parameter: %s" % name
                )
            setattr(
                self,
                name,
                [_cast_to_type_if_compatible(name, param_type, v) for v in value],
            )
        else:
            if is_list:
                raise ValueError(
                    "Must pass a list for multi-valued parameter: %s." % name
                )
            setattr(self, name, _cast_to_type_if_compatible(name, param_type, value))

    def del_hparam(self, name):
        """Removes the hyperparameter with key 'name'.

        Does nothing if it isn't present.

        Args:
          name: Name of the hyperparameter.
        """
        if hasattr(self, name):
            delattr(self, name)
            del self._hparam_types[name]

    def parse(self, values):
        """Override existing hyperparameter values, parsing new values from a string.

        See parse_values for more detail on the allowed format for values.

        Args:
          values: String. Comma separated list of `name=value` pairs where 'value'
            must follow the syntax described above.

        Returns:
          The `HParams` instance.

        Raises:
          ValueError: If `values` cannot be parsed or a hyperparameter in `values`
            doesn't exist.
        """
        type_map = {}
        for name, t in self._hparam_types.items():
            param_type, _ = t
            type_map[name] = param_type

        values_map = parse_values(values, type_map)
        return self.override_from_dict(values_map)

    def override_from_dict(self, values_dict):
        """Override existing hyperparameter values, parsing new values from a dictionary.

        Args:
          values_dict: Dictionary of name:value pairs.

        Returns:
          The `HParams` instance.

        Raises:
          KeyError: If a hyperparameter in `values_dict` doesn't exist.
          ValueError: If `values_dict` cannot be parsed.
        """
        for name, value in values_dict.items():
            self.set_hparam(name, value)
        return self

    def set_model_structure(self, model_structure):
        self._model_structure = model_structure

    def get_model_structure(self):
        return self._model_structure

    def to_json(self, indent=None, separators=None, sort_keys=False):
        """Serializes the hyperparameters into JSON.

        Args:
          indent: If a non-negative integer, JSON array elements and object members
            will be pretty-printed with that indent level. An indent level of 0, or
            negative, will only insert newlines. `None` (the default) selects the
            most compact representation.
          separators: Optional `(item_separator, key_separator)` tuple. Default is
            `(', ', ': ')`.
          sort_keys: If `True`, the output dictionaries will be sorted by key.

        Returns:
          A JSON string.
        """

        def remove_callables(x):
            """Omit callable elements from input with arbitrary nesting."""
            if isinstance(x, dict):
                return {
                    k: remove_callables(v)
                    for k, v in six.iteritems(x)
                    if not callable(v)
                }
            elif isinstance(x, list):
                return [remove_callables(i) for i in x if not callable(i)]
            return x

        return json.dumps(
            remove_callables(self.values()),
            indent=indent,
            separators=separators,
            sort_keys=sort_keys,
        )

    def parse_json(self, values_json):
        """Override existing hyperparameter values, parsing new values from a json object.

        Args:
          values_json: String containing a json object of name:value pairs.

        Returns:
          The `HParams` instance.

        Raises:
          KeyError: If a hyperparameter in `values_json` doesn't exist.
          ValueError: If `values_json` cannot be parsed.
        """
        values_map = json.loads(values_json)
        return self.override_from_dict(values_map)

    def values(self):
        """Return the hyperparameter values as a Python dictionary.

        Returns:
          A dictionary with hyperparameter names as keys. The values are the
          hyperparameter values.
        """
        return {n: getattr(self, n) for n in self._hparam_types.keys()}

    def get(self, key, default=None):
        """Returns the value of `key` if it exists, else `default`."""
        if key in self._hparam_types:
            # Ensure that default is compatible with the parameter type.
            if default is not None:
                param_type, is_param_list = self._hparam_types[key]
                type_str = "list<%s>" % param_type if is_param_list else str(param_type)
                fail_msg = (
                    "Hparam '%s' of type '%s' is incompatible with "
                    "default=%s" % (key, type_str, default)
                )

                is_default_list = isinstance(default, list)
                if is_param_list != is_default_list:
                    raise ValueError(fail_msg)

                try:
                    if is_default_list:
                        for value in default:
                            _cast_to_type_if_compatible(key, param_type, value)
                    else:
                        _cast_to_type_if_compatible(key, param_type, default)
                except ValueError as e:
                    raise ValueError("%s. %s" % (fail_msg, e))

            return getattr(self, key)

        return default

    def __contains__(self, key):
        return key in self._hparam_types

    def __str__(self):
        return str(sorted(self.values().items()))

    def __repr__(self):
        return "%s(%s)" % (type(self).__name__, self.__str__())

    @staticmethod
    def _get_kind_name(param_type, is_list):
        """Returns the field name given parameter type and is_list.

        Args:
          param_type: Data type of the hparam.
          is_list: Whether this is a list.
635 |
+
Returns:
|
636 |
+
A string representation of the field name.
|
637 |
+
|
638 |
+
Raises:
|
639 |
+
ValueError: If parameter type is not recognized.
|
640 |
+
"""
|
641 |
+
if issubclass(param_type, bool):
|
642 |
+
# This check must happen before issubclass(param_type, six.integer_types),
|
643 |
+
# since Python considers bool to be a subclass of int.
|
644 |
+
typename = "bool"
|
645 |
+
elif issubclass(param_type, six.integer_types):
|
646 |
+
# Setting 'int' and 'long' types to be 'int64' to ensure the type is
|
647 |
+
# compatible with both Python2 and Python3.
|
648 |
+
typename = "int64"
|
649 |
+
elif issubclass(param_type, (six.string_types, six.binary_type)):
|
650 |
+
# Setting 'string' and 'bytes' types to be 'bytes' to ensure the type is
|
651 |
+
# compatible with both Python2 and Python3.
|
652 |
+
typename = "bytes"
|
653 |
+
elif issubclass(param_type, float):
|
654 |
+
typename = "float"
|
655 |
+
else:
|
656 |
+
raise ValueError("Unsupported parameter type: %s" % str(param_type))
|
657 |
+
|
658 |
+
suffix = "list" if is_list else "value"
|
659 |
+
return "_".join([typename, suffix])
|
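For orientation, here is a minimal usage sketch of this HParams class. The hyperparameter names are made up, the override string follows the `name=value` format documented in `parse` above, and the constructor is assumed to take keyword arguments as in the TensorFlow `HParams` this file is adapted from:

    from utils.hparam import HParams

    hps = HParams(learning_rate=0.1, num_layers=2, activations=["relu", "tanh"])
    hps.parse("learning_rate=0.3,num_layers=4")        # override from a string
    hps.override_from_dict({"activations": ["gelu", "gelu"]})
    print(hps.get("learning_rate"))                    # 0.3
    print(hps.to_json(indent=2))                       # all values as JSON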
utils/hubert.py
ADDED
@@ -0,0 +1,155 @@
# Copyright (c) 2023 Amphion.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

# This code is modified from https://github.com/svc-develop-team/so-vits-svc/blob/4.0/preprocess_hubert_f0.py

import os

import librosa
import numpy as np
import torch
from fairseq import checkpoint_utils
from tqdm import tqdm


def load_hubert_model(hps):
    # Load model
    ckpt_path = hps.hubert_file
    print("Load Hubert Model...")

    models, saved_cfg, task = checkpoint_utils.load_model_ensemble_and_task(
        [ckpt_path],
        suffix="",
    )
    model = models[0]
    model.eval()

    if torch.cuda.is_available():
        model = model.cuda()

    return model


def get_hubert_content(hmodel, wav_16k_tensor):
    feats = wav_16k_tensor
    if feats.dim() == 2:  # double channels
        feats = feats.mean(-1)
    assert feats.dim() == 1, feats.dim()
    feats = feats.view(1, -1)
    padding_mask = torch.BoolTensor(feats.shape).fill_(False)
    inputs = {
        "source": feats.to(wav_16k_tensor.device),
        "padding_mask": padding_mask.to(wav_16k_tensor.device),
        "output_layer": 9,  # layer 9
    }
    with torch.no_grad():
        logits = hmodel.extract_features(**inputs)
        feats = hmodel.final_proj(logits[0]).squeeze(0)

    return feats


def content_vector_encoder(model, audio_path, default_sampling_rate=16000):
    """
    # content vector default sr: 16000
    """

    wav16k, sr = librosa.load(audio_path, sr=default_sampling_rate)
    device = next(model.parameters()).device
    wav16k = torch.from_numpy(wav16k).to(device)

    # (1, 256, frame_len)
    content_feature = get_hubert_content(model, wav_16k_tensor=wav16k)

    return content_feature.cpu().detach().numpy()


def repeat_expand_2d(content, target_len):
    """
    content : [hubert_dim(256), src_len]
    target: [hubert_dim(256), target_len]
    """
    src_len = content.shape[-1]
    target = torch.zeros([content.shape[0], target_len], dtype=torch.float).to(
        content.device
    )
    temp = torch.arange(src_len + 1) * target_len / src_len
    current_pos = 0
    for i in range(target_len):
        if i < temp[current_pos + 1]:
            target[:, i] = content[:, current_pos]
        else:
            current_pos += 1
            target[:, i] = content[:, current_pos]

    return target


def get_mapped_features(raw_content_features, mapping_features):
    """
    Content Vector: frameshift = 20ms, hop_size = 480 in 24k

    Now it's only used for mapping to bigvgan's mels (sr = 24k, hop_size = 256, frameshift ~= 10.7 ms)
    """
    source_hop = 480
    target_hop = 256

    factor = np.gcd(source_hop, target_hop)
    source_hop //= factor
    target_hop //= factor
    print(
        "Mapping source's {} frames => target's {} frames".format(
            target_hop, source_hop
        )
    )

    results = []
    for index, mapping_feat in enumerate(tqdm(mapping_features)):
        # mapping_feat: (mels_frame_len, n_mels)
        target_len = len(mapping_feat)

        # (source_len, 256)
        raw_feats = raw_content_features[index][0].cpu().numpy().T
        source_len, width = raw_feats.shape

        # const ~= target_len * target_hop
        const = source_len * source_hop // target_hop * target_hop

        # (source_len * source_hop, dim)
        up_sampling_feats = np.repeat(raw_feats, source_hop, axis=0)
        # (const, dim) -> (const/target_hop, target_hop, dim) -> (const/target_hop, dim)
        down_sampling_feats = np.average(
            up_sampling_feats[:const].reshape(-1, target_hop, width), axis=1
        )

        err = abs(target_len - len(down_sampling_feats))
        if err > 3:
            print("index:", index)
            print("mels:", mapping_feat.shape)
            print("raw content vector:", raw_feats.shape)
            print("up_sampling:", up_sampling_feats.shape)
            print("down_sampling_feats:", down_sampling_feats.shape)
            exit()
        if len(down_sampling_feats) < target_len:
            # (1, dim) -> (err, dim)
            end = down_sampling_feats[-1][None, :].repeat(err, axis=0)
            down_sampling_feats = np.concatenate([down_sampling_feats, end], axis=0)

        # (target_len, dim)
        feats = down_sampling_feats[:target_len]
        results.append(feats)

    return results


def extract_hubert_features_of_dataset(datasets, model, out_dir):
    for utt in tqdm(datasets):
        uid = utt["Uid"]
        audio_path = utt["Path"]

        content_vector_feature = content_vector_encoder(model, audio_path)  # (T, 256)

        save_path = os.path.join(out_dir, uid + ".npy")
        np.save(save_path, content_vector_feature)
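A minimal sketch of `repeat_expand_2d`, which stretches a content feature along time by repeating source frames (tensor values here are dummies):

    import torch
    from utils.hubert import repeat_expand_2d

    content = torch.randn(256, 5)            # [hubert_dim, src_len]
    expanded = repeat_expand_2d(content, 8)  # nearest-frame upsampling
    print(expanded.shape)                    # torch.Size([256, 8])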
utils/io.py
ADDED
@@ -0,0 +1,182 @@
# Copyright (c) 2023 Amphion.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

import os

import numpy as np
import torch
import torchaudio


def save_feature(process_dir, feature_dir, item, feature, overrides=True):
    """Save features to path

    Args:
        process_dir (str): directory to store features
        feature_dir (_type_): directory to store one type of features (mel, energy, ...)
        item (str): uid
        feature (tensor): feature tensor
        overrides (bool, optional): whether to override existing files. Defaults to True.
    """
    process_dir = os.path.join(process_dir, feature_dir)
    os.makedirs(process_dir, exist_ok=True)
    out_path = os.path.join(process_dir, item + ".npy")

    # Skip only when the file already exists and overriding is disabled
    if os.path.exists(out_path) and not overrides:
        return
    np.save(out_path, feature)


def save_txt(process_dir, feature_dir, item, feature, overrides=True):
    process_dir = os.path.join(process_dir, feature_dir)
    os.makedirs(process_dir, exist_ok=True)
    out_path = os.path.join(process_dir, item + ".txt")

    # Skip only when the file already exists and overriding is disabled
    if os.path.exists(out_path) and not overrides:
        return
    with open(out_path, "w") as f:
        f.writelines(feature)


def save_audio(path, waveform, fs, add_silence=False, turn_up=False, volume_peak=0.9):
    """Save audio to path with processing (turn up volume, add silence)
    Args:
        path (str): path to save audio
        waveform (numpy array): waveform to save
        fs (int): sampling rate
        add_silence (bool, optional): whether to add silence to beginning and end. Defaults to False.
        turn_up (bool, optional): whether to turn up volume. Defaults to False.
        volume_peak (float, optional): volume peak. Defaults to 0.9.
    """
    if turn_up:
        # continue to turn up to volume_peak
        ratio = volume_peak / max(waveform.max(), abs(waveform.min()))
        waveform = waveform * ratio

    if add_silence:
        silence_len = fs // 20
        silence = np.zeros((silence_len,), dtype=waveform.dtype)
        result = np.concatenate([silence, waveform, silence])
        waveform = result

    waveform = torch.as_tensor(waveform, dtype=torch.float32, device="cpu")
    if len(waveform.size()) == 1:
        waveform = waveform[None, :]
    elif waveform.size(0) != 1:
        # Stereo to mono
        waveform = torch.mean(waveform, dim=0, keepdim=True)
    torchaudio.save(path, waveform, fs, encoding="PCM_S", bits_per_sample=16)


def save_torch_audio(process_dir, feature_dir, item, wav_torch, fs, overrides=True):
    """Save torch audio to path without processing
    Args:
        process_dir (str): directory to store features
        feature_dir (_type_): directory to store one type of features (mel, energy, ...)
        item (str): uid
        wav_torch (tensor): feature tensor
        fs (int): sampling rate
        overrides (bool, optional): whether to override existing files. Defaults to True.
    """
    if wav_torch.dim() != 2:
        # (T,) -> (1, T)
        wav_torch = wav_torch.unsqueeze(0)

    process_dir = os.path.join(process_dir, feature_dir)
    os.makedirs(process_dir, exist_ok=True)
    out_path = os.path.join(process_dir, item + ".wav")

    torchaudio.save(out_path, wav_torch, fs)


async def async_load_audio(path, sample_rate: int = 24000):
    r"""
    Args:
        path: The source loading path.
        sample_rate: The target sample rate, will automatically resample if necessary.

    Returns:
        waveform: The waveform object. Should be [1 x sequence_len].
    """

    async def use_torchaudio_load(path):
        return torchaudio.load(path)

    waveform, sr = await use_torchaudio_load(path)
    waveform = torch.mean(waveform, dim=0, keepdim=True)

    if sr != sample_rate:
        waveform = torchaudio.functional.resample(waveform, sr, sample_rate)

    if torch.any(torch.isnan(waveform)) or torch.any(torch.isinf(waveform)):
        raise ValueError("NaN or Inf found in waveform.")
    return waveform


async def async_save_audio(
    path,
    waveform,
    sample_rate: int = 24000,
    add_silence: bool = False,
    volume_peak: float = 0.9,
):
    r"""
    Args:
        path: The target saving path.
        waveform: The waveform object. Should be [n_channel x sequence_len].
        sample_rate: Sample rate.
        add_silence: If ``true``, concat 0.05s silence to beginning and end.
        volume_peak: Turn up volume for larger number, vice versa.
    """

    async def use_torchaudio_save(path, waveform, sample_rate):
        torchaudio.save(
            path, waveform, sample_rate, encoding="PCM_S", bits_per_sample=16
        )

    waveform = torch.as_tensor(waveform, device="cpu", dtype=torch.float32)
    shape = waveform.size()[:-1]

    ratio = abs(volume_peak) / max(waveform.max(), abs(waveform.min()))
    waveform = waveform * ratio

    if add_silence:
        silence_len = sample_rate // 20
        silence = torch.zeros((*shape, silence_len), dtype=waveform.dtype)
        waveform = torch.concatenate((silence, waveform, silence), dim=-1)

    if waveform.dim() == 1:
        waveform = waveform[None]

    await use_torchaudio_save(path, waveform, sample_rate)


def load_mel_extrema(cfg, dataset_name, split):
    dataset_dir = os.path.join(
        cfg.OUTPUT_PATH,
        "preprocess/{}_version".format(cfg.data.process_version),
        dataset_name,
    )

    min_file = os.path.join(
        dataset_dir,
        "mel_min_max",
        split.split("_")[-1],
        "mel_min.npy",
    )
    max_file = os.path.join(
        dataset_dir,
        "mel_min_max",
        split.split("_")[-1],
        "mel_max.npy",
    )
    mel_min = np.load(min_file)
    mel_max = np.load(max_file)
    return mel_min, mel_max
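A minimal sketch of `save_audio`: writing a one-second 440 Hz tone, peak-normalized to 0.9, with 0.05 s of silence on each side (the output path is arbitrary):

    import numpy as np
    from utils.io import save_audio

    fs = 24000
    t = np.arange(fs) / fs
    wav = 0.5 * np.sin(2 * np.pi * 440.0 * t).astype(np.float32)
    save_audio("sine.wav", wav, fs, add_silence=True, turn_up=True)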
utils/io_optim.py
ADDED
@@ -0,0 +1,123 @@
# Copyright (c) 2023 Amphion.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

import json
import os

import librosa
import numpy as np
import torch
import torchaudio
import whisper
from torch.nn.utils.rnn import pad_sequence


class TorchaudioDataset(torch.utils.data.Dataset):
    def __init__(self, cfg, dataset, sr, accelerator=None, metadata=None):
        """
        Args:
            cfg: config
            dataset: dataset name
        """
        assert isinstance(dataset, str)

        self.sr = sr
        self.cfg = cfg

        if metadata is None:
            self.train_metadata_path = os.path.join(
                cfg.preprocess.processed_dir, dataset, cfg.preprocess.train_file
            )
            self.valid_metadata_path = os.path.join(
                cfg.preprocess.processed_dir, dataset, cfg.preprocess.valid_file
            )
            self.metadata = self.get_metadata()
        else:
            self.metadata = metadata

        if accelerator is not None:
            self.device = accelerator.device
        elif torch.cuda.is_available():
            self.device = torch.device("cuda")
        else:
            self.device = torch.device("cpu")

    def get_metadata(self):
        metadata = []
        with open(self.train_metadata_path, "r", encoding="utf-8") as t:
            metadata.extend(json.load(t))
        with open(self.valid_metadata_path, "r", encoding="utf-8") as v:
            metadata.extend(json.load(v))
        return metadata

    def __len__(self):
        return len(self.metadata)

    def __getitem__(self, index):
        utt_info = self.metadata[index]
        wav_path = utt_info["Path"]

        wav, sr = torchaudio.load(wav_path)

        # resample
        if sr != self.sr:
            wav = torchaudio.functional.resample(wav, sr, self.sr)
        # downmixing
        if wav.shape[0] > 1:
            wav = torch.mean(wav, dim=0, keepdim=True)
        assert wav.shape[0] == 1
        wav = wav.squeeze(0)
        # record the length of wav without padding
        length = wav.shape[0]
        # wav: (T)
        return utt_info, wav, length


class LibrosaDataset(TorchaudioDataset):
    def __init__(self, cfg, dataset, sr, accelerator=None, metadata=None):
        super().__init__(cfg, dataset, sr, accelerator, metadata)

    def __getitem__(self, index):
        utt_info = self.metadata[index]
        wav_path = utt_info["Path"]

        wav, _ = librosa.load(wav_path, sr=self.sr)
        # wav: (T)
        wav = torch.from_numpy(wav)

        # record the length of wav without padding
        length = wav.shape[0]
        return utt_info, wav, length


class FFmpegDataset(TorchaudioDataset):
    def __init__(self, cfg, dataset, sr, accelerator=None, metadata=None):
        super().__init__(cfg, dataset, sr, accelerator, metadata)

    def __getitem__(self, index):
        utt_info = self.metadata[index]
        wav_path = utt_info["Path"]

        # wav: (T,)
        wav = whisper.load_audio(wav_path, sr=16000)  # sr = 16000
        # convert to torch tensor
        wav = torch.from_numpy(wav)
        # record the length of wav without padding
        length = wav.shape[0]

        return utt_info, wav, length


def collate_batch(batch_list):
    """
    Args:
        batch_list: list of (metadata, wav, length)
    """
    metadata = [item[0] for item in batch_list]
    # wavs: (B, T)
    wavs = pad_sequence([item[1] for item in batch_list], batch_first=True)
    lens = [item[2] for item in batch_list]

    return metadata, wavs, lens
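These dataset classes are meant to be wrapped in a torch `DataLoader` with `collate_batch`, which zero-pads each batch to its longest waveform. A sketch, assuming a valid Amphion `cfg` object and dataset name:

    from torch.utils.data import DataLoader
    from utils.io_optim import TorchaudioDataset, collate_batch

    dataset = TorchaudioDataset(cfg, "my_dataset", sr=24000)
    loader = DataLoader(dataset, batch_size=8, collate_fn=collate_batch)
    for metadata, wavs, lens in loader:
        pass  # wavs: (B, T_max); lens: original lengths before padding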
utils/mel.py
ADDED
@@ -0,0 +1,280 @@
# Copyright (c) 2023 Amphion.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

import torch
from librosa.filters import mel as librosa_mel_fn

# Per-device caches for mel filterbanks and Hann windows
mel_basis = {}
hann_window = {}


def dynamic_range_compression_torch(x, C=1, clip_val=1e-5):
    # Min value: ln(1e-5) = -11.5129
    return torch.log(torch.clamp(x, min=clip_val) * C)


def spectral_normalize_torch(magnitudes):
    output = dynamic_range_compression_torch(magnitudes)
    return output


def extract_linear_features(y, cfg, center=False):
    if torch.min(y) < -1.0:
        print("min value is ", torch.min(y))
    if torch.max(y) > 1.0:
        print("max value is ", torch.max(y))

    global hann_window
    hann_window[str(y.device)] = torch.hann_window(cfg.win_size).to(y.device)

    y = torch.nn.functional.pad(
        y.unsqueeze(1),
        (int((cfg.n_fft - cfg.hop_size) / 2), int((cfg.n_fft - cfg.hop_size) / 2)),
        mode="reflect",
    )
    y = y.squeeze(1)

    # complex tensor as default, then use view_as_real for future pytorch compatibility
    spec = torch.stft(
        y,
        cfg.n_fft,
        hop_length=cfg.hop_size,
        win_length=cfg.win_size,
        window=hann_window[str(y.device)],
        center=center,
        pad_mode="reflect",
        normalized=False,
        onesided=True,
        return_complex=True,
    )
    spec = torch.view_as_real(spec)
    spec = torch.sqrt(spec.pow(2).sum(-1) + (1e-9))
    spec = torch.squeeze(spec, 0)
    return spec


def mel_spectrogram_torch(y, cfg, center=False):
    """
    TODO: to merge this function with the extract_mel_features below
    """
    if torch.min(y) < -1.0:
        print("min value is ", torch.min(y))
    if torch.max(y) > 1.0:
        print("max value is ", torch.max(y))

    global mel_basis, hann_window
    key = str(cfg.fmax) + "_" + str(y.device)
    if key not in mel_basis:
        mel = librosa_mel_fn(
            sr=cfg.sample_rate,
            n_fft=cfg.n_fft,
            n_mels=cfg.n_mel,
            fmin=cfg.fmin,
            fmax=cfg.fmax,
        )
        mel_basis[key] = torch.from_numpy(mel).float().to(y.device)
        hann_window[str(y.device)] = torch.hann_window(cfg.win_size).to(y.device)

    y = torch.nn.functional.pad(
        y.unsqueeze(1),
        (int((cfg.n_fft - cfg.hop_size) / 2), int((cfg.n_fft - cfg.hop_size) / 2)),
        mode="reflect",
    )
    y = y.squeeze(1)

    spec = torch.stft(
        y,
        cfg.n_fft,
        hop_length=cfg.hop_size,
        win_length=cfg.win_size,
        window=hann_window[str(y.device)],
        center=center,
        pad_mode="reflect",
        normalized=False,
        onesided=True,
        return_complex=True,
    )

    spec = torch.view_as_real(spec)
    spec = torch.sqrt(spec.pow(2).sum(-1) + 1e-6)

    spec = torch.matmul(mel_basis[key], spec)
    spec = spectral_normalize_torch(spec)

    return spec


def extract_mel_features(
    y,
    cfg,
    center=False,
):
    """Extract mel features

    Args:
        y (tensor): audio data in tensor
        cfg (dict): configuration in cfg.preprocess
        center (bool, optional): In STFT, whether t-th frame is centered at time t*hop_length. Defaults to False.

    Returns:
        tensor: a tensor containing the mel feature calculated based on STFT result
    """
    if torch.min(y) < -1.0:
        print("min value is ", torch.min(y))
    if torch.max(y) > 1.0:
        print("max value is ", torch.max(y))

    global mel_basis, hann_window
    key = str(cfg.fmax) + "_" + str(y.device)
    if key not in mel_basis:
        mel = librosa_mel_fn(
            sr=cfg.sample_rate,
            n_fft=cfg.n_fft,
            n_mels=cfg.n_mel,
            fmin=cfg.fmin,
            fmax=cfg.fmax,
        )
        mel_basis[key] = torch.from_numpy(mel).float().to(y.device)
        hann_window[str(y.device)] = torch.hann_window(cfg.win_size).to(y.device)

    y = torch.nn.functional.pad(
        y.unsqueeze(1),
        (int((cfg.n_fft - cfg.hop_size) / 2), int((cfg.n_fft - cfg.hop_size) / 2)),
        mode="reflect",
    )
    y = y.squeeze(1)

    # complex tensor as default, then use view_as_real for future pytorch compatibility
    spec = torch.stft(
        y,
        cfg.n_fft,
        hop_length=cfg.hop_size,
        win_length=cfg.win_size,
        window=hann_window[str(y.device)],
        center=center,
        pad_mode="reflect",
        normalized=False,
        onesided=True,
        return_complex=True,
    )
    spec = torch.view_as_real(spec)
    spec = torch.sqrt(spec.pow(2).sum(-1) + (1e-9))

    spec = torch.matmul(mel_basis[key], spec)
    spec = spectral_normalize_torch(spec)
    return spec.squeeze(0)


def extract_mel_features_tts(
    y,
    cfg,
    center=False,
    taco=False,
    _stft=None,
):
    """Extract mel features

    Args:
        y (tensor): audio data in tensor
        cfg (dict): configuration in cfg.preprocess
        center (bool, optional): In STFT, whether t-th frame is centered at time t*hop_length. Defaults to False.
        taco: use tacotron mel

    Returns:
        tensor: a tensor containing the mel feature calculated based on STFT result
    """
    if not taco:
        if torch.min(y) < -1.0:
            print("min value is ", torch.min(y))
        if torch.max(y) > 1.0:
            print("max value is ", torch.max(y))

        global mel_basis, hann_window
        key = str(cfg.fmax) + "_" + str(y.device)
        if key not in mel_basis:
            mel = librosa_mel_fn(
                sr=cfg.sample_rate,
                n_fft=cfg.n_fft,
                n_mels=cfg.n_mel,
                fmin=cfg.fmin,
                fmax=cfg.fmax,
            )
            mel_basis[key] = torch.from_numpy(mel).float().to(y.device)
            hann_window[str(y.device)] = torch.hann_window(cfg.win_size).to(y.device)

        y = torch.nn.functional.pad(
            y.unsqueeze(1),
            (int((cfg.n_fft - cfg.hop_size) / 2), int((cfg.n_fft - cfg.hop_size) / 2)),
            mode="reflect",
        )
        y = y.squeeze(1)

        # complex tensor as default, then use view_as_real for future pytorch compatibility
        spec = torch.stft(
            y,
            cfg.n_fft,
            hop_length=cfg.hop_size,
            win_length=cfg.win_size,
            window=hann_window[str(y.device)],
            center=center,
            pad_mode="reflect",
            normalized=False,
            onesided=True,
            return_complex=True,
        )
        spec = torch.view_as_real(spec)
        spec = torch.sqrt(spec.pow(2).sum(-1) + (1e-9))

        spec = torch.matmul(mel_basis[key], spec)
        spec = spectral_normalize_torch(spec)
    else:
        audio = torch.clip(y, -1, 1)
        audio = torch.autograd.Variable(audio, requires_grad=False)
        spec, energy = _stft.mel_spectrogram(audio)

    return spec.squeeze(0)


def amplitude_phase_spectrum(y, cfg):
    hann_window = torch.hann_window(cfg.win_size).to(y.device)

    y = torch.nn.functional.pad(
        y.unsqueeze(1),
        (int((cfg.n_fft - cfg.hop_size) / 2), int((cfg.n_fft - cfg.hop_size) / 2)),
        mode="reflect",
    )
    y = y.squeeze(1)

    stft_spec = torch.stft(
        y,
        cfg.n_fft,
        hop_length=cfg.hop_size,
        win_length=cfg.win_size,
        window=hann_window,
        center=False,
        return_complex=True,
    )

    stft_spec = torch.view_as_real(stft_spec)
    if stft_spec.size()[0] == 1:
        stft_spec = stft_spec.squeeze(0)

    if len(list(stft_spec.size())) == 4:
        rea = stft_spec[:, :, :, 0]  # [batch_size, n_fft//2+1, frames]
        imag = stft_spec[:, :, :, 1]  # [batch_size, n_fft//2+1, frames]
    else:
        rea = stft_spec[:, :, 0]  # [n_fft//2+1, frames]
        imag = stft_spec[:, :, 1]  # [n_fft//2+1, frames]

    log_amplitude = torch.log(
        torch.abs(torch.sqrt(torch.pow(rea, 2) + torch.pow(imag, 2))) + 1e-5
    )  # [n_fft//2+1, frames]
    phase = torch.atan2(imag, rea)  # [n_fft//2+1, frames]

    return log_amplitude, phase, rea, imag
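A minimal sketch of `extract_mel_features`; the `cfg` attribute names are exactly those read above, while the values are illustrative:

    from types import SimpleNamespace
    import torch
    from utils.mel import extract_mel_features

    cfg = SimpleNamespace(
        sample_rate=24000, n_fft=1024, hop_size=256, win_size=1024,
        n_mel=100, fmin=0, fmax=12000,
    )
    y = torch.randn(1, 24000).clamp(-1, 1)  # (1, T) waveform in [-1, 1]
    mel = extract_mel_features(y, cfg)      # (n_mel, frames)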
utils/mert.py
ADDED
@@ -0,0 +1,139 @@
# Copyright (c) 2023 Amphion.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

# This code is modified from https://huggingface.co/m-a-p/MERT-v1-330M

import numpy as np
import torch
import torchaudio
import torchaudio.transforms as T
from sklearn.preprocessing import StandardScaler
from tqdm import tqdm
from transformers import AutoModel, Wav2Vec2FeatureExtractor


def mert_encoder(model, processor, audio_path, hps):
    """
    # mert default sr: 24000
    """
    with torch.no_grad():
        resample_rate = processor.sampling_rate
        device = next(model.parameters()).device

        input_audio, sampling_rate = torchaudio.load(audio_path)
        input_audio = input_audio.squeeze()

        if sampling_rate != resample_rate:
            resampler = T.Resample(sampling_rate, resample_rate)
            input_audio = resampler(input_audio)

        inputs = processor(
            input_audio, sampling_rate=resample_rate, return_tensors="pt"
        ).to(device)  # {input_values: tensor, attention_mask: tensor}

        outputs = model(**inputs, output_hidden_states=True)  # list: len is 25

        # [25 layer, Time steps, 1024 feature_dim]
        # all_layer_hidden_states = torch.stack(outputs.hidden_states).squeeze()
        # mert_features.append(all_layer_hidden_states)

        feature = outputs.hidden_states[
            hps.mert_feature_layer
        ].squeeze()  # [1, frame len, 1024] -> [frame len, 1024]

    return feature.cpu().detach().numpy()


def mert_features_normalization(raw_mert_features):
    normalized_mert_features = list()

    mert_features = np.array(raw_mert_features)
    scaler = StandardScaler().fit(mert_features)
    for raw_mert_feature in raw_mert_features:
        normalized_mert_feature = scaler.transform(raw_mert_feature)
        normalized_mert_features.append(normalized_mert_feature)
    return normalized_mert_features


def get_mapped_mert_features(raw_mert_features, mapping_features, fast_mapping=True):
    source_hop = 320
    target_hop = 256

    factor = np.gcd(source_hop, target_hop)
    source_hop //= factor
    target_hop //= factor
    print(
        "Mapping source's {} frames => target's {} frames".format(
            target_hop, source_hop
        )
    )

    mert_features = []
    for index, mapping_feat in enumerate(tqdm(mapping_features)):
        # mapping_feat: (mels_frame_len, n_mels)
        target_len = mapping_feat.shape[0]

        # (frame_len, 1024)
        raw_feats = raw_mert_features[index].cpu().numpy()
        source_len, width = raw_feats.shape

        # const ~= target_len * target_hop
        const = source_len * source_hop // target_hop * target_hop

        # (source_len * source_hop, dim)
        up_sampling_feats = np.repeat(raw_feats, source_hop, axis=0)
        # (const, dim) -> (const/target_hop, target_hop, dim) -> (const/target_hop, dim)
        down_sampling_feats = np.average(
            up_sampling_feats[:const].reshape(-1, target_hop, width), axis=1
        )

        err = abs(target_len - len(down_sampling_feats))
        if err > 3:
            print("index:", index)
            print("mels:", mapping_feat.shape)
            print("raw mert vector:", raw_feats.shape)
            print("up_sampling:", up_sampling_feats.shape)
            print("const:", const)
            print("down_sampling_feats:", down_sampling_feats.shape)
            exit()
        if len(down_sampling_feats) < target_len:
            # (1, dim) -> (err, dim)
            end = down_sampling_feats[-1][None, :].repeat(err, axis=0)
            down_sampling_feats = np.concatenate([down_sampling_feats, end], axis=0)

        # (target_len, dim)
        feats = down_sampling_feats[:target_len]
        mert_features.append(feats)

    return mert_features


def load_mert_model(hps):
    print("Loading MERT Model: ", hps.mert_model)

    # Load model
    model_name = hps.mert_model
    model = AutoModel.from_pretrained(model_name, trust_remote_code=True)

    if torch.cuda.is_available():
        model = model.cuda()

    # model = model.eval()

    preprocessor = Wav2Vec2FeatureExtractor.from_pretrained(
        model_name, trust_remote_code=True
    )
    return model, preprocessor


# loading the corresponding preprocessor config
# def load_preprocessor(model_name="m-a-p/MERT-v1-330M"):
#     print("load_preprocessor...")
#     preprocessor = Wav2Vec2FeatureExtractor.from_pretrained(model_name, trust_remote_code=True)
#     return preprocessor
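The hop mapping above reduces both hop sizes by their gcd before resampling. A worked sketch of the arithmetic for MERT's 320-sample hop against the 256-sample mel hop:

    import numpy as np

    source_hop, target_hop = 320, 256
    factor = np.gcd(source_hop, target_hop)  # 64
    source_hop //= factor                    # 5: repeat each source frame 5x
    target_hop //= factor                    # 4: then average groups of 4
    assert 4 * 320 == 5 * 256 == 1280        # 4 source frames span 5 target frames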
utils/mfa_prepare.py
ADDED
@@ -0,0 +1,116 @@
# Copyright (c) 2023 Amphion.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

"""This code is modified from https://montreal-forced-aligner.readthedocs.io/en/latest/user_guide/performance.html"""

import os
import subprocess
from multiprocessing import Pool
from pathlib import Path

import torchaudio
from tqdm import tqdm


def remove_empty_dirs(path):
    """Remove empty directories in a given path."""
    # Check if the given path is a directory
    if not os.path.isdir(path):
        print(f"{path} is not a directory")
        return

    # Walk through all directories and subdirectories, bottom-up
    for root, dirs, _ in os.walk(path, topdown=False):
        for dir in dirs:
            dir_path = os.path.join(root, dir)
            # Remove the directory if it is empty
            if not os.listdir(dir_path):
                os.rmdir(dir_path)


def process_single_wav_file(task):
    """Process a single wav file."""
    wav_file, output_dir = task
    speaker_id, book_name, filename = Path(wav_file).parts[-3:]

    output_book_dir = Path(output_dir, speaker_id)
    output_book_dir.mkdir(parents=True, exist_ok=True)
    new_filename = f"{speaker_id}_{book_name}_{filename}"

    new_wav_file = Path(output_book_dir, new_filename)
    command = [
        "ffmpeg",
        "-nostdin",
        "-hide_banner",
        "-loglevel",
        "error",
        "-nostats",
        "-i",
        wav_file,
        "-acodec",
        "pcm_s16le",
        "-ar",
        "16000",
        new_wav_file,
    ]
    subprocess.check_call(
        command
    )  # Run the command to convert the file to 16kHz and 16-bit PCM
    os.remove(wav_file)


def process_wav_files(wav_files, output_dir, n_process):
    """Process wav files in parallel."""
    tasks = [(wav_file, output_dir) for wav_file in wav_files]
    print(f"Processing {len(tasks)} files")
    with Pool(processes=n_process) as pool:
        for _ in tqdm(
            pool.imap_unordered(process_single_wav_file, tasks), total=len(tasks)
        ):
            pass
    print("Removing empty directories...")
    remove_empty_dirs(output_dir)
    print("Done!")


def get_wav_files(dataset_path):
    """Collect all wav files in the dataset (speaker/book/file layout)."""
    wav_files = []
    for speaker_id in os.listdir(dataset_path):
        speaker_dir = os.path.join(dataset_path, speaker_id)
        if not os.path.isdir(speaker_dir):
            continue
        for book_name in os.listdir(speaker_dir):
            book_dir = os.path.join(speaker_dir, book_name)
            if not os.path.isdir(book_dir):
                continue
            for file in os.listdir(book_dir):
                if file.endswith(".wav"):
                    wav_files.append(os.path.join(book_dir, file))
    print("Found {} wav files".format(len(wav_files)))
    return wav_files


def filter_wav_files_by_length(wav_files, max_len_sec=15):
    """Keep wav files no longer than max_len_sec; delete the rest."""
    print("original wav files: {}".format(len(wav_files)))
    filtered_wav_files = []
    for audio_file in wav_files:
        metadata = torchaudio.info(str(audio_file))
        audio_length = metadata.num_frames / metadata.sample_rate
        if audio_length <= max_len_sec:
            filtered_wav_files.append(audio_file)
        else:
            os.remove(audio_file)
    print("filtered wav files: {}".format(len(filtered_wav_files)))
    return filtered_wav_files


if __name__ == "__main__":
    dataset_path = "/path/to/output/directory"
    n_process = 16
    max_len_sec = 15
    wav_files = get_wav_files(dataset_path)
    filtered_wav_files = filter_wav_files_by_length(wav_files, max_len_sec)
    process_wav_files(filtered_wav_files, dataset_path, n_process)
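For reference, the argument list assembled in `process_single_wav_file` is equivalent to running `ffmpeg -nostdin -hide_banner -loglevel error -nostats -i <input>.wav -acodec pcm_s16le -ar 16000 <output>.wav` for each file, i.e. every wav is re-encoded to 16 kHz, 16-bit PCM and the original is deleted.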
utils/model_summary.py
ADDED
@@ -0,0 +1,74 @@
# Copyright (c) 2023 Amphion.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

import humanfriendly
import numpy as np
import torch


def get_human_readable_count(number: int) -> str:
    """Return human_readable_count

    Originated from:
    https://github.com/PyTorchLightning/pytorch-lightning/blob/master/pytorch_lightning/core/memory.py

    Abbreviates an integer number with K, M, B, T for thousands, millions,
    billions and trillions, respectively.
    Examples:
        >>> get_human_readable_count(123)
        '123.00  '
        >>> get_human_readable_count(1234)  # (one thousand)
        '1.23 K'
        >>> get_human_readable_count(2e6)  # (two million)
        '2.00 M'
        >>> get_human_readable_count(3e9)  # (three billion)
        '3.00 B'
        >>> get_human_readable_count(4e12)  # (four trillion)
        '4.00 T'
        >>> get_human_readable_count(5e15)  # (more than trillion)
        '5000.00 T'
    Args:
        number: a positive integer number
    Return:
        A string formatted according to the pattern described above.
    """
    assert number >= 0
    labels = [" ", "K", "M", "B", "T"]
    num_digits = int(np.floor(np.log10(number)) + 1 if number > 0 else 1)
    num_groups = int(np.ceil(num_digits / 3))
    num_groups = min(num_groups, len(labels))
    shift = -3 * (num_groups - 1)
    number = number * (10**shift)
    index = num_groups - 1
    return f"{number:.2f} {labels[index]}"


def to_bytes(dtype) -> int:
    # e.g. torch.float32 -> 32 bits -> 4 bytes per element
    return int(str(dtype)[-2:]) // 8


def model_summary(model: torch.nn.Module) -> str:
    message = "Model structure:\n"
    message += str(model)
    tot_params = sum(p.numel() for p in model.parameters())
    num_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    percent_trainable = "{:.1f}".format(num_params * 100.0 / tot_params)
    tot_params = get_human_readable_count(tot_params)
    num_params = get_human_readable_count(num_params)
    message += "\n\nModel summary:\n"
    message += f" Class Name: {model.__class__.__name__}\n"
    message += f" Total Number of model parameters: {tot_params}\n"
    message += (
        f" Number of trainable parameters: {num_params} ({percent_trainable}%)\n"
    )
    num_bytes = humanfriendly.format_size(
        sum(
            p.numel() * to_bytes(p.dtype) for p in model.parameters() if p.requires_grad
        )
    )
    message += f" Size: {num_bytes}\n"
    dtype = next(iter(model.parameters())).dtype
    message += f" Type: {dtype}"
    return message
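A minimal sketch of `model_summary` on a small module; the figures follow from the helpers above (20,560 parameters -> '20.56 K'; at 4 bytes each -> '82.24 KB'):

    import torch
    from utils.model_summary import model_summary

    net = torch.nn.Linear(256, 80)  # 256 * 80 + 80 = 20,560 parameters
    print(model_summary(net))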
utils/prompt_preparer.py
ADDED
@@ -0,0 +1,68 @@
# Copyright (c) 2023 Amphion.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

import torch


class PromptPreparer:
    def prepare_prompts(self, y, y_lens, codes, nar_stage, y_prompts_codes):
        if self.prefix_mode == 0:
            y_emb, prefix_len = self._handle_prefix_mode_0(y, codes, nar_stage)
        elif self.prefix_mode == 1:
            y_emb, prefix_len = self._handle_prefix_mode_1(y, y_lens, codes, nar_stage)
        elif self.prefix_mode in [2, 4]:
            y_emb, prefix_len = self._handle_prefix_mode_2_4(
                y, y_lens, codes, nar_stage, y_prompts_codes
            )
        else:
            raise ValueError("Invalid prefix mode")

        return y_emb, prefix_len

    def _handle_prefix_mode_0(self, y, codes, nar_stage):
        prefix_len = 0
        y_emb = self.nar_audio_embeddings[0](y)
        for j in range(1, nar_stage):
            y_emb = y_emb + self.nar_audio_embeddings[j](codes[..., j])
        return y_emb, prefix_len

    def _handle_prefix_mode_1(self, y, y_lens, codes, nar_stage):
        int_low = (0.25 * y_lens.min()).type(torch.int64).item()
        prefix_len = torch.randint(int_low, int_low * 2, size=()).item()
        prefix_len = min(prefix_len, 225)

        y_prompts = self.nar_audio_embeddings[0](y[:, :prefix_len])
        y_emb = self.nar_audio_embeddings[0](y[:, prefix_len:])
        for j in range(1, self.num_quantizers):
            y_prompts += self.nar_audio_embeddings[j](codes[:, :prefix_len, j])
            if j < nar_stage:
                y_emb += self.nar_audio_embeddings[j](codes[:, prefix_len:, j])
        y_emb = torch.concat([y_prompts, y_emb], axis=1)
        return y_emb, prefix_len

    def _handle_prefix_mode_2_4(self, y, y_lens, codes, nar_stage, y_prompts_codes):
        if self.prefix_mode == 2:
            prefix_len = min(225, int(0.25 * y_lens.min().item()))

            y_prompts_codes = []
            for b in range(codes.shape[0]):
                start = self.rng.randint(0, y_lens[b].item() - prefix_len)
                y_prompts_codes.append(
                    torch.clone(codes[b, start : start + prefix_len])
                )
                codes[b, start : start + prefix_len, nar_stage] = self.audio_token_num
            y_prompts_codes = torch.stack(y_prompts_codes, dim=0)
        else:
            prefix_len = y_prompts_codes.shape[1]

        y_prompts = self.nar_audio_embeddings[0](y_prompts_codes[..., 0])
        y_emb = self.nar_audio_embeddings[0](y)
        for j in range(1, self.num_quantizers):
            y_prompts += self.nar_audio_embeddings[j](y_prompts_codes[..., j])
            if j < nar_stage:
                y_emb += self.nar_audio_embeddings[j](codes[..., j])
        y_emb = torch.concat([y_prompts, y_emb], axis=1)

        return y_emb, prefix_len
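To make prefix mode 1 concrete, a worked sketch of how the prompt length is drawn (it mirrors the first three lines of `_handle_prefix_mode_1`): with a minimum target length of 600 frames, `int_low` is 150, so `prefix_len` is sampled uniformly from [150, 300) and then capped at 225:

    import torch

    y_lens = torch.tensor([600, 812])
    int_low = (0.25 * y_lens.min()).type(torch.int64).item()  # 150
    prefix_len = torch.randint(int_low, int_low * 2, size=()).item()
    prefix_len = min(prefix_len, 225)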
utils/ssim.py
ADDED
@@ -0,0 +1,80 @@
# Copyright (c) 2023 Amphion.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

# This code is modified from https://github.com/Po-Hsun-Su/pytorch-ssim

import torch
import torch.nn.functional as F
from torch.autograd import Variable
from math import exp


def gaussian(window_size, sigma):
    gauss = torch.Tensor(
        [
            exp(-((x - window_size // 2) ** 2) / float(2 * sigma**2))
            for x in range(window_size)
        ]
    )
    return gauss / gauss.sum()


def create_window(window_size, channel):
    _1D_window = gaussian(window_size, 1.5).unsqueeze(1)
    _2D_window = _1D_window.mm(_1D_window.t()).float().unsqueeze(0).unsqueeze(0)
    window = Variable(
        _2D_window.expand(channel, 1, window_size, window_size).contiguous()
    )
    return window


def _ssim(img1, img2, window, window_size, channel, size_average=True):
    mu1 = F.conv2d(img1, window, padding=window_size // 2, groups=channel)
    mu2 = F.conv2d(img2, window, padding=window_size // 2, groups=channel)

    mu1_sq = mu1.pow(2)
    mu2_sq = mu2.pow(2)
    mu1_mu2 = mu1 * mu2

    sigma1_sq = (
        F.conv2d(img1 * img1, window, padding=window_size // 2, groups=channel) - mu1_sq
    )
    sigma2_sq = (
        F.conv2d(img2 * img2, window, padding=window_size // 2, groups=channel) - mu2_sq
    )
    sigma12 = (
        F.conv2d(img1 * img2, window, padding=window_size // 2, groups=channel)
        - mu1_mu2
    )

    C1 = 0.01**2
    C2 = 0.03**2

    ssim_map = ((2 * mu1_mu2 + C1) * (2 * sigma12 + C2)) / (
        (mu1_sq + mu2_sq + C1) * (sigma1_sq + sigma2_sq + C2)
    )

    if size_average:
        return ssim_map.mean()
    else:
        return ssim_map.mean(1)


class SSIM(torch.nn.Module):
    def __init__(self, window_size=11, size_average=True):
        super(SSIM, self).__init__()
        self.window_size = window_size
        self.size_average = size_average
        self.channel = 1
        self.window = create_window(window_size, self.channel)

    def forward(self, fake, real, bias=6.0):
        fake = fake[:, None, :, :] + bias  # [B, 1, T, n_mels]
        real = real[:, None, :, :] + bias  # [B, 1, T, n_mels]
        self.window = self.window.to(dtype=fake.dtype, device=fake.device)
        loss = 1 - _ssim(
            fake, real, self.window, self.window_size, self.channel, self.size_average
        )
        return loss
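A minimal sketch of the SSIM loss on batches of mel spectrograms; shapes are illustrative, and `forward` adds the 6.0 bias internally before returning 1 minus the mean SSIM:

    import torch
    from utils.ssim import SSIM

    criterion = SSIM()
    fake = torch.rand(4, 120, 80)  # (B, T, n_mels), e.g. predicted mels
    real = torch.rand(4, 120, 80)
    loss = criterion(fake, real)   # scalar in [0, 2], lower is better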
utils/stft.py
ADDED
@@ -0,0 +1,278 @@
# Copyright (c) 2023 Amphion.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

import torch
import torch.nn.functional as F
import numpy as np
from scipy.signal import get_window
from librosa.util import pad_center, tiny
from librosa.filters import mel as librosa_mel_fn

import librosa.util as librosa_util


def window_sumsquare(
    window,
    n_frames,
    hop_length,
    win_length,
    n_fft,
    dtype=np.float32,
    norm=None,
):
    """
    # from librosa 0.6
    Compute the sum-square envelope of a window function at a given hop length.

    This is used to estimate modulation effects induced by windowing
    observations in short-time Fourier transforms.

    Parameters
    ----------
    window : string, tuple, number, callable, or list-like
        Window specification, as in `get_window`

    n_frames : int > 0
        The number of analysis frames

    hop_length : int > 0
        The number of samples to advance between frames

    win_length : [optional]
        The length of the window function. By default, this matches `n_fft`.

    n_fft : int > 0
        The length of each analysis frame.

    dtype : np.dtype
        The data type of the output

    Returns
    -------
    wss : np.ndarray, shape=`(n_fft + hop_length * (n_frames - 1))`
        The sum-squared envelope of the window function
    """
    if win_length is None:
        win_length = n_fft

    n = n_fft + hop_length * (n_frames - 1)
    x = np.zeros(n, dtype=dtype)

    # Compute the squared window at the desired length
    win_sq = get_window(window, win_length, fftbins=True)
    win_sq = librosa_util.normalize(win_sq, norm=norm) ** 2
    win_sq = librosa_util.pad_center(win_sq, n_fft)

    # Fill the envelope
    for i in range(n_frames):
        sample = i * hop_length
        x[sample : min(n, sample + n_fft)] += win_sq[: max(0, min(n_fft, n - sample))]
    return x


def griffin_lim(magnitudes, stft_fn, n_iters=30):
    """
    PARAMS
    ------
    magnitudes: spectrogram magnitudes
    stft_fn: STFT class with transform (STFT) and inverse (ISTFT) methods
    """

    angles = np.angle(np.exp(2j * np.pi * np.random.rand(*magnitudes.size())))
    angles = angles.astype(np.float32)
    angles = torch.autograd.Variable(torch.from_numpy(angles))
    signal = stft_fn.inverse(magnitudes, angles).squeeze(1)

    for i in range(n_iters):
        _, angles = stft_fn.transform(signal)
        signal = stft_fn.inverse(magnitudes, angles).squeeze(1)
    return signal


def dynamic_range_compression(x, C=1, clip_val=1e-5):
    """
    PARAMS
    ------
    C: compression factor
    """
    return torch.log(torch.clamp(x, min=clip_val) * C)


def dynamic_range_decompression(x, C=1):
    """
    PARAMS
    ------
    C: compression factor used to compress
    """
    return torch.exp(x) / C


class STFT(torch.nn.Module):
    """adapted from Prem Seetharaman's https://github.com/pseeth/pytorch-stft"""

    def __init__(self, filter_length, hop_length, win_length, window="hann"):
        super(STFT, self).__init__()
        self.filter_length = filter_length
        self.hop_length = hop_length
        self.win_length = win_length
        self.window = window
        self.forward_transform = None
        scale = self.filter_length / self.hop_length
        fourier_basis = np.fft.fft(np.eye(self.filter_length))

        cutoff = int((self.filter_length / 2 + 1))
        fourier_basis = np.vstack(
            [np.real(fourier_basis[:cutoff, :]), np.imag(fourier_basis[:cutoff, :])]
        )

        forward_basis = torch.FloatTensor(fourier_basis[:, None, :])
        inverse_basis = torch.FloatTensor(
            np.linalg.pinv(scale * fourier_basis).T[:, None, :]
        )

        if window is not None:
            assert filter_length >= win_length
            # get window and zero center pad it to filter_length
            fft_window = get_window(window, win_length, fftbins=True)
            fft_window = pad_center(fft_window, filter_length)
            fft_window = torch.from_numpy(fft_window).float()

            # window the bases
            forward_basis *= fft_window
            inverse_basis *= fft_window

        self.register_buffer("forward_basis", forward_basis.float())
        self.register_buffer("inverse_basis", inverse_basis.float())

    def transform(self, input_data):
        num_batches = input_data.size(0)
        num_samples = input_data.size(1)

        self.num_samples = num_samples

        # similar to librosa, reflect-pad the input
        input_data = input_data.view(num_batches, 1, num_samples)
        input_data = F.pad(
            input_data.unsqueeze(1),
            (int(self.filter_length / 2), int(self.filter_length / 2), 0, 0),
            mode="reflect",
        )
        input_data = input_data.squeeze(1)

        # the Fourier basis is a registered buffer, so it follows the module's
        # device; the input is expected to live on the same device
        forward_transform = F.conv1d(
            input_data,
            torch.autograd.Variable(self.forward_basis, requires_grad=False),
            stride=self.hop_length,
            padding=0,
        )

        cutoff = int((self.filter_length / 2) + 1)
        real_part = forward_transform[:, :cutoff, :]
        imag_part = forward_transform[:, cutoff:, :]

        magnitude = torch.sqrt(real_part**2 + imag_part**2)
        phase = torch.autograd.Variable(torch.atan2(imag_part.data, real_part.data))

        return magnitude, phase

    def inverse(self, magnitude, phase):
        recombine_magnitude_phase = torch.cat(
            [magnitude * torch.cos(phase), magnitude * torch.sin(phase)], dim=1
        )

        inverse_transform = F.conv_transpose1d(
            recombine_magnitude_phase,
            torch.autograd.Variable(self.inverse_basis, requires_grad=False),
            stride=self.hop_length,
            padding=0,
        )

        if self.window is not None:
            window_sum = window_sumsquare(
                self.window,
                magnitude.size(-1),
                hop_length=self.hop_length,
                win_length=self.win_length,
                n_fft=self.filter_length,
                dtype=np.float32,
            )
            # remove modulation effects
            approx_nonzero_indices = torch.from_numpy(
                np.where(window_sum > tiny(window_sum))[0]
            )
            window_sum = torch.autograd.Variable(
                torch.from_numpy(window_sum), requires_grad=False
            )
            window_sum = window_sum.cuda() if magnitude.is_cuda else window_sum
            inverse_transform[:, :, approx_nonzero_indices] /= window_sum[
                approx_nonzero_indices
            ]

            # scale by hop ratio
            inverse_transform *= float(self.filter_length) / self.hop_length

        inverse_transform = inverse_transform[:, :, int(self.filter_length / 2) :]
        inverse_transform = inverse_transform[:, :, : -int(self.filter_length / 2) :]

        return inverse_transform

    def forward(self, input_data):
        self.magnitude, self.phase = self.transform(input_data)
        reconstruction = self.inverse(self.magnitude, self.phase)
        return reconstruction


class TacotronSTFT(torch.nn.Module):
    def __init__(
        self,
        filter_length,
        hop_length,
        win_length,
        n_mel_channels,
        sampling_rate,
        mel_fmin,
        mel_fmax,
    ):
        super(TacotronSTFT, self).__init__()
        self.n_mel_channels = n_mel_channels
        self.sampling_rate = sampling_rate
        self.stft_fn = STFT(filter_length, hop_length, win_length)
        mel_basis = librosa_mel_fn(
            sampling_rate, filter_length, n_mel_channels, mel_fmin, mel_fmax
        )
        mel_basis = torch.from_numpy(mel_basis).float()
        self.register_buffer("mel_basis", mel_basis)

    def spectral_normalize(self, magnitudes):
        output = dynamic_range_compression(magnitudes)
        return output

    def spectral_de_normalize(self, magnitudes):
        output = dynamic_range_decompression(magnitudes)
        return output

    def mel_spectrogram(self, y):
        """Computes mel-spectrograms from a batch of waves
        PARAMS
        ------
        y: Variable(torch.FloatTensor) with shape (B, T) in range [-1, 1]

        RETURNS
        -------
        mel_output: torch.FloatTensor of shape (B, n_mel_channels, T)
        """
        assert torch.min(y.data) >= -1
        assert torch.max(y.data) <= 1

        magnitudes, phases = self.stft_fn.transform(y)
        magnitudes = magnitudes.data
        mel_output = torch.matmul(self.mel_basis, magnitudes)
        mel_output = self.spectral_normalize(mel_output)
        energy = torch.norm(magnitudes, dim=1)

        return mel_output, energy

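A minimal sketch of using the TacotronSTFT added above; the hyperparameters below are illustrative, not values taken from a config in this commit:

    import torch

    stft = TacotronSTFT(
        filter_length=1024,
        hop_length=256,
        win_length=1024,
        n_mel_channels=80,
        sampling_rate=22050,
        mel_fmin=0.0,
        mel_fmax=8000.0,
    )

    # one second of audio, clamped into the [-1, 1] range the assertions expect
    wav = torch.clamp(torch.randn(1, 22050), -1.0, 1.0)  # (B, T)
    mel, energy = stft.mel_spectrogram(wav)  # mel: (B, 80, T'), energy: (B, T')
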
utils/symbol_table.py
ADDED
@@ -0,0 +1,317 @@
# Copyright (c) 2023 Amphion.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

# This code is modified from
# https://github.com/lifeiteng/vall-e/blob/9c69096d603ce13174fb5cb025f185e2e9b36ac7/valle/utils/symbol_table.py

from dataclasses import dataclass
from dataclasses import field
from typing import Dict
from typing import Generic
from typing import List
from typing import Optional
from typing import TypeVar
from typing import Union

Symbol = TypeVar("Symbol")


@dataclass(repr=False)
class SymbolTable(Generic[Symbol]):
    """SymbolTable that maps symbol IDs, found on the FSA arcs to
    actual objects. These objects can be arbitrary Python objects
    that can serve as keys in a dictionary (i.e. they need to be
    hashable and immutable).

    The SymbolTable can only be read to/written from disk if the
    symbols are strings.
    """

    _id2sym: Dict[int, Symbol] = field(default_factory=dict)
    """Map an integer to a symbol.
    """

    _sym2id: Dict[Symbol, int] = field(default_factory=dict)
    """Map a symbol to an integer.
    """

    _next_available_id: int = 1
    """A helper internal field that helps adding new symbols
    to the table efficiently.
    """

    eps: Symbol = "<eps>"
    """Null symbol, always mapped to index 0.
    """

    def __post_init__(self):
        assert all(self._sym2id[sym] == idx for idx, sym in self._id2sym.items())
        assert all(self._id2sym[idx] == sym for sym, idx in self._sym2id.items())
        assert 0 not in self._id2sym or self._id2sym[0] == self.eps

        self._next_available_id = max(self._id2sym, default=0) + 1
        self._id2sym.setdefault(0, self.eps)
        self._sym2id.setdefault(self.eps, 0)

    @staticmethod
    def from_str(s: str) -> "SymbolTable":
        """Build a symbol table from a string.

        The string consists of lines. Every line has two fields separated
        by space(s), tab(s) or both. The first field is the symbol and the
        second the integer id of the symbol.

        Args:
          s:
            The input string with the format described above.
        Returns:
          An instance of :class:`SymbolTable`.
        """
        id2sym: Dict[int, str] = dict()
        sym2id: Dict[str, int] = dict()

        for line in s.split("\n"):
            fields = line.split()
            if len(fields) == 0:
                continue  # skip empty lines
            assert (
                len(fields) == 2
            ), f"Expect a line with 2 fields. Given: {len(fields)}"
            sym, idx = fields[0], int(fields[1])
            assert sym not in sym2id, f"Duplicated symbol {sym}"
            assert idx not in id2sym, f"Duplicated id {idx}"
            id2sym[idx] = sym
            sym2id[sym] = idx

        eps = id2sym.get(0, "<eps>")

        return SymbolTable(_id2sym=id2sym, _sym2id=sym2id, eps=eps)

    @staticmethod
    def from_file(filename: str) -> "SymbolTable":
        """Build a symbol table from file.

        Every line in the symbol table file has two fields separated by
        space(s), tab(s) or both. The following is an example file:

        .. code-block::

            <eps> 0
            a 1
            b 2
            c 3

        Args:
          filename:
            Name of the symbol table file. Its format is documented above.

        Returns:
          An instance of :class:`SymbolTable`.

        """
        with open(filename, "r", encoding="utf-8") as f:
            return SymbolTable.from_str(f.read().strip())

    def to_str(self) -> str:
        """
        Returns:
          Return a string representation of this object. You can pass
          it to the method ``from_str`` to recreate an identical object.
        """
        s = ""
        for idx, symbol in sorted(self._id2sym.items()):
            s += f"{symbol} {idx}\n"
        return s

    def to_file(self, filename: str):
        """Serialize the SymbolTable to a file.

        Every line in the symbol table file has two fields separated by
        space(s), tab(s) or both. The following is an example file:

        .. code-block::

            <eps> 0
            a 1
            b 2
            c 3

        Args:
          filename:
            Name of the symbol table file. Its format is documented above.
        """
        with open(filename, "w") as f:
            for idx, symbol in sorted(self._id2sym.items()):
                print(symbol, idx, file=f)

    def add(self, symbol: Symbol, index: Optional[int] = None) -> int:
        """Add a new symbol to the SymbolTable.

        Args:
          symbol:
            The symbol to be added.
          index:
            Optional int id to which the symbol should be assigned.
            If it is not available, a ValueError will be raised.

        Returns:
          The int id to which the symbol has been assigned.
        """
        # Already in the table? Return its ID.
        if symbol in self._sym2id:
            return self._sym2id[symbol]
        # Specific ID not provided - use next available.
        if index is None:
            index = self._next_available_id
        # Specific ID provided but not available.
        if index in self._id2sym:
            raise ValueError(
                f"Cannot assign id '{index}' to '{symbol}' - "
                f"already occupied by {self._id2sym[index]}"
            )
        self._sym2id[symbol] = index
        self._id2sym[index] = symbol

        # Update next available ID if needed
        if self._next_available_id <= index:
            self._next_available_id = index + 1

        return index

    def get(self, k: Union[int, Symbol]) -> Union[Symbol, int]:
        """Get a symbol for an id or get an id for a symbol

        Args:
          k:
            If it is an id, it tries to find the symbol corresponding
            to the id; if it is a symbol, it tries to find the id
            corresponding to the symbol.

        Returns:
          An id or a symbol depending on the given `k`.
        """
        if isinstance(k, int):
            return self._id2sym[k]
        else:
            return self._sym2id[k]

    def merge(self, other: "SymbolTable") -> "SymbolTable":
        """Create a union of two SymbolTables.
        Raises an AssertionError if the same IDs are occupied by
        different symbols.

        Args:
          other:
            A symbol table to merge with ``self``.

        Returns:
          A new symbol table.
        """
        self._check_compatible(other)
        return SymbolTable(
            _id2sym={**self._id2sym, **other._id2sym},
            _sym2id={**self._sym2id, **other._sym2id},
            eps=self.eps,
        )

    def _check_compatible(self, other: "SymbolTable") -> None:
        # Epsilon compatibility
        assert self.eps == other.eps, (
            f"Mismatched epsilon symbol: " f"{self.eps} != {other.eps}"
        )
        # IDs compatibility
        common_ids = set(self._id2sym).intersection(other._id2sym)
        for idx in common_ids:
            assert self[idx] == other[idx], (
                f"ID conflict for id: {idx}, "
                f'self[idx] = "{self[idx]}", '
                f'other[idx] = "{other[idx]}"'
            )
        # Symbols compatibility
        common_symbols = set(self._sym2id).intersection(other._sym2id)
        for sym in common_symbols:
            assert self[sym] == other[sym], (
                f"ID conflict for symbol: {sym}, "
                f'self[sym] = "{self[sym]}", '
                f'other[sym] = "{other[sym]}"'
            )

    def __getitem__(self, item: Union[int, Symbol]) -> Union[Symbol, int]:
        return self.get(item)

    def __contains__(self, item: Union[int, Symbol]) -> bool:
        if isinstance(item, int):
            return item in self._id2sym
        else:
            return item in self._sym2id

    def __len__(self) -> int:
        return len(self._id2sym)

    def __eq__(self, other: "SymbolTable") -> bool:
        if len(self) != len(other):
            return False

        for s in self.symbols:
            if self[s] != other[s]:
                return False

        return True

    @property
    def ids(self) -> List[int]:
        """Returns a list of integer IDs corresponding to the symbols."""
        ans = list(self._id2sym.keys())
        ans.sort()
        return ans

    @property
    def symbols(self) -> List[Symbol]:
        """Returns a list of symbols (e.g., strings) corresponding to
        the integer IDs.
        """
        ans = list(self._sym2id.keys())
        ans.sort()
        return ans


class TextToken:
    def __init__(
        self,
        text_tokens: List[str],
        add_eos: bool = True,
        add_bos: bool = True,
        pad_symbol: str = "<pad>",
        bos_symbol: str = "<bos>",
        eos_symbol: str = "<eos>",
    ):
        self.pad_symbol = pad_symbol
        self.add_eos = add_eos
        self.add_bos = add_bos
        self.bos_symbol = bos_symbol
        self.eos_symbol = eos_symbol

        unique_tokens = [pad_symbol]
        if add_bos:
            unique_tokens.append(bos_symbol)
        if add_eos:
            unique_tokens.append(eos_symbol)
        unique_tokens.extend(sorted(text_tokens))

        self.token2idx = {token: idx for idx, token in enumerate(unique_tokens)}
        self.idx2token = unique_tokens

    def get_token_id_seq(self, text):
        tokens_seq = [p for p in text]
        seq = (
            ([self.bos_symbol] if self.add_bos else [])
            + tokens_seq
            + ([self.eos_symbol] if self.add_eos else [])
        )

        token_ids = [self.token2idx[token] for token in seq]
        token_lens = len(tokens_seq) + self.add_eos + self.add_bos

        return token_ids, token_lens

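A short round trip through the SymbolTable added above (the symbols are illustrative):

    table = SymbolTable.from_str("<eps> 0\na 1\nb 2")
    idx = table.add("c")          # appended at the next free id (3)
    assert table["a"] == 1        # symbol -> id
    assert table[idx] == "c"      # id -> symbol
    restored = SymbolTable.from_str(table.to_str())
    assert restored == table      # serialization is lossless
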
utils/tokenizer.py
ADDED
@@ -0,0 +1,150 @@
# Copyright (c) 2023 Amphion.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

# This code is modified from
# https://github.com/lifeiteng/vall-e/blob/9c69096d603ce13174fb5cb025f185e2e9b36ac7/valle/data/tokenizer.py

import re
from typing import Any, Dict, List, Optional, Pattern, Union

import torch
import torchaudio
from encodec import EncodecModel
from encodec.utils import convert_audio


class AudioTokenizer:
    """EnCodec audio tokenizer for encoding and decoding audio.

    Attributes:
        device: The device on which the codec model is loaded.
        codec: The pretrained EnCodec model.
        sample_rate: Sample rate of the model.
        channels: Number of audio channels in the model.
    """

    def __init__(self, device: Any = None) -> None:
        model = EncodecModel.encodec_model_24khz()
        model.set_target_bandwidth(6.0)
        remove_encodec_weight_norm(model)

        if not device:
            device = torch.device("cpu")
            if torch.cuda.is_available():
                device = torch.device("cuda:0")

        self._device = device

        self.codec = model.to(device)
        self.sample_rate = model.sample_rate
        self.channels = model.channels

    @property
    def device(self):
        return self._device

    def encode(self, wav: torch.Tensor) -> torch.Tensor:
        """Encode the audio waveform.

        Args:
            wav: A tensor representing the audio waveform.

        Returns:
            A tensor representing the encoded audio.
        """
        return self.codec.encode(wav.to(self.device))

    def decode(self, frames: torch.Tensor) -> torch.Tensor:
        """Decode the encoded audio frames.

        Args:
            frames: A tensor representing the encoded audio frames.

        Returns:
            A tensor representing the decoded audio waveform.
        """
        return self.codec.decode(frames)


def tokenize_audio(tokenizer: AudioTokenizer, audio_path: str):
    """
    Tokenize the audio waveform using the given AudioTokenizer.

    Args:
        tokenizer: An instance of AudioTokenizer.
        audio_path: Path to the audio file.

    Returns:
        A tensor of encoded frames from the audio.

    Raises:
        FileNotFoundError: If the audio file is not found.
        RuntimeError: If there's an error processing the audio data.
    """
    try:
        # Load and preprocess the audio waveform
        wav, sr = torchaudio.load(audio_path)
        wav = convert_audio(wav, sr, tokenizer.sample_rate, tokenizer.channels)
        wav = wav.unsqueeze(0)

        # Extract discrete codes from EnCodec
        with torch.no_grad():
            encoded_frames = tokenizer.encode(wav)
        return encoded_frames
    except FileNotFoundError:
        raise FileNotFoundError(f"Audio file not found at {audio_path}")
    except Exception as e:
        raise RuntimeError(f"Error processing audio data: {e}")


def remove_encodec_weight_norm(model):
    from encodec.modules import SConv1d
    from encodec.modules.seanet import SConvTranspose1d, SEANetResnetBlock
    from torch.nn.utils import remove_weight_norm

    encoder = model.encoder.model
    for key in encoder._modules:
        if isinstance(encoder._modules[key], SEANetResnetBlock):
            remove_weight_norm(encoder._modules[key].shortcut.conv.conv)
            block_modules = encoder._modules[key].block._modules
            for skey in block_modules:
                if isinstance(block_modules[skey], SConv1d):
                    remove_weight_norm(block_modules[skey].conv.conv)
        elif isinstance(encoder._modules[key], SConv1d):
            remove_weight_norm(encoder._modules[key].conv.conv)

    decoder = model.decoder.model
    for key in decoder._modules:
        if isinstance(decoder._modules[key], SEANetResnetBlock):
            remove_weight_norm(decoder._modules[key].shortcut.conv.conv)
            block_modules = decoder._modules[key].block._modules
            for skey in block_modules:
                if isinstance(block_modules[skey], SConv1d):
                    remove_weight_norm(block_modules[skey].conv.conv)
        elif isinstance(decoder._modules[key], SConvTranspose1d):
            remove_weight_norm(decoder._modules[key].convtr.convtr)
        elif isinstance(decoder._modules[key], SConv1d):
            remove_weight_norm(decoder._modules[key].conv.conv)


def extract_encodec_token(wav_path):
    model = EncodecModel.encodec_model_24khz()
    model.set_target_bandwidth(6.0)

    wav, sr = torchaudio.load(wav_path)
    wav = convert_audio(wav, sr, model.sample_rate, model.channels)
    wav = wav.unsqueeze(0)
    if torch.cuda.is_available():
        model = model.cuda()
        wav = wav.cuda()
    with torch.no_grad():
        encoded_frames = model.encode(wav)
        codes_ = torch.cat(
            [encoded[0] for encoded in encoded_frames], dim=-1
        )  # [B, n_q, T]
        codes = codes_.cpu().numpy()[0, :, :].T  # [T, 8]

    return codes

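A sketch of the intended encode/decode round trip with the tokenizer above; "prompt.wav" is a placeholder path, and the pretrained EnCodec weights are downloaded on first instantiation:

    import torch

    tokenizer = AudioTokenizer()  # picks cuda:0 if available, else cpu
    encoded_frames = tokenize_audio(tokenizer, "prompt.wav")  # placeholder path

    # EnCodec returns a list of (codes, scale) frames; codes is [B, n_q, T]
    codes = encoded_frames[0][0]

    with torch.no_grad():
        wav_rec = tokenizer.decode(encoded_frames)  # [B, channels, samples] at 24 kHz
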
utils/topk_sampling.py
ADDED
@@ -0,0 +1,89 @@
# Copyright (c) 2023 Amphion.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.


import torch
import torch.nn.functional as F


# This function is modified from https://github.com/microsoft/unilm/blob/master/xtune/src/transformers/modeling_utils.py
def top_k_top_p_filtering(
    logits, top_k=0, top_p=1.0, filter_value=-float("Inf"), min_tokens_to_keep=1
):
    """
    Filter a distribution of logits using top-k and/or nucleus (top-p) filtering.

    Nucleus filtering is described in Holtzman et al. (http://arxiv.org/abs/1904.09751).
    At least ``min_tokens_to_keep`` tokens are kept per batch example in the output.
    From: https://gist.github.com/thomwolf/1a5a29f6962089e871b94cbd09daf317

    Args:
        logits (torch.Tensor): Logits distribution with shape (batch size, vocabulary size).
        top_k (int, optional): Keep only top k tokens with highest probability (top-k filtering).
            Set to 0 to disable. Defaults to 0.
        top_p (float, optional): Keep the top tokens with a cumulative probability >= top_p (nucleus filtering).
            Must be between 0 and 1, inclusive. Defaults to 1.0.
        filter_value (float, optional): The value to assign to filtered logits. Defaults to -float('Inf').
        min_tokens_to_keep (int, optional): Ensure that at least this number of tokens are kept per batch example.
            Defaults to 1.

    Returns:
        torch.Tensor: The filtered logits.
    """
    if top_k > 0:
        # Apply top-k filtering
        top_k = min(max(top_k, min_tokens_to_keep), logits.size(-1))
        indices_to_remove = logits < torch.topk(logits, top_k).values[..., -1, None]
        logits[indices_to_remove] = filter_value

    if top_p < 1.0:
        # Apply top-p filtering
        sorted_logits, sorted_indices = torch.sort(logits, descending=True)
        cumulative_probs = torch.cumsum(F.softmax(sorted_logits, dim=-1), dim=-1)

        # Create a mask to remove tokens with cumulative probability above the top_p threshold
        sorted_indices_to_remove = cumulative_probs > top_p
        if min_tokens_to_keep > 1:
            sorted_indices_to_remove[..., :min_tokens_to_keep] = 0
        # Shift the mask right so the first token above the threshold is kept as well
        sorted_indices_to_remove[..., 1:] = sorted_indices_to_remove[..., :-1].clone()
        sorted_indices_to_remove[..., 0] = 0

        # Scatter sorted tensors back to original indexing
        indices_to_remove = sorted_indices_to_remove.scatter(
            1, sorted_indices, sorted_indices_to_remove
        )
        logits[indices_to_remove] = filter_value

    return logits


def topk_sampling(logits, top_k=50, top_p=1.0, temperature=1.0):
    """
    Perform top-k and top-p sampling on logits.

    Args:
        logits (torch.Tensor): The logits to sample from.
        top_k (int, optional): The number of highest probability tokens to keep for top-k filtering.
            Must be a positive integer. Defaults to 50.
        top_p (float, optional): The cumulative probability threshold for nucleus sampling.
            Must be between 0 and 1. Defaults to 1.0.
        temperature (float, optional): The scaling factor to adjust the logits distribution.
            Must be strictly positive. Defaults to 1.0.

    Returns:
        torch.Tensor: The sampled token.
    """

    # Adjust logits using temperature
    if temperature != 1.0:
        logits = logits / temperature

    # Top-p/top-k filtering
    logits = top_k_top_p_filtering(logits, top_k=top_k, top_p=top_p)

    # Sample from the filtered distribution
    token = torch.multinomial(F.softmax(logits, dim=-1), num_samples=1)
    return token

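Note that top_k_top_p_filtering writes filter_value into the logits tensor in place, so callers that reuse the logits should pass a clone. A minimal sampling sketch:

    import torch

    logits = torch.randn(2, 1024)  # (batch, vocab)
    token = topk_sampling(logits.clone(), top_k=50, top_p=0.9, temperature=1.0)
    print(token.shape)  # torch.Size([2, 1]): one sampled token id per row
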
utils/trainer_utils.py
ADDED
@@ -0,0 +1,16 @@
# Copyright (c) 2023 Amphion.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

import torch


def check_nan(logger, loss, y_pred, y_gt):
    if torch.any(torch.isnan(loss)):
        logger.info("out has nan: {}".format(torch.any(torch.isnan(y_pred))))
        logger.info("y_gt has nan: {}".format(torch.any(torch.isnan(y_gt))))
        logger.info("out: {}".format(y_pred))
        logger.info("y_gt: {}".format(y_gt))
        logger.info("loss = {:.4f}\n".format(loss.item()))
        exit()

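A self-contained sketch of how check_nan is meant to be wired into a training step; the tensors here are toy values chosen to trigger the NaN branch:

    import logging
    import torch

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger(__name__)

    y_pred = torch.tensor([1.0, float("nan")])
    y_gt = torch.tensor([1.0, 2.0])
    loss = torch.nn.functional.mse_loss(y_pred, y_gt)  # NaN propagates into the loss

    check_nan(logger, loss, y_pred, y_gt)  # logs the tensors, then calls exit()
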
utils/util.py
ADDED
@@ -0,0 +1,687 @@
1 |
+
# Copyright (c) 2023 Amphion.
|
2 |
+
#
|
3 |
+
# This source code is licensed under the MIT license found in the
|
4 |
+
# LICENSE file in the root directory of this source tree.
|
5 |
+
|
6 |
+
|
7 |
+
import collections
|
8 |
+
import glob
|
9 |
+
import os
|
10 |
+
import random
|
11 |
+
import time
|
12 |
+
import argparse
|
13 |
+
from collections import OrderedDict
|
14 |
+
|
15 |
+
import json5
|
16 |
+
import numpy as np
|
17 |
+
import glob
|
18 |
+
from torch.nn import functional as F
|
19 |
+
|
20 |
+
|
21 |
+
try:
|
22 |
+
from ruamel.yaml import YAML as yaml
|
23 |
+
except:
|
24 |
+
from ruamel_yaml import YAML as yaml
|
25 |
+
|
26 |
+
import torch
|
27 |
+
|
28 |
+
from utils.hparam import HParams
|
29 |
+
import logging
|
30 |
+
from logging import handlers
|
31 |
+
|
32 |
+
|
33 |
+
def str2bool(v):
|
34 |
+
"""Used in argparse.ArgumentParser.add_argument to indicate
|
35 |
+
that a type is a bool type and user can enter
|
36 |
+
|
37 |
+
- yes, true, t, y, 1, to represent True
|
38 |
+
- no, false, f, n, 0, to represent False
|
39 |
+
|
40 |
+
See https://stackoverflow.com/questions/15008758/parsing-boolean-values-with-argparse # noqa
|
41 |
+
"""
|
42 |
+
if isinstance(v, bool):
|
43 |
+
return v
|
44 |
+
if v.lower() in ("yes", "true", "t", "y", "1"):
|
45 |
+
return True
|
46 |
+
elif v.lower() in ("no", "false", "f", "n", "0"):
|
47 |
+
return False
|
48 |
+
else:
|
49 |
+
raise argparse.ArgumentTypeError("Boolean value expected.")
|
50 |
+
|
51 |
+
|
52 |
+
def find_checkpoint_of_mapper(mapper_ckpt_dir):
|
53 |
+
mapper_ckpts = glob.glob(os.path.join(mapper_ckpt_dir, "ckpts/*.pt"))
|
54 |
+
|
55 |
+
# Select the max steps
|
56 |
+
mapper_ckpts.sort()
|
57 |
+
mapper_weights_file = mapper_ckpts[-1]
|
58 |
+
return mapper_weights_file
|
59 |
+
|
60 |
+
|
61 |
+
def pad_f0_to_tensors(f0s, batched=None):
|
62 |
+
# Initialize
|
63 |
+
tensors = []
|
64 |
+
|
65 |
+
if batched == None:
|
66 |
+
# Get the max frame for padding
|
67 |
+
size = -1
|
68 |
+
for f0 in f0s:
|
69 |
+
size = max(size, f0.shape[-1])
|
70 |
+
|
71 |
+
tensor = torch.zeros(len(f0s), size)
|
72 |
+
|
73 |
+
for i, f0 in enumerate(f0s):
|
74 |
+
tensor[i, : f0.shape[-1]] = f0[:]
|
75 |
+
|
76 |
+
tensors.append(tensor)
|
77 |
+
else:
|
78 |
+
start = 0
|
79 |
+
while start + batched - 1 < len(f0s):
|
80 |
+
end = start + batched - 1
|
81 |
+
|
82 |
+
# Get the max frame for padding
|
83 |
+
size = -1
|
84 |
+
for i in range(start, end + 1):
|
85 |
+
size = max(size, f0s[i].shape[-1])
|
86 |
+
|
87 |
+
tensor = torch.zeros(batched, size)
|
88 |
+
|
89 |
+
for i in range(start, end + 1):
|
90 |
+
tensor[i - start, : f0s[i].shape[-1]] = f0s[i][:]
|
91 |
+
|
92 |
+
tensors.append(tensor)
|
93 |
+
|
94 |
+
start = start + batched
|
95 |
+
|
96 |
+
if start != len(f0s):
|
97 |
+
end = len(f0s)
|
98 |
+
|
99 |
+
# Get the max frame for padding
|
100 |
+
size = -1
|
101 |
+
for i in range(start, end):
|
102 |
+
size = max(size, f0s[i].shape[-1])
|
103 |
+
|
104 |
+
tensor = torch.zeros(len(f0s) - start, size)
|
105 |
+
|
106 |
+
for i in range(start, end):
|
107 |
+
tensor[i - start, : f0s[i].shape[-1]] = f0s[i][:]
|
108 |
+
|
109 |
+
tensors.append(tensor)
|
110 |
+
|
111 |
+
return tensors
|
112 |
+
|
113 |
+
|
114 |
+
def pad_mels_to_tensors(mels, batched=None):
|
115 |
+
"""
|
116 |
+
Args:
|
117 |
+
mels: A list of mel-specs
|
118 |
+
Returns:
|
119 |
+
tensors: A list of tensors containing the batched mel-specs
|
120 |
+
mel_frames: A list of tensors containing the frames of the original mel-specs
|
121 |
+
"""
|
122 |
+
# Initialize
|
123 |
+
tensors = []
|
124 |
+
mel_frames = []
|
125 |
+
|
126 |
+
# Split mel-specs into batches to avoid cuda memory exceed
|
127 |
+
if batched == None:
|
128 |
+
# Get the max frame for padding
|
129 |
+
size = -1
|
130 |
+
for mel in mels:
|
131 |
+
size = max(size, mel.shape[-1])
|
132 |
+
|
133 |
+
tensor = torch.zeros(len(mels), mels[0].shape[0], size)
|
134 |
+
mel_frame = torch.zeros(len(mels), dtype=torch.int32)
|
135 |
+
|
136 |
+
for i, mel in enumerate(mels):
|
137 |
+
tensor[i, :, : mel.shape[-1]] = mel[:]
|
138 |
+
mel_frame[i] = mel.shape[-1]
|
139 |
+
|
140 |
+
tensors.append(tensor)
|
141 |
+
mel_frames.append(mel_frame)
|
142 |
+
else:
|
143 |
+
start = 0
|
144 |
+
while start + batched - 1 < len(mels):
|
145 |
+
end = start + batched - 1
|
146 |
+
|
147 |
+
# Get the max frame for padding
|
148 |
+
size = -1
|
149 |
+
for i in range(start, end + 1):
|
150 |
+
size = max(size, mels[i].shape[-1])
|
151 |
+
|
152 |
+
tensor = torch.zeros(batched, mels[0].shape[0], size)
|
153 |
+
mel_frame = torch.zeros(batched, dtype=torch.int32)
|
154 |
+
|
155 |
+
for i in range(start, end + 1):
|
156 |
+
tensor[i - start, :, : mels[i].shape[-1]] = mels[i][:]
|
157 |
+
mel_frame[i - start] = mels[i].shape[-1]
|
158 |
+
|
159 |
+
tensors.append(tensor)
|
160 |
+
mel_frames.append(mel_frame)
|
161 |
+
|
162 |
+
start = start + batched
|
163 |
+
|
164 |
+
if start != len(mels):
|
165 |
+
end = len(mels)
|
166 |
+
|
167 |
+
# Get the max frame for padding
|
168 |
+
size = -1
|
169 |
+
for i in range(start, end):
|
170 |
+
size = max(size, mels[i].shape[-1])
|
171 |
+
|
172 |
+
tensor = torch.zeros(len(mels) - start, mels[0].shape[0], size)
|
173 |
+
mel_frame = torch.zeros(len(mels) - start, dtype=torch.int32)
|
174 |
+
|
175 |
+
for i in range(start, end):
|
176 |
+
tensor[i - start, :, : mels[i].shape[-1]] = mels[i][:]
|
177 |
+
mel_frame[i - start] = mels[i].shape[-1]
|
178 |
+
|
179 |
+
tensors.append(tensor)
|
180 |
+
mel_frames.append(mel_frame)
|
181 |
+
|
182 |
+
return tensors, mel_frames
|
183 |
+
|
184 |
+
|
185 |
+
def load_model_config(args):
|
186 |
+
"""Load model configurations (in args.json under checkpoint directory)
|
187 |
+
|
188 |
+
Args:
|
189 |
+
args (ArgumentParser): arguments to run bins/preprocess.py
|
190 |
+
|
191 |
+
Returns:
|
192 |
+
dict: dictionary that stores model configurations
|
193 |
+
"""
|
194 |
+
if args.checkpoint_dir is None:
|
195 |
+
assert args.checkpoint_file is not None
|
196 |
+
checkpoint_dir = os.path.split(args.checkpoint_file)[0]
|
197 |
+
else:
|
198 |
+
checkpoint_dir = args.checkpoint_dir
|
199 |
+
config_path = os.path.join(checkpoint_dir, "args.json")
|
200 |
+
print("config_path: ", config_path)
|
201 |
+
|
202 |
+
config = load_config(config_path)
|
203 |
+
return config
|
204 |
+
|
205 |
+
|
206 |
+
def remove_and_create(dir):
|
207 |
+
if os.path.exists(dir):
|
208 |
+
os.system("rm -r {}".format(dir))
|
209 |
+
os.makedirs(dir, exist_ok=True)
|
210 |
+
|
211 |
+
|
212 |
+
def has_existed(path, warning=False):
|
213 |
+
if not warning:
|
214 |
+
return os.path.exists(path)
|
215 |
+
|
216 |
+
if os.path.exists(path):
|
217 |
+
answer = input(
|
218 |
+
"The path {} has existed. \nInput 'y' (or hit Enter) to skip it, and input 'n' to re-write it [y/n]\n".format(
|
219 |
+
path
|
220 |
+
)
|
221 |
+
)
|
222 |
+
if not answer == "n":
|
223 |
+
return True
|
224 |
+
|
225 |
+
return False
|
226 |
+
|
227 |
+
|
228 |
+
def remove_older_ckpt(saved_model_name, checkpoint_dir, max_to_keep=5):
|
229 |
+
if os.path.exists(os.path.join(checkpoint_dir, "checkpoint")):
|
230 |
+
with open(os.path.join(checkpoint_dir, "checkpoint"), "r") as f:
|
231 |
+
ckpts = [x.strip() for x in f.readlines()]
|
232 |
+
else:
|
233 |
+
ckpts = []
|
234 |
+
ckpts.append(saved_model_name)
|
235 |
+
for item in ckpts[:-max_to_keep]:
|
236 |
+
if os.path.exists(os.path.join(checkpoint_dir, item)):
|
237 |
+
os.remove(os.path.join(checkpoint_dir, item))
|
238 |
+
with open(os.path.join(checkpoint_dir, "checkpoint"), "w") as f:
|
239 |
+
for item in ckpts[-max_to_keep:]:
|
240 |
+
f.write("{}\n".format(item))
|
241 |
+
|
242 |
+
|
243 |
+
def set_all_random_seed(seed: int):
|
244 |
+
random.seed(seed)
|
245 |
+
np.random.seed(seed)
|
246 |
+
torch.random.manual_seed(seed)
|
247 |
+
|
248 |
+
|
249 |
+
def save_checkpoint(
|
250 |
+
args,
|
251 |
+
generator,
|
252 |
+
g_optimizer,
|
253 |
+
step,
|
254 |
+
discriminator=None,
|
255 |
+
d_optimizer=None,
|
256 |
+
max_to_keep=5,
|
257 |
+
):
|
258 |
+
saved_model_name = "model.ckpt-{}.pt".format(step)
|
259 |
+
checkpoint_path = os.path.join(args.checkpoint_dir, saved_model_name)
|
260 |
+
|
261 |
+
if discriminator and d_optimizer:
|
262 |
+
torch.save(
|
263 |
+
{
|
264 |
+
"generator": generator.state_dict(),
|
265 |
+
"discriminator": discriminator.state_dict(),
|
266 |
+
"g_optimizer": g_optimizer.state_dict(),
|
267 |
+
"d_optimizer": d_optimizer.state_dict(),
|
268 |
+
"global_step": step,
|
269 |
+
},
|
270 |
+
checkpoint_path,
|
271 |
+
)
|
272 |
+
else:
|
273 |
+
torch.save(
|
274 |
+
{
|
275 |
+
"generator": generator.state_dict(),
|
276 |
+
"g_optimizer": g_optimizer.state_dict(),
|
277 |
+
"global_step": step,
|
278 |
+
},
|
279 |
+
checkpoint_path,
|
280 |
+
)
|
281 |
+
|
282 |
+
print("Saved checkpoint: {}".format(checkpoint_path))
|
283 |
+
|
284 |
+
if os.path.exists(os.path.join(args.checkpoint_dir, "checkpoint")):
|
285 |
+
with open(os.path.join(args.checkpoint_dir, "checkpoint"), "r") as f:
|
286 |
+
ckpts = [x.strip() for x in f.readlines()]
|
287 |
+
else:
|
288 |
+
ckpts = []
|
289 |
+
ckpts.append(saved_model_name)
|
290 |
+
for item in ckpts[:-max_to_keep]:
|
291 |
+
if os.path.exists(os.path.join(args.checkpoint_dir, item)):
|
292 |
+
os.remove(os.path.join(args.checkpoint_dir, item))
|
293 |
+
with open(os.path.join(args.checkpoint_dir, "checkpoint"), "w") as f:
|
294 |
+
for item in ckpts[-max_to_keep:]:
|
295 |
+
f.write("{}\n".format(item))
|
296 |
+
|
297 |
+
|
298 |
+
def attempt_to_restore(
|
299 |
+
generator, g_optimizer, checkpoint_dir, discriminator=None, d_optimizer=None
|
300 |
+
):
|
301 |
+
checkpoint_list = os.path.join(checkpoint_dir, "checkpoint")
|
302 |
+
if os.path.exists(checkpoint_list):
|
303 |
+
checkpoint_filename = open(checkpoint_list).readlines()[-1].strip()
|
304 |
+
checkpoint_path = os.path.join(checkpoint_dir, "{}".format(checkpoint_filename))
|
305 |
+
print("Restore from {}".format(checkpoint_path))
|
306 |
+
checkpoint = torch.load(checkpoint_path, map_location="cpu")
|
307 |
+
if generator:
|
308 |
+
if not list(generator.state_dict().keys())[0].startswith("module."):
|
309 |
+
raw_dict = checkpoint["generator"]
|
310 |
+
clean_dict = OrderedDict()
|
311 |
+
for k, v in raw_dict.items():
|
312 |
+
if k.startswith("module."):
|
313 |
+
clean_dict[k[7:]] = v
|
314 |
+
else:
|
315 |
+
clean_dict[k] = v
|
316 |
+
generator.load_state_dict(clean_dict)
|
317 |
+
else:
|
318 |
+
generator.load_state_dict(checkpoint["generator"])
|
319 |
+
if g_optimizer:
|
320 |
+
g_optimizer.load_state_dict(checkpoint["g_optimizer"])
|
321 |
+
global_step = 100000
|
322 |
+
if discriminator and "discriminator" in checkpoint.keys():
|
323 |
+
discriminator.load_state_dict(checkpoint["discriminator"])
|
324 |
+
global_step = checkpoint["global_step"]
|
325 |
+
print("restore discriminator")
|
326 |
+
if d_optimizer and "d_optimizer" in checkpoint.keys():
|
327 |
+
d_optimizer.load_state_dict(checkpoint["d_optimizer"])
|
328 |
+
print("restore d_optimizer...")
|
329 |
+
else:
|
330 |
+
global_step = 0
|
331 |
+
return global_step
|
332 |
+
|
333 |
+
|
334 |
+
class ExponentialMovingAverage(object):
|
335 |
+
def __init__(self, decay):
|
336 |
+
self.decay = decay
|
337 |
+
self.shadow = {}
|
338 |
+
|
339 |
+
def register(self, name, val):
|
340 |
+
self.shadow[name] = val.clone()
|
341 |
+
|
342 |
+
def update(self, name, x):
|
343 |
+
assert name in self.shadow
|
344 |
+
update_delta = self.shadow[name] - x
|
345 |
+
self.shadow[name] -= (1.0 - self.decay) * update_delta
|
346 |
+
|
347 |
+
|
348 |
+
def apply_moving_average(model, ema):
|
349 |
+
for name, param in model.named_parameters():
|
350 |
+
if name in ema.shadow:
|
351 |
+
ema.update(name, param.data)
|
352 |
+
|
353 |
+
|
354 |
+
def register_model_to_ema(model, ema):
|
355 |
+
for name, param in model.named_parameters():
|
356 |
+
if param.requires_grad:
|
357 |
+
ema.register(name, param.data)
|
358 |
+
|
359 |
+
|
360 |
+
class YParams(HParams):
|
361 |
+
def __init__(self, yaml_file):
|
362 |
+
if not os.path.exists(yaml_file):
|
363 |
+
raise IOError("yaml file: {} is not existed".format(yaml_file))
|
364 |
+
super().__init__()
|
365 |
+
self.d = collections.OrderedDict()
|
366 |
+
with open(yaml_file) as fp:
|
367 |
+
for _, v in yaml().load(fp).items():
|
368 |
+
for k1, v1 in v.items():
|
369 |
+
try:
|
370 |
+
if self.get(k1):
|
371 |
+
self.set_hparam(k1, v1)
|
372 |
+
else:
|
373 |
+
self.add_hparam(k1, v1)
|
374 |
+
self.d[k1] = v1
|
375 |
+
except Exception:
|
376 |
+
import traceback
|
377 |
+
|
378 |
+
print(traceback.format_exc())
|
379 |
+
|
380 |
+
# @property
|
381 |
+
def get_elements(self):
|
382 |
+
return self.d.items()
|
383 |
+
|
384 |
+
|
385 |
+
def override_config(base_config, new_config):
|
386 |
+
"""Update new configurations in the original dict with the new dict
|
387 |
+
|
388 |
+
Args:
|
389 |
+
base_config (dict): original dict to be overridden
|
390 |
+
new_config (dict): dict with new configurations
|
391 |
+
|
392 |
+
Returns:
|
393 |
+
dict: updated configuration dict
|
394 |
+
"""
|
395 |
+
for k, v in new_config.items():
|
396 |
+
if type(v) == dict:
|
397 |
+
if k not in base_config.keys():
|
398 |
+
base_config[k] = {}
|
399 |
+
base_config[k] = override_config(base_config[k], v)
|
400 |
+
else:
|
401 |
+
base_config[k] = v
|
402 |
+
return base_config
|
403 |
+
|
404 |
+
|
405 |
+
def get_lowercase_keys_config(cfg):
|
406 |
+
"""Change all keys in cfg to lower case
|
407 |
+
|
408 |
+
Args:
|
409 |
+
cfg (dict): dictionary that stores configurations
|
410 |
+
|
411 |
+
Returns:
|
412 |
+
dict: dictionary that stores configurations
|
413 |
+
"""
|
414 |
+
updated_cfg = dict()
|
415 |
+
for k, v in cfg.items():
|
416 |
+
if type(v) == dict:
|
417 |
+
v = get_lowercase_keys_config(v)
|
418 |
+
updated_cfg[k.lower()] = v
|
419 |
+
return updated_cfg
|
420 |
+
|
421 |
+
|
422 |
+
def _load_config(config_fn, lowercase=False):
|
423 |
+
"""Load configurations into a dictionary
|
424 |
+
|
425 |
+
Args:
|
426 |
+
config_fn (str): path to configuration file
|
427 |
+
lowercase (bool, optional): whether changing keys to lower case. Defaults to False.
|
428 |
+
|
429 |
+
Returns:
|
430 |
+
dict: dictionary that stores configurations
|
431 |
+
"""
|
432 |
+
with open(config_fn, "r") as f:
|
433 |
+
data = f.read()
|
434 |
+
config_ = json5.loads(data)
|
435 |
+
if "base_config" in config_:
|
436 |
+
# load configurations from new path
|
437 |
+
p_config_path = os.path.join(os.getenv("WORK_DIR"), config_["base_config"])
|
438 |
+
p_config_ = _load_config(p_config_path)
|
439 |
+
config_ = override_config(p_config_, config_)
|
440 |
+
if lowercase:
|
441 |
+
# change keys in config_ to lower case
|
442 |
+
config_ = get_lowercase_keys_config(config_)
|
443 |
+
return config_
|
444 |
+
|
445 |
+
|
446 |
+
def load_config(config_fn, lowercase=False):
|
447 |
+
"""Load configurations into a dictionary
|
448 |
+
|
449 |
+
Args:
|
450 |
+
config_fn (str): path to configuration file
|
451 |
+
lowercase (bool, optional): _description_. Defaults to False.
|
452 |
+
|
453 |
+
Returns:
|
454 |
+
JsonHParams: an object that stores configurations
|
455 |
+
"""
|
456 |
+
config_ = _load_config(config_fn, lowercase=lowercase)
|
457 |
+
# create an JsonHParams object with configuration dict
|
458 |
+
cfg = JsonHParams(**config_)
|
459 |
+
return cfg
|
460 |
+
|
461 |
+
|
462 |
+
def save_config(save_path, cfg):
|
463 |
+
"""Save configurations into a json file
|
464 |
+
|
465 |
+
Args:
|
466 |
+
save_path (str): path to save configurations
|
467 |
+
cfg (dict): dictionary that stores configurations
|
468 |
+
"""
|
469 |
+
with open(save_path, "w") as f:
|
470 |
+
json5.dump(
|
471 |
+
cfg, f, ensure_ascii=False, indent=4, quote_keys=True, sort_keys=True
|
472 |
+
)
|
473 |
+
|
474 |
+
|
475 |
+
class JsonHParams:
|
476 |
+
def __init__(self, **kwargs):
|
477 |
+
for k, v in kwargs.items():
|
478 |
+
if type(v) == dict:
|
479 |
+
v = JsonHParams(**v)
|
480 |
+
self[k] = v
|
481 |
+
|
482 |
+
def keys(self):
|
483 |
+
return self.__dict__.keys()
|
484 |
+
|
485 |
+
def items(self):
|
486 |
+
return self.__dict__.items()
|
487 |
+
|
488 |
+
def values(self):
|
489 |
+
return self.__dict__.values()
|
490 |
+
|
491 |
+
def __len__(self):
|
492 |
+
return len(self.__dict__)
|
493 |
+
|
494 |
+
def __getitem__(self, key):
|
495 |
+
return getattr(self, key)
|
496 |
+
|
497 |
+
def __setitem__(self, key, value):
|
498 |
+
return setattr(self, key, value)
|
499 |
+
|
500 |
+
def __contains__(self, key):
|
501 |
+
return key in self.__dict__
|
502 |
+
|
503 |
+
def __repr__(self):
|
504 |
+
return self.__dict__.__repr__()
|
505 |
+
|
506 |
+
|
507 |
+
class ValueWindow:
|
508 |
+
def __init__(self, window_size=100):
|
509 |
+
self._window_size = window_size
|
510 |
+
self._values = []
|
511 |
+
|
512 |
+
def append(self, x):
|
513 |
+
self._values = self._values[-(self._window_size - 1) :] + [x]
|
514 |
+
|
515 |
+
@property
|
516 |
+
def sum(self):
|
517 |
+
return sum(self._values)
|
518 |
+
|
519 |
+
@property
|
520 |
+
def count(self):
|
521 |
+
return len(self._values)
|
522 |
+
|
523 |
+
@property
|
524 |
+
def average(self):
|
525 |
+
return self.sum / max(1, self.count)
|
526 |
+
|
527 |
+
def reset(self):
|
528 |
+
self._values = []
|
529 |
+
|
530 |
+
|
531 |
+
class Logger(object):
|
532 |
+
def __init__(
|
533 |
+
self,
|
534 |
+
filename,
|
535 |
+
level="info",
|
536 |
+
when="D",
|
537 |
+
backCount=10,
|
538 |
+
fmt="%(asctime)s : %(message)s",
|
539 |
+
):
|
540 |
+
self.level_relations = {
|
541 |
+
"debug": logging.DEBUG,
|
542 |
+
"info": logging.INFO,
|
543 |
+
"warning": logging.WARNING,
|
544 |
+
"error": logging.ERROR,
|
545 |
+
"crit": logging.CRITICAL,
|
546 |
+
}
|
547 |
+
if level == "debug":
|
548 |
+
fmt = "%(asctime)s - %(pathname)s[line:%(lineno)d] - %(levelname)s: %(message)s"
|
549 |
+
self.logger = logging.getLogger(filename)
|
550 |
+
format_str = logging.Formatter(fmt)
|
551 |
+
self.logger.setLevel(self.level_relations.get(level))
|
552 |
+
sh = logging.StreamHandler()
|
553 |
+
sh.setFormatter(format_str)
|
554 |
+
th = handlers.TimedRotatingFileHandler(
|
555 |
+
filename=filename, when=when, backupCount=backCount, encoding="utf-8"
|
556 |
+
)
|
557 |
+
th.setFormatter(format_str)
|
558 |
+
self.logger.addHandler(sh)
|
559 |
+
self.logger.addHandler(th)
|
560 |
+
self.logger.info(
|
561 |
+
"==========================New Starting Here=============================="
|
562 |
+
)
|
563 |
+
|
564 |
+
|
565 |
+
def init_weights(m, mean=0.0, std=0.01):
|
566 |
+
classname = m.__class__.__name__
|
567 |
+
if classname.find("Conv") != -1:
|
568 |
+
m.weight.data.normal_(mean, std)
|
569 |
+
|
570 |
+
|
571 |
+
def get_padding(kernel_size, dilation=1):
|
572 |
+
return int((kernel_size * dilation - dilation) / 2)
|
573 |
+
|
574 |
+
|
575 |
+
def slice_segments(x, ids_str, segment_size=4):
|
576 |
+
+    ret = torch.zeros_like(x[:, :, :segment_size])
+    for i in range(x.size(0)):
+        idx_str = ids_str[i]
+        idx_end = idx_str + segment_size
+        ret[i] = x[i, :, idx_str:idx_end]
+    return ret
+
+
+def rand_slice_segments(x, x_lengths=None, segment_size=4):
+    b, d, t = x.size()
+    if x_lengths is None:
+        x_lengths = t
+    ids_str_max = x_lengths - segment_size + 1
+    ids_str = (torch.rand([b]).to(device=x.device) * ids_str_max).to(dtype=torch.long)
+    ret = slice_segments(x, ids_str, segment_size)
+    return ret, ids_str
+
+
+def subsequent_mask(length):
+    mask = torch.tril(torch.ones(length, length)).unsqueeze(0).unsqueeze(0)
+    return mask
+
+
+@torch.jit.script
+def fused_add_tanh_sigmoid_multiply(input_a, input_b, n_channels):
+    n_channels_int = n_channels[0]
+    in_act = input_a + input_b
+    t_act = torch.tanh(in_act[:, :n_channels_int, :])
+    s_act = torch.sigmoid(in_act[:, n_channels_int:, :])
+    acts = t_act * s_act
+    return acts
+
+
+def convert_pad_shape(pad_shape):
+    l = pad_shape[::-1]
+    pad_shape = [item for sublist in l for item in sublist]
+    return pad_shape
+
+
+def sequence_mask(length, max_length=None):
+    if max_length is None:
+        max_length = length.max()
+    x = torch.arange(max_length, dtype=length.dtype, device=length.device)
+    return x.unsqueeze(0) < length.unsqueeze(1)
+
+
+def generate_path(duration, mask):
+    """
+    duration: [b, 1, t_x]
+    mask: [b, 1, t_y, t_x]
+    """
+    device = duration.device
+
+    b, _, t_y, t_x = mask.shape
+    cum_duration = torch.cumsum(duration, -1)
+
+    cum_duration_flat = cum_duration.view(b * t_x)
+    path = sequence_mask(cum_duration_flat, t_y).to(mask.dtype)
+    path = path.view(b, t_x, t_y)
+    path = path - F.pad(path, convert_pad_shape([[0, 0], [1, 0], [0, 0]]))[:, :-1]
+    path = path.unsqueeze(1).transpose(2, 3) * mask
+    return path
+
+
+def clip_grad_value_(parameters, clip_value, norm_type=2):
+    if isinstance(parameters, torch.Tensor):
+        parameters = [parameters]
+    parameters = list(filter(lambda p: p.grad is not None, parameters))
+    norm_type = float(norm_type)
+    if clip_value is not None:
+        clip_value = float(clip_value)
+
+    total_norm = 0
+    for p in parameters:
+        param_norm = p.grad.data.norm(norm_type)
+        total_norm += param_norm.item() ** norm_type
+        if clip_value is not None:
+            p.grad.data.clamp_(min=-clip_value, max=clip_value)
+    total_norm = total_norm ** (1.0 / norm_type)
+    return total_norm
+
+
+def get_current_time():
+    pass
+
+
+def make_pad_mask(lengths: torch.Tensor, max_len: int = 0) -> torch.Tensor:
+    """
+    Args:
+      lengths:
+        A 1-D tensor containing sentence lengths.
+      max_len:
+        The length of masks.
+    Returns:
+      Return a 2-D bool tensor, where masked positions
+      are filled with `True` and non-masked positions are
+      filled with `False`.
+
+    >>> lengths = torch.tensor([1, 3, 2, 5])
+    >>> make_pad_mask(lengths)
+    tensor([[False, True, True, True, True],
+            [False, False, False, True, True],
+            [False, False, True, True, True],
+            [False, False, False, False, False]])
+    """
+    assert lengths.ndim == 1, lengths.ndim
+    max_len = max(max_len, lengths.max())
+    n = lengths.size(0)
+    seq_range = torch.arange(0, max_len, device=lengths.device)
+    expaned_lengths = seq_range.unsqueeze(0).expand(n, max_len)
+
+    return expaned_lengths >= lengths.unsqueeze(-1)
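Taken together, these helpers cover the random-crop and padding-mask utilities used during windowed training. A minimal usage sketch (the tensor shapes here are illustrative assumptions, not values from any config in the repo):

```python
import torch

# Illustrative shapes: batch of 3 mel sequences, 80 bins, up to 200 frames.
x = torch.randn(3, 80, 200)
x_lengths = torch.tensor([200, 150, 180])

# Random fixed-size crops, e.g. for windowed decoder training.
segments, ids_str = rand_slice_segments(x, x_lengths, segment_size=32)
assert segments.shape == (3, 80, 32)

# Boolean padding mask: True marks positions beyond each sequence length.
pad_mask = make_pad_mask(torch.tensor([1, 3, 2, 5]))
assert pad_mask.shape == (4, 5)
```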
utils/whisper_transcription.py
ADDED
@@ -0,0 +1,122 @@
+# Copyright (c) 2023 Amphion.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+import os
+import pathlib
+import string
+import time
+from multiprocessing import Pool, Value, Lock
+from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor
+import torch
+import whisper
+
+processed_files_count = Value("i", 0)  # count of processed files
+lock = Lock()  # lock for the count
+
+
+def preprocess_text(text):
+    """Preprocess text after ASR: lowercase and strip punctuation"""
+    return text.lower().translate(str.maketrans("", "", string.punctuation))
+
+
+def transcribe_audio(model, processor, audio_file, device):
+    """Transcribe one audio file"""
+    audio = whisper.load_audio(audio_file)  # load from path
+    audio = whisper.pad_or_trim(audio)  # default 30 seconds
+    inputs = whisper.log_mel_spectrogram(audio).to(
+        device=device
+    )  # convert to spectrogram
+    # Add batch dimension and match the model precision (the original
+    # `torch.cuda.HalfTensor` cast fails on CPU-only machines).
+    inputs = inputs.unsqueeze(0).to(
+        dtype=torch.float16 if "cuda" in device else torch.float32
+    )
+
+    outputs = model.generate(
+        inputs=inputs, max_new_tokens=128
+    )  # generate transcription
+    transcription = processor.batch_decode(outputs, skip_special_tokens=True)[
+        0
+    ]  # decode
+    transcription_processed = preprocess_text(transcription)  # preprocess
+    return transcription_processed
+
+
+def write_transcription(audio_file, transcription):
+    """Write the transcription to a txt file next to the audio"""
+    txt_file = audio_file.with_suffix(".txt")
+    with open(txt_file, "w") as file:
+        file.write(transcription)
+
+
+def init_whisper(model_id, device):
+    """Initialize whisper model and processor"""
+    torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
+    print(f"Loading model {model_id}")  # model_id = "distil-whisper/distil-large-v2"
+    distil_model = AutoModelForSpeechSeq2Seq.from_pretrained(
+        model_id, torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=False
+    )
+    distil_model = distil_model.to(device)
+    processor = AutoProcessor.from_pretrained(model_id)
+    return distil_model, processor
+
+
+def asr_wav_files(file_list, gpu_id, total_files, model_id):
+    """Transcribe the wav files in a list on one GPU (or CPU)"""
+    device = f"cuda:{gpu_id}" if torch.cuda.is_available() else "cpu"
+    whisper_model, processor = init_whisper(model_id, device)
+    print(f"Processing on {device} starts")
+    start_time = time.time()
+    for audio_file in file_list:
+        try:
+            transcription = transcribe_audio(
+                whisper_model, processor, audio_file, device
+            )
+            write_transcription(audio_file, transcription)
+            with lock:
+                processed_files_count.value += 1
+                if processed_files_count.value % 5 == 0:
+                    current_time = time.time()
+                    avg_time_per_file = (current_time - start_time) / (
+                        processed_files_count.value
+                    )
+                    remaining_files = total_files - processed_files_count.value
+                    estimated_time_remaining = avg_time_per_file * remaining_files
+                    remaining_time_formatted = time.strftime(
+                        "%H:%M:%S", time.gmtime(estimated_time_remaining)
+                    )
+                    print(
+                        f"Processed {processed_files_count.value}/{total_files} files, time: {time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())}, Estimated time remaining: {remaining_time_formatted}"
+                    )
+        except Exception as e:
+            print(f"Error processing file {audio_file}: {e}")
+
+
+def asr_main(input_dir, num_gpus, model_id):
+    """Transcribe all wav files under a directory"""
+    num_processes = min(num_gpus, os.cpu_count())
+    print(f"Using {num_processes} GPUs for transcription")
+    wav_files = list(pathlib.Path(input_dir).rglob("*.wav"))
+    total_files = len(wav_files)
+    print(f"Found {total_files} wav files in {input_dir}")
+    # Note: an uneven split leaves the remainder (total % num_processes) files unassigned.
+    files_per_process = len(wav_files) // num_processes
+    print(f"Processing {files_per_process} files per process")
+    with Pool(num_processes) as p:
+        p.starmap(
+            asr_wav_files,
+            [
+                (
+                    wav_files[i * files_per_process : (i + 1) * files_per_process],
+                    i % num_gpus,
+                    total_files,
+                    model_id,
+                )
+                for i in range(num_processes)
+            ],
+        )
+    print("Done!")
+
+
+if __name__ == "__main__":
+    input_dir = "/path/to/output/directory"
+    num_gpus = 2
+    model_id = "distil-whisper/distil-large-v2"
+    asr_main(input_dir, num_gpus, model_id)
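The `__main__` block above drives a multi-GPU batch run; for a quick single-file check the same pieces can be called directly. A sketch, assuming the module's functions are in scope and `sample.wav` is a placeholder path:

```python
import pathlib
import torch

device = "cuda:0" if torch.cuda.is_available() else "cpu"
model, processor = init_whisper("distil-whisper/distil-large-v2", device)

wav = pathlib.Path("sample.wav")  # placeholder; write_transcription expects a Path
text = transcribe_audio(model, processor, str(wav), device)
write_transcription(wav, text)  # writes sample.txt next to the wav
```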
utils/world.py
ADDED
@@ -0,0 +1,92 @@
+# Copyright (c) 2023 Amphion.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+# 1. Extract WORLD features including F0, AP, SP
+# 2. Transform between SP and MCEP
+import torchaudio
+import pyworld as pw
+import numpy as np
+import torch
+import diffsptk
+import os
+from tqdm import tqdm
+import pickle
+
+
+def get_mcep_params(fs):
+    """Hyperparameters of the transformation between SP and MCEP
+
+    Reference:
+    https://github.com/CSTR-Edinburgh/merlin/blob/master/misc/scripts/vocoder/world_v2/copy_synthesis.sh
+
+    """
+    if fs in [44100, 48000]:
+        fft_size = 2048
+        alpha = 0.77
+    if fs in [16000]:
+        fft_size = 1024
+        alpha = 0.58
+    return fft_size, alpha
+
+
+def extract_world_features(waveform, fs, frameshift=10):
+    # waveform: (1, seq)
+    # x: (seq,)
+    # Note: `fs` must be a parameter here; the original signature omitted it
+    # while still referencing it below, which raised a NameError.
+    x = np.array(waveform, dtype=np.double)
+
+    _f0, t = pw.dio(x, fs, frame_period=frameshift)  # raw pitch extractor
+    f0 = pw.stonemask(x, _f0, t, fs)  # pitch refinement
+    sp = pw.cheaptrick(x, f0, t, fs)  # extract smoothed spectrogram
+    ap = pw.d4c(x, f0, t, fs)  # extract aperiodicity
+
+    return f0, sp, ap, fs
+
+
+def sp2mcep(x, mcsize, fs):
+    fft_size, alpha = get_mcep_params(fs)
+    x = torch.as_tensor(x, dtype=torch.float)
+
+    tmp = diffsptk.ScalarOperation("SquareRoot")(x)
+    tmp = diffsptk.ScalarOperation("Multiplication", 32768.0)(tmp)
+    mgc = diffsptk.MelCepstralAnalysis(
+        cep_order=mcsize - 1, fft_length=fft_size, alpha=alpha, n_iter=1
+    )(tmp)
+    return mgc.numpy()
+
+
+def mcep2sp(x, mcsize, fs):
+    fft_size, alpha = get_mcep_params(fs)
+    x = torch.as_tensor(x, dtype=torch.float)
+
+    tmp = diffsptk.MelGeneralizedCepstrumToSpectrum(
+        alpha=alpha,
+        cep_order=mcsize - 1,
+        fft_length=fft_size,
+    )(x)
+    tmp = diffsptk.ScalarOperation("Division", 32768.0)(tmp)
+    sp = diffsptk.ScalarOperation("Power", 2)(tmp)
+    return sp.double().numpy()
+
+
+def f0_statistics(f0_features, path):
+    print("\nF0 statistics...")
+
+    total_f0 = []
+    for f0 in tqdm(f0_features):
+        total_f0 += [f for f in f0 if f != 0]
+
+    mean = sum(total_f0) / len(total_f0)
+    print("Min = {}, Max = {}, Mean = {}".format(min(total_f0), max(total_f0), mean))
+
+    with open(path, "wb") as f:
+        pickle.dump([mean, total_f0], f)
+
+
+def world_synthesis(f0, sp, ap, fs, frameshift):
+    y = pw.synthesize(
+        f0, sp, ap, fs, frame_period=frameshift
+    )  # synthesize an utterance using the parameters
+    return y
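As a sanity check, these functions compose into an analysis-synthesis round trip. A sketch, assuming a mono file at 16, 44.1, or 48 kHz (the only rates `get_mcep_params` covers) and a placeholder path; `mcsize=60` is an illustrative choice, not a repo default:

```python
import torchaudio

waveform, fs = torchaudio.load("sample.wav")  # placeholder path, mono assumed
f0, sp, ap, fs = extract_world_features(waveform[0].numpy(), fs, frameshift=10)

# Optional detour through the mel-cepstral domain and back.
mgc = sp2mcep(sp, mcsize=60, fs=fs)
sp_rec = mcep2sp(mgc, mcsize=60, fs=fs)

y = world_synthesis(f0, sp_rec, ap, fs, frameshift=10)  # float64 numpy waveform
```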
visualization/SingVisio/System_Introduction_of_SingVisio_V2.pdf
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5dd205eace26d91a558e70662a61f017e3ca78e89d98cf45a72ee0911c6a64d2
+size 4592895
visualization/SingVisio/webpage/Dockerfile
ADDED
@@ -0,0 +1,23 @@
+# Copyright (c) 2023 Amphion.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+FROM python:3.10
+
+WORKDIR /app
+
+COPY resources ./resources
+COPY img ./img
+COPY index.html ./index.html
+COPY server.py ./server.py
+COPY config ./config
+
+RUN pip install numpy scikit-learn flask flask_cors gunicorn -i https://pypi.tuna.tsinghua.edu.cn/simple
+
+EXPOSE 8000
+
+ENTRYPOINT ["gunicorn", "-w", "8", "-b", "0.0.0.0:8000", "server:app"]
+
+# docker build -t singvisio .
+# docker run -v $(pwd)/data:/app/data -p 8000:8000 singvisio
visualization/SingVisio/webpage/README.md
ADDED
@@ -0,0 +1,126 @@
+## SingVisio Webpage
+
+This is the source code for the SingVisio webpage. This README introduces the project and provides an installation guide. For an introduction to SingVisio itself, please check this [README.md](../../../egs/visualization/SingVisio/README.md) file.
+
+### Tech Stack
+
+- [Tailwind CSS](https://tailwindcss.com/)
+- [Flowbite](https://flowbite.com/)
+- [D3.js](https://d3js.org/)
+- [Driver.js](https://driverjs.com/)
+
+### Structure
+
+- `index.html`: The entry point file.
+- `config`: Contains JSON configuration files loaded by `index.html`.
+- `img`: Image files.
+- `resources`: Contains CSS styles and JavaScript files.
+    - `init.js`: Loads the configuration and initializes variables.
+    - `function.js`: Houses the functions used in this project.
+    - `event.js`: Binds webpage mouse and keyboard events to functions.
+- `Dockerfile`: For building a Docker image if deployment is needed.
+
+### Configuration
+
+Before installation, you need to configure the data path in the `config/default.json` file.
+
+To better understand our project, please note that this configuration pertains to our pre-processed data. If you want to visualize your own data, follow the guide below to set up the system properly.
+
+1. **Update the Data Configuration** in the `config/default.json` file.
+
+    SingVisio reads the configuration from this JSON file and renders the webpage accordingly. Be aware that any error in the JSON file may cause the system to shut down.
+
+    ```json
+    {
+        "pathData": {
+            "<mode_name>": { // supports multiple modes
+                "users": ["basic", "advanced"], // mode choice: "basic" or "advanced"
+                "multi": ["<id>"], // song_id, sourcesinger_id, or target_id. Set to false to disable. Enables multiple choices for the configured checkbox.
+                "curve": true, // set to true if the metric curve is needed
+                "referenceMap": { // configures reference paths when multiple choices are enabled.
+                    "<sourcesinger_id>": [ // e.g., m4singer_Tenor-6
+                        "<path_to_wav>", // e.g., Tenor-6_寂寞沙洲冷_0002
+                    ]
+                },
+                "data": [
+                    { // supports multiple datasets
+                        "dataset": "<dataset_name>",
+                        "basePath": "<path_to_the_processed_data>",
+                        "pathMap": {
+                            "<sourcesinger_id>": {
+                                "songs": [
+                                    "<song_id>" // set song ID; supports multiple IDs
+                                ],
+                                "targets": [
+                                    "<target_id>" // set target singer ID; supports multiple IDs
+                                ]
+                            }
+                        }
+                    }
+                ]
+            }
+        },
+        "mapToName": {
+            "<map_from>": "<map_to>"
+        },
+        "mapToSong": {
+            "<map_from>": "<map_to>"
+        },
+        "mapToSpace": {
+            "<map_from>": "<map_to>"
+        },
+        "picTypes": [
+            "<pic_type>" // supports multiple types
+        ],
+        "evaluation_data": [
+            { // supports multiple data sets
+                "target": "<target_id>",
+                "sourcesinger": "<sourcesinger_id>",
+                "song": "<song_id>",
+                "best": [
+                    "<best_metric>" // activated when clicking the respective metric
+                ]
+            },
+        ],
+        "colorList": [
+            "<color_hex_code>" // supports multiple colors
+        ],
+        "histogramData": [
+            { // displayed in the top left graph
+                "type": "high", // "high" or "low"; "high" means the higher, the better
+                "name": "<metric_name>",
+                "value": <metric_value>
+            }
+        ]
+    }
+    ```
+
+2. **Change the Data Source Path**
+
+    The total size of our pre-processed data is approximately 60-70 GB. We provide an online host server, and the server path (`baseLink`) can be modified in the `index.html` file on line 15.
+
+    If you prefer to host the data on your local computer, set the `baseLink` value to an empty string as shown below. This directs the server to read data from your local `data` folder.
+
+    ```html
+    <script>
+        const baseLink = ''; // do not end with '/'
+    </script>
+    ```
+
+### Installation
+
+This project does not require a build process. There are multiple ways to run it, but here we introduce the simplest one:
+
+1. Install Python 3.10 and the required packages.
+
+    ```bash
+    pip install numpy scikit-learn flask flask_cors gunicorn
+    ```
+
+2. Run the following command to start the HTTP server:
+
+    ```bash
+    cd webpage
+    gunicorn -w 8 -b 0.0.0.0:8080 server:app
+    ```
+
+3. After starting the HTTP web server, open the following link in your browser: [http://localhost:8080/](http://localhost:8080/)
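The gunicorn command above expects a `server.py` exposing a Flask `app` in the `webpage` directory; that file is not shown in this view. For orientation only, a hypothetical minimal sketch of such an entry point (the routes here are assumptions, not the shipped implementation):

```python
# Hypothetical sketch only; the actual server.py in the repo may differ.
from flask import Flask, send_from_directory
from flask_cors import CORS

app = Flask(__name__)
CORS(app)  # the page may fetch config/data cross-origin

@app.route("/")
def index():
    return send_from_directory(".", "index.html")

@app.route("/<path:path>")
def assets(path):
    # would serve config/, resources/, img/, and the mounted data/ folder
    return send_from_directory(".", path)

if __name__ == "__main__":
    app.run(host="0.0.0.0", port=8080)
```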
visualization/SingVisio/webpage/config/default.json
ADDED
@@ -0,0 +1,407 @@
+{
+    "pathData": {
+        "Step Comparison": {
+            "users": ["basic", "advanced"],
+            "multi": false,
+            "data": [
+                {
+                    "dataset": "SVCC",
+                    "basePath": "data/gd_svcc",
+                    "pathMap": {
+                        "SF1": {
+                            "songs": [
+                                "30001",
+                                "30002",
+                                "30003"
+                            ],
+                            "targets": [
+                                "svcc_IDF1",
+                                "svcc_IDM1",
+                                "svcc_CDF1",
+                                "svcc_CDM1"
+                            ]
+                        },
+                        "SM1": {
+                            "songs": [
+                                "30001",
+                                "30002",
+                                "30003"
+                            ],
+                            "targets": [
+                                "svcc_IDF1",
+                                "svcc_IDM1",
+                                "svcc_CDF1",
+                                "svcc_CDM1"
+                            ]
+                        }
+                    }
+                },
+                {
+                    "dataset": "M4Singer",
+                    "basePath": "data/gd_m4sg",
+                    "pathMap": {
+                        "Alto-1": {
+                            "songs": [
+                                "美错_0014"
+                            ],
+                            "targets": [
+                                "opencpop"
+                            ]
+                        },
+                        "Bass-1": {
+                            "songs": [
+                                "十年_0008"
+                            ],
+                            "targets": [
+                                "opencpop"
+                            ]
+                        },
+                        "Soprano-2": {
+                            "songs": [
+                                "同桌的你_0018"
+                            ],
+                            "targets": [
+                                "opencpop"
+                            ]
+                        },
+                        "Tenor-5": {
+                            "songs": [
+                                "爱笑的眼睛_0010"
+                            ],
+                            "targets": [
+                                "opencpop"
+                            ]
+                        }
+                    }
+                }
+            ]
+        },
+        "Metric Comparison": {
+            "users": ["basic", "advanced"],
+            "multi": false,
+            "curve": true,
+            "data": [
+                {
+                    "dataset": "SVCC",
+                    "basePath": "data/ev_best",
+                    "pathMap": {
+                        "SM1": {
+                            "songs": [
+                                "30009"
+                            ],
+                            "targets": [
+                                "svcc_IDM1"
+                            ]
+                        },
+                        "SF1": {
+                            "songs": [
+                                "30005",
+                                "30006",
+                                "30009",
+                                "30016",
+                                "30022",
+                                "30019"
+                            ],
+                            "targets": [
+                                "svcc_IDF1"
+                            ]
+                        }
+                    }
+                }
+            ]
+        },
+        "Source Singer Comparison": {
+            "users": ["advanced"],
+            "multi": [
+                "sourcesinger_id"
+            ],
+            "referenceMap": {
+                "m4singer_Alto-7": [
+                    "Alto-7_寂寞沙洲冷_0000",
+                    "Alto-7_寂寞沙洲冷_0011"
+                ],
+                "m4singer_Bass-1": [
+                    "Bass-1_寂寞沙洲冷_0002",
+                    "Bass-1_寂寞沙洲冷_0021"
+                ],
+                "m4singer_Tenor-6": [
+                    "Tenor-6_寂寞沙洲冷_0002",
+                    "Tenor-6_寂寞沙洲冷_0020"
+                ],
+                "m4singer_Tenor-7": [
+                    "Tenor-7_寂寞沙洲冷_0002",
+                    "Tenor-7_寂寞沙洲冷_0013",
+                    "Tenor-7_寂寞沙洲冷_0023"
+                ]
+            },
+            "indexMode": "number",
+            "data": [
+                {
+                    "dataset": "M4Singer",
+                    "basePath": "data/dc_dss",
+                    "pathMap": {
+                        "Alto-7": {
+                            "songs": [
+                                "寂寞沙洲冷_0000",
+                                "寂寞沙洲冷_0011"
+                            ],
+                            "targets": [
+                                "m4singer_Tenor-7",
+                                "m4singer_Alto-7"
+                            ]
+                        },
+                        "Bass-1": {
+                            "songs": [
+                                "寂寞沙洲冷_0002",
+                                "寂寞沙洲冷_0021"
+                            ],
+                            "targets": [
+                                "m4singer_Tenor-7",
+                                "m4singer_Bass-1"
+                            ]
+                        },
+                        "Tenor-6": {
+                            "songs": [
+                                "寂寞沙洲冷_0002",
+                                "寂寞沙洲冷_0020"
+                            ],
+                            "targets": [
+                                "m4singer_Tenor-7",
+                                "m4singer_Tenor-6"
+                            ]
+                        },
+                        "Tenor-7": {
+                            "songs": [
+                                "寂寞沙洲冷_0002",
+                                "寂寞沙洲冷_0013"
+                            ],
+                            "targets": [
+                                "m4singer_Alto-7",
+                                "m4singer_Bass-1",
+                                "m4singer_Tenor-6"
+                            ]
+                        }
+                    }
+                }
+            ]
+        },
+        "Song Comparison": {
+            "users": ["advanced"],
+            "multi": [
+                "song_id"
+            ],
+            "referenceMap": {
+                "m4singer_Tenor-6": [
+                    "Tenor-6_寂寞沙洲冷_0002",
+                    "Tenor-6_寂寞沙洲冷_0020"
+                ],
+                "m4singer_Tenor-7": [
+                    "Tenor-7_寂寞沙洲冷_0002",
+                    "Tenor-7_寂寞沙洲冷_0013"
+                ]
+            },
+            "data": [
+                {
+                    "dataset": "M4Singer",
+                    "basePath": "data/dc_dss",
+                    "pathMap": {
+                        "Tenor-6": {
+                            "songs": [
+                                "寂寞沙洲冷_0002",
+                                "寂寞沙洲冷_0020"
+                            ],
+                            "targets": [
+                                "m4singer_Tenor-7",
+                                "m4singer_Tenor-6"
+                            ]
+                        }
+                    }
+                }
+            ]
+        },
+        "Target Singer Comparison": {
+            "users": ["advanced"],
+            "multi": [
+                "song_id",
+                "target_id"
+            ],
+            "referenceMap": {
+                "m4singer_Alto-7": [
+                    "Alto-7_寂寞沙洲冷_0000",
+                    "Alto-7_寂寞沙洲冷_0011"
+                ],
+                "m4singer_Bass-1": [
+                    "Bass-1_寂寞沙洲冷_0002",
+                    "Bass-1_寂寞沙洲冷_0021"
+                ],
+                "m4singer_Tenor-7": [
+                    "Tenor-7_寂寞沙洲冷_0002",
+                    "Tenor-7_寂寞沙洲冷_0013"
+                ],
+                "m4singer_Tenor-6": [
+                    "Tenor-6_寂寞沙洲冷_0002",
+                    "Tenor-6_寂寞沙洲冷_0020"
+                ]
+            },
+            "data": [
+                {
+                    "dataset": "M4Singer",
+                    "basePath": "data/dc_ssd",
+                    "pathMap": {
+                        "Tenor-6": {
+                            "songs": [
+                                "寂寞沙洲冷_0002",
+                                "寂寞沙洲冷_0020"
+                            ],
+                            "targets": [
+                                "m4singer_Alto-7",
+                                "m4singer_Bass-1",
+                                "m4singer_Tenor-7",
+                                "m4singer_Tenor-6"
+                            ]
+                        }
+                    }
+                }
+            ]
+        }
+    },
+    "mapToName": {
+        "SF1": "Singer 1",
+        "SM1": "Singer 2",
+        "CDF1": "Singer 3",
+        "CDM1": "Singer 4",
+        "IDF1": "Singer 5",
+        "IDM1": "Singer 6",
+        "svcc_CDF1": "Singer 3",
+        "svcc_CDM1": "Singer 4",
+        "svcc_IDF1": "Singer 5",
+        "svcc_IDM1": "Singer 6",
+        "Alto-1": "Singer 7",
+        "m4singer_Alto-1": "Singer 7",
+        "Alto-7": "Singer 8",
+        "m4singer_Alto-7": "Singer 8",
+        "Bass-1": "Singer 9",
+        "m4singer_Bass-1": "Singer 9",
+        "Soprano-2": "Singer 10",
+        "m4singer_Soprano-2": "Singer 10",
+        "Tenor-5": "Singer 11",
+        "m4singer_Tenor-5": "Singer 11",
+        "Tenor-6": "Singer 12",
+        "m4singer_Tenor-6": "Singer 12",
+        "Tenor-7": "Singer 13",
+        "m4singer_Tenor-7": "Singer 13",
+        "opencpop": "Singer 14"
+    },
+    "mapToSong": {
+        "30001": "Song 1",
+        "30002": "Song 2",
+        "30003": "Song 3",
+        "10001": "Song 4",
+        "10030": "Song 5",
+        "10120": "Song 6",
+        "10140": "Song 7",
+        "美错_0014": "Song 8",
+        "十年_0008": "Song 9",
+        "同桌的你_0018": "Song 10",
+        "爱笑的眼睛_0010": "Song 11",
+        "寂寞沙洲冷_0000": "Song 12",
+        "寂寞沙洲冷_0002": "Song 12",
+        "寂寞沙洲冷_0011": "Song 13",
+        "寂寞沙洲冷_0013": "Song 13",
+        "寂寞沙洲冷_0020": "Song 13",
+        "寂寞沙洲冷_0021": "Song 14",
+        "30005": "Song 15",
+        "30006": "Song 16",
+        "30009": "Song 17",
+        "30016": "Song 18",
+        "30022": "Song 19",
+        "30019": "Song 20"
+    },
+    "mapToSpace": {
+        "encoded_step": "Step (Diffusion step)",
+        "noise_step_layer0": "Step + Noise (First layer)",
+        "noise_step_layer10": "Step + Noise (Middle layer)",
+        "noise_step_layer19": "Step + Noise (Last layer)",
+        "noise_step_condition_layer0": "Step + Noise + Condition (First layer)",
+        "noise_step_condition_layer10": "Step + Noise + Condition (Middle layer)",
+        "noise_step_condition_layer19": "Step + Noise + Condition (Last layer)"
+    },
+    "picTypes": [
+        "encoded_step",
+        "noise_step_layer0",
+        "noise_step_layer10",
+        "noise_step_layer19",
+        "noise_step_condition_layer0",
+        "noise_step_condition_layer10",
+        "noise_step_condition_layer19"
+    ],
+    "evaluation_data": [
+        {
+            "target": "svcc_IDM1",
+            "sourcesinger": "SM1",
+            "song": "30009",
+            "best": [
+                "MCD"
+            ]
+        },
+        {
+            "target": "svcc_IDF1",
+            "sourcesinger": "SF1",
+            "song": "30016",
+            "best": [
+                "F0CORR",
+                "FAD"
+            ]
+        },
+        {
+            "target": "svcc_IDF1",
+            "sourcesinger": "SF1",
+            "song": "30009",
+            "best": [
+                "F0RMSE",
+                "CER"
+            ]
+        },
+        {
+            "target": "svcc_IDF1",
+            "sourcesinger": "SF1",
+            "song": "30019",
+            "best": [
+                "Dembed"
+            ]
+        }
+    ],
+    "colorList": [
+        "#FFA500",
+        "#1C64F2",
+        "#7E3AF2",
+        "#9F580A"
+    ],
+    "histogramData": [
+        {
+            "type": "high",
+            "name": "F0CORR",
+            "value": 0.946698913
+        },
+        {
+            "type": "high",
+            "name": "Dembed",
+            "value": 0.688410708
+        },
+        {
+            "type": "low",
+            "name": "MCD",
+            "value": 11.44773471
+        },
+        {
+            "type": "low",
+            "name": "F0RMSE",
+            "value": 70.81400428
+        },
+        {
+            "type": "low",
+            "name": "FAD",
+            "value": 10.35121372
+        }
+    ]
+}
visualization/SingVisio/webpage/img/difference_bar.jpg
ADDED
visualization/SingVisio/webpage/img/syllable.png
ADDED
visualization/SingVisio/webpage/index.html
ADDED
@@ -0,0 +1,390 @@
+<!--
+ Copyright (c) 2023 Amphion.
+ This source code is licensed under the MIT license found in the
+ LICENSE file in the root directory of this source tree.
+-->
+
+<!DOCTYPE html>
+<html>
+
+<head>
+    <meta charset="utf-8">
+    <meta name="viewport" content="width=1200px, initial-scale=1.0">
+    <title>SingVisio: Visual Analytics of Diffusion Model for Singing Voice Conversion</title>
+    <script>
+        const baseLink = 'https://dsvc.openmmlab.org.cn'; // end without '/'
+    </script>
+    <!-- Load Tailwind CSS and D3.js -->
+    <script src="./resources/tailwind.js"></script>
+    <script src="./resources/d3.v4.min.js"></script>
+    <script src="./resources/htl.min.js"></script>
+    <script src="./resources/d3-scale-chromatic.v1.min.js"></script>
+    <script src="./resources/d3-contour.v1.min.js"></script>
+    <!-- Load the Guide driver -->
+    <script src="./resources/driver.js.iife.min.js"></script>
+    <link rel="stylesheet" href="./resources/driver.min.css">
+    <!-- Config Tailwind CSS -->
+    <script type="module">
+        import cfg from "./tailwind.config.js";
+        tailwind.config = cfg;
+    </script>
+    <style type="text/tailwindcss">
+        @layer components {
+            .btn-small {
+                @apply px-3 py-2 text-xs font-medium text-center text-gray-900 focus:outline-none bg-white rounded-lg border border-gray-200 hover:bg-gray-100 hover:text-blue-700 focus:z-10 focus:ring-4 focus:ring-gray-200 dark:focus:ring-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:border-gray-600 dark:hover:text-white dark:hover:bg-gray-700
+            }
+            .btn {
+                @apply text-white bg-blue-700 hover:bg-blue-800 focus:ring-4 focus:ring-blue-300 font-medium rounded-lg text-sm px-5 py-2.5 mr-2 mb-2 dark:bg-blue-600 dark:hover:bg-blue-700 focus:outline-none dark:focus:ring-blue-800;
+            }
+            .btn-sec {
+                @apply py-2.5 px-5 mr-2 mb-2 text-sm font-medium text-gray-900 focus:outline-none bg-white rounded-lg border border-gray-200 hover:bg-gray-100 hover:text-blue-700 focus:z-10 focus:ring-4 focus:ring-gray-200 dark:focus:ring-gray-700 dark:bg-gray-800 dark:text-gray-400 dark:border-gray-600 dark:hover:text-white dark:hover:bg-gray-700
+            }
+            .select-select {
+                @apply my-0 py-2 px-1 bg-gray-50 border border-gray-300 text-gray-900 text-xs rounded-lg focus:ring-blue-500 focus:border-blue-500 block w-full dark:bg-gray-700 dark:border-gray-600 dark:placeholder-gray-400 dark:text-white dark:focus:ring-blue-500 dark:focus:border-blue-500
+            }
+            .select-label {
+                @apply block text-sm font-medium text-gray-900 dark:text-white;
+            }
+            .card {
+                @apply p-6 bg-white border border-gray-200 rounded-lg dark:bg-gray-800 dark:border-gray-700
+            }
+            .card-title {
+                @apply mb-2 text-base font-bold tracking-tight text-gray-900 dark:text-white
+            }
+            .timeline-point {
+                @apply absolute w-3 h-3 bg-gray-200 rounded-full mt-1.5 -left-1.5 border border-white dark:border-gray-900 dark:bg-gray-700
+            }
+            .timeline-title {
+                @apply text-lg font-semibold text-gray-900 dark:text-white
+            }
+            .timeline-subtitle {
+                @apply text-base font-normal text-gray-500 dark:text-gray-400
+            }
+            .small-input {
+                @apply block w-full p-2 text-gray-900 border border-gray-300 rounded-lg bg-gray-50 sm:text-xs focus:ring-blue-500 focus:border-blue-500 dark:bg-gray-700 dark:border-gray-600 dark:placeholder-gray-400 dark:text-white dark:focus:ring-blue-500 dark:focus:border-blue-500;
+            }
+            .checkbox {
+                @apply w-4 h-4 text-blue-600 bg-gray-100 border-gray-300 rounded focus:ring-blue-500 dark:focus:ring-blue-600 dark:ring-offset-gray-800 focus:ring-2 dark:bg-gray-700 dark:border-gray-600
+            }
+            .dropdown_button_text {
+                @apply w-full text-xs font-normal text-gray-900 dark:text-white text-left
+            }
+            .dropdown_button {
+                @apply btn-sec text-xs flex items-center w-full px-2 py-2 my-0 disabled:cursor-not-allowed disabled:opacity-50
+            }
+        }
+    </style>
+    <style>
+        input.step-axis {
+            outline: none;
+            -webkit-appearance: none;
+            background: #0000002b;
+            height: 8px;
+        }
+
+        input.step-axis::-webkit-slider-thumb {
+            -webkit-appearance: none;
+            position: relative;
+            width: 18px;
+            height: 18px;
+            background: url("./img/syllable.png") no-repeat;
+            background-size: 18px;
+            border-radius: 50%;
+            cursor: pointer;
+        }
+
+        audio {
+            outline: none;
+            height: 34px;
+        }
+
+        /* make an input with two handles */
+        .inputs {
+            display: block;
+            width: 100%;
+            height: 10px;
+            /* background-color: azure; */
+        }
+
+        .inputs input {
+            position: absolute;
+        }
+
+        .inputs input::-webkit-slider-thumb {
+            pointer-events: all;
+            z-index: 2;
+        }
+
+        .inputs input::-webkit-slider-runnable-track {
+            pointer-events: none;
+            z-index: 1;
+        }
+    </style>
+</head>
+
+<body class="bg-gray-100 dark:bg-gray-900">
+    <div id="alert"
+        class="hidden fixed top-0 right-0 left-0 z-50 w-full h-[100vh] bg-black bg-opacity-50 justify-center items-center overflow-y-hidden">
+        <div class="card flex flex-col min-w-[400px] max-w-2xl max-h-[80vh] p-0 overflow-hidden">
+            <!-- Modal header -->
+            <div class="flex items-center justify-between p-4 md:p-5 border-b rounded-t dark:border-gray-600">
+                <h3 id="alert_title" class="text-xl font-semibold text-gray-900 dark:text-white">
+                    Title
+                </h3>
+                <button id="close_alert" type="button"
+                    class="text-gray-400 bg-transparent hover:bg-gray-200 hover:text-gray-900 rounded-lg text-sm w-8 h-8 ms-auto inline-flex justify-center items-center dark:hover:bg-gray-600 dark:hover:text-white">
+                    <svg class="w-3 h-3" aria-hidden="true" xmlns="http://www.w3.org/2000/svg" fill="none"
+                        viewBox="0 0 14 14">
+                        <path stroke="currentColor" stroke-linecap="round" stroke-linejoin="round" stroke-width="2"
+                            d="m1 1 6 6m0 0 6 6M7 7l6-6M7 7l-6 6" />
+                    </svg>
+                    <span class="sr-only">Close modal</span>
+                </button>
+            </div>
+            <!-- Modal body -->
+            <div id="alert_text"
+                class="p-4 md:p-5 space-y-4 text-base leading-relaxed text-gray-700 dark:text-gray-200 overflow-y-auto">
+                Text
+            </div>
+            <!-- Modal footer -->
+            <div class="flex items-center p-4 md:p-5 border-t border-gray-200 rounded-b dark:border-gray-600">
+                <button id="finish_alert" type="button"
+                    class="ml-auto text-white bg-blue-700 hover:bg-blue-800 focus:ring-4 focus:outline-none focus:ring-blue-300 font-medium rounded-lg text-sm px-5 py-2.5 text-center dark:bg-blue-600 dark:hover:bg-blue-700 dark:focus:ring-blue-800">OK</button>
+            </div>
+        </div>
+    </div>
+    <div class="bg-white dark:bg-gray-800 w-full py-4 px-6 border border-b border-gray-200 dark:border-gray-600">
+        <div class="mx-auto max-w-[1490px] grid grid-cols-6 align-center items-center">
+            <!-- <img class="dark:hidden" src="img/cuhksz_logo.png" alt="cuhksz logo" class="h-[40px]">
+            <img class="hidden dark:block" src="img/cuhksz_logo_white.png" alt="cuhksz logo" class="h-[40px]"> -->
+            <span class="col-span-1"></span>
+            <span id="title" class="col-span-4 mx-auto font-[800] text-[20px] dark:text-white">SingVisio: Visual
+                Analytics of Diffusion Model for Singing Voice Conversion</span>
+            <!-- <span class="ml-auto mr-0 text-sm dark:text-white">Team: <i>Human Language Technology Lab,
+                CUHK-Shenzhen</i></span> -->
+            <div class="flex">
+                <button class="btn-small ml-auto" id="mode_change">Switch to _</button>
+                <button class="btn-small ml-2" id="help">Help?</button>
+            </div>
+        </div>
+    </div>
+    <div class="max-w-[1500px] m-auto">
+        <div class="flex flex-row items-start gap-0.5 py-3 p-1">
+            <div class="w-[300px] flex flex-col flex-none">
+                <div id="performance" class="card p-2 mb-2 flex flex-col flex-none relative">
+                    <button class="absolute right-1 top-1 btn-small px-1.5 py-0.5 ml-auto rounded-full"
+                        id="metrics_help">?</button>
+                    <div class="flex flex-row">
+                        <div id="histogram" class="flex-none"></div>
+                        <div id="histogram2" class="flex-none"></div>
+                    </div>
+                    <span class="text-[12px] mx-auto dark:text-white">Metrics</span>
+                </div>
+
+                <div id="touch_map" class="card p-2 relative">
+                    <button class="absolute right-1 top-1 btn-small px-1.5 py-0.5 ml-auto rounded-full"
+                        id="projection_help">?</button>
+                    <div class="flex mb-1 align-center items-center space-between dark:text-white">
+                        <div class="ml-1 text-sm">Step: <span id="current_step_display_number"></span></div>
+                        <div class="ml-auto flex mr-2">
+                            <button class="btn-sec h-9 w-9 p-2.5 mb-0" id="reset_map">
+                                <svg class="w-3.5 h-3.5" aria-hidden="true" xmlns="http://www.w3.org/2000/svg"
+                                    fill="none" viewBox="0 0 18 20">
+                                    <path stroke="currentColor" stroke-linecap="round" stroke-linejoin="round"
+                                        stroke-width="2"
+                                        d="M16 1v5h-5M2 19v-5h5m10-4a8 8 0 0 1-14.947 3.97M1 10a8 8 0 0 1 14.947-3.97" />
+                                </svg>
+                            </button>
+                        </div>
+                    </div>
+                    <div id="dataviz_axisZoom" class="flex flex-wrap border bg-white dark:bg-gray-800 relative"></div>
+                </div>
+            </div>
+            <div class="w-full">
+                <div id="step_preview" class="flex min-w-[500px] w-full bg-white dark:bg-gray-800 p-2 card mb-2">
+                    <div class="mx-auto" id="preview_container">
+                    </div>
+                    <div class="mx-auto" id="preview_container2">
+                    </div>
+                    <div class="flex flex-col">
+                        <button class="btn-sec" id="refreshpreview">
+                            <svg class="w-4 h-4" aria-hidden="true" xmlns="http://www.w3.org/2000/svg" fill="none"
+                                viewBox="0 0 18 20">
+                                <path stroke="currentColor" stroke-linecap="round" stroke-linejoin="round"
+                                    stroke-width="2"
+                                    d="M16 1v5h-5M2 19v-5h5m10-4a8 8 0 0 1-14.947 3.97M1 10a8 8 0 0 1 14.947-3.97" />
+                            </svg>
+                        </button>
+                    </div>
+                </div>
+                <div id="mel_card_container" class="grid grid-cols-3 min-w-[915px] w-full gap-1 justify-items-center">
+                </div>
+                <div id="tips">
+                </div>
+                <div id="tooltip" role="tooltip"
+                    class="invisible absolute z-10 inline-block px-3 py-2 text-sm font-medium text-white bg-gray-900 rounded-lg shadow-sm opacity-[0.9] dark:bg-gray-700">
+                    Tooltip content
+                </div>
+            </div>
+            <div class="shrink-0 w-[180px]">
+
+                <div class="card py-2 px-3 relative">
+                    <button class="absolute right-1 top-1 btn-small px-1.5 py-0.5 ml-auto rounded-full"
+                        id="control_help">?</button>
+                    <div class="flex items-center">
+                        <h5 class="card-title my-1 text-lg">Control Panel</h5>
+                    </div>
+                    <div class="flex flex-col w-full rounded-lg gap-0.5" id="control_panel">
+                        <div>
+                            <label for="mode_id" class="select-label">Display Mode</label>
+                            <select id="mode_id" class="select-select"></select>
+                        </div>
+                        <div>
+                            <label for="sourcesinger_id" class="select-label">Source Singer</label>
+                            <button id="sourcesinger_id" class="dropdown_button" type="button">
+                                <span class="dropdown_button_text" id="sourcesinger_id_text">Choose Singer</span> <svg
+                                    class="w-2.5 h-2.5" aria-hidden="true" xmlns="http://www.w3.org/2000/svg"
+                                    fill="none" viewBox="0 0 10 6">
+                                    <path stroke="currentColor" stroke-linecap="round" stroke-linejoin="round"
+                                        stroke-width="2" d="m1 1 4 4 4-4" />
+                                </svg>
+                            </button>
+                            <!-- Dropdown menu -->
+                            <div id="sourcesinger_id_dropdown"
+                                class="absolute z-10 hidden bg-white divide-y divide-gray-100 rounded-lg shadow w-44 dark:bg-gray-700">
+                                <ul class="py-2 text-sm text-gray-700 dark:text-gray-200">
+                                </ul>
+                            </div>
+                        </div>
+                        <div>
+                            <label for="song_id" class="select-label">Song</label>
+                            <button id="song_id" class="dropdown_button" type="button">
+                                <span class="dropdown_button_text" id="song_id_text">Choose Song</span> <svg
+                                    class="w-2.5 h-2.5" aria-hidden="true" xmlns="http://www.w3.org/2000/svg"
+                                    fill="none" viewBox="0 0 10 6">
+                                    <path stroke="currentColor" stroke-linecap="round" stroke-linejoin="round"
+                                        stroke-width="2" d="m1 1 4 4 4-4" />
+                                </svg>
+                            </button>
+                            <!-- Dropdown menu -->
+                            <div id="song_id_dropdown"
+                                class="absolute z-10 hidden bg-white divide-y divide-gray-100 rounded-lg shadow w-44 dark:bg-gray-700">
+                                <ul class="py-2 text-sm text-gray-700 dark:text-gray-200">
+                                </ul>
+                            </div>
+                        </div>
+                        <div>
+                            <label for="target_id" class="select-label">Target Singer</label>
+                            <button id="target_id" class="dropdown_button" type="button">
+                                <span class="dropdown_button_text" id="target_id_text">Target Singer</span> <svg
+                                    class="w-2.5 h-2.5" aria-hidden="true" xmlns="http://www.w3.org/2000/svg"
+                                    fill="none" viewBox="0 0 10 6">
+                                    <path stroke="currentColor" stroke-linecap="round" stroke-linejoin="round"
+                                        stroke-width="2" d="m1 1 4 4 4-4" />
+                                </svg>
+                            </button>
+                            <!-- Dropdown menu -->
+                            <div id="target_id_dropdown"
+                                class="absolute z-10 hidden bg-white divide-y divide-gray-100 rounded-lg shadow w-44 dark:bg-gray-700">
+                                <ul class="py-2 text-sm text-gray-700 dark:text-gray-200">
+                                </ul>
+                            </div>
+                        </div>
+                        <div class="relative">
+                            <label for="pic_id" class="select-label">Projection Embedding</label>
+                            <select id="pic_id" class="select-select"></select>
+                        </div>
+                        <div class="relative" id="components">
+                            <label for="components" class="select-label">Components</label>
+                            <div class="flex flex-col gap-0.5">
+                                <div class="flex items-center">
+                                    <input id="components_pitch" type="checkbox" checked class="checkbox">
+                                    <label for="components_pitch"
+                                        class="ml-1 text-[0.775rem] font-normal text-gray-900 dark:text-gray-300">F0
+                                        contour</label>
+                                </div>
+                                <div class="flex items-start">
+                                    <input id="components_frequncy" type="checkbox" checked class="checkbox">
+                                    <div class="flex flex-col gap-0.5 grow">
+                                        <label for="components_frequncy"
+                                            class="ml-1 mb-1 text-[0.775rem] font-normal text-gray-900 dark:text-gray-300">Frequency</label>
+                                        <div class="flex inputs w-full">
+                                            <input id="inputs_min" type="range"
+                                                class="h-2 bg-gray-200 rounded-lg appearance-none cursor-pointer dark:bg-gray-700"
+                                                value="0" min="0">
+                                            <input id="inputs_max" type="range"
+                                                class="h-2 bg-gray-200 rounded-lg appearance-none cursor-pointer dark:bg-gray-700"
+                                                value="100" max="100">
+                                        </div>
+                                        <div class="flex w-full">
+                                            <span id="inputs_left" class="ml-1 mr-auto text-[0.7rem] font-normal text-gray-900 dark:text-white">0</span>
+                                            <span id="inputs_right" class="mr-2 ml-auto text-[0.7rem] font-normal text-gray-900 dark:text-white">100</span>
+                                        </div>
+                                    </div>
+                                </div>
+
+                                <div class="flex items-start">
+                                    <input id="sampling_steps" type="checkbox" class="checkbox">
+                                    <div class="flex flex-col grow">
+                                        <label for="sampling_steps"
+                                            class="ml-1 text-[0.775rem] font-normal text-gray-900 dark:text-gray-300">Sampling
+                                            steps</label>
+                                        <div class="flex flex-row h-[32px]">
+                                            <span class="my-auto mx-1 text-[0.775rem] font-normal text-gray-900 dark:text-white">Step count:</span>
+                                            <input type="text"
+                                                class="small-input flex-none w-[50px] text-center bg-white dark:bg-gray-800"
+                                                id="sampling_num" value="100">
+                                        </div>
+
+                                    </div>
+                                </div>
+                            </div>
+                        </div>
+
+
+                        <div id="step_axis">
+                            <label for="range" class="select-label">Step Axis</label>
+
+                            <div
+                                class="items-center w-full rounded-lg bg-gray-50 flex flex-row gap-2 px-2 py-0.5 border border-gray-300 dark:border-gray-600 dark:text-white dark:bg-gray-700">
+
+                                <input class="step-axis my-2 w-full" id="range" type="range" min="0" max="999" value="0"
+                                    step="1">
+                                <button class="btn-small" id="controls">
+                                    <svg id="icon_play" style="display: none" class="w-3 h-3" aria-hidden="true"
+                                        xmlns="http://www.w3.org/2000/svg" fill="currentColor" viewBox="0 0 14 16">
+                                        <path
+                                            d="M0 .984v14.032a1 1 0 0 0 1.506.845l12.006-7.016a.974.974 0 0 0 0-1.69L1.506.139A1 1 0 0 0 0 .984Z" />
+                                    </svg>
+                                    <svg id="icon_stop" class="w-3 h-3" aria-hidden="true"
+                                        xmlns="http://www.w3.org/2000/svg" fill="currentColor" viewBox="0 0 12 16">
+                                        <path
+                                            d="M3 0H2a2 2 0 0 0-2 2v12a2 2 0 0 0 2 2h1a2 2 0 0 0 2-2V2a2 2 0 0 0-2-2Zm7 0H9a2 2 0 0 0-2 2v12a2 2 0 0 0 2 2h1a2 2 0 0 0 2-2V2a2 2 0 0 0-2-2Z" />
+                                    </svg>
+                                </button>
+
+                            </div>
+                            <div class="flex gap-1 mt-2">
+                                <span class="my-auto mr-1 text-sm font-medium text-gray-900 dark:text-white">Step:</span>
+                                <input type="text"
+                                    class="small-input flex-none w-[60px] text-center bg-white dark:bg-gray-800" id="value">
+                                <button class="btn-small" id="add_preview">
+                                    Pin
+                                </button>
+                            </div>
+                        </div>
+
+                    </div>
+                </div>
+            </div>
+        </div>
+
+    </div>
+
+    <script src="./resources/init.js"></script>
+    <script src="./resources/function.js"></script>
+    <script src="./resources/event.js"></script>
+    <script>
+        initConfig('./config/default.json')
+    </script>
+</body>