Spaces:
Build error
Build error
better handle numbers 2
Browse files
- app.py +2 -1
- pretrained_models/giga330M.pth +0 -3
- pretrained_models/giga830M.pth +0 -3
app.py
CHANGED
@@ -271,6 +271,7 @@ def run(seed, left_margin, right_margin, codec_audio_sr, codec_sr, top_k, top_p,
|
|
271 |
inference_transcript += target_transcript + "\n"
|
272 |
|
273 |
prompt_end_frame = int(min(audio_dur, prompt_end_time) * info.sample_rate)
|
|
|
274 |
_, gen_audio = inference_one_sample(voicecraft_model["model"],
|
275 |
voicecraft_model["config"],
|
276 |
voicecraft_model["phn2num"],
|
@@ -299,7 +300,7 @@ def run(seed, left_margin, right_margin, codec_audio_sr, codec_sr, top_k, top_p,
|
|
299 |
morphed_span = (max(edit_start_time - left_margin, 1 / codec_sr), min(edit_end_time + right_margin, audio_dur))
|
300 |
mask_interval = [[round(morphed_span[0]*codec_sr), round(morphed_span[1]*codec_sr)]]
|
301 |
mask_interval = torch.LongTensor(mask_interval)
|
302 |
-
|
303 |
_, gen_audio = inference_one_sample(voicecraft_model["model"],
|
304 |
voicecraft_model["config"],
|
305 |
voicecraft_model["phn2num"],
|
|
|
271 |
inference_transcript += target_transcript + "\n"
|
272 |
|
273 |
prompt_end_frame = int(min(audio_dur, prompt_end_time) * info.sample_rate)
|
274 |
+
target_transcript = replace_numbers_with_words(target_transcript).replace("  ", " ").replace("  ", " ") # replace numbers with words, so that the phonemizer can do a better job
|
275 |
_, gen_audio = inference_one_sample(voicecraft_model["model"],
|
276 |
voicecraft_model["config"],
|
277 |
voicecraft_model["phn2num"],
|
|
|
300 |
morphed_span = (max(edit_start_time - left_margin, 1 / codec_sr), min(edit_end_time + right_margin, audio_dur))
|
301 |
mask_interval = [[round(morphed_span[0]*codec_sr), round(morphed_span[1]*codec_sr)]]
|
302 |
mask_interval = torch.LongTensor(mask_interval)
|
303 |
+
target_transcript = replace_numbers_with_words(target_transcript).replace("  ", " ").replace("  ", " ") # replace numbers with words, so that the phonemizer can do a better job
|
304 |
_, gen_audio = inference_one_sample(voicecraft_model["model"],
|
305 |
voicecraft_model["config"],
|
306 |
voicecraft_model["phn2num"],
|
pretrained_models/giga330M.pth
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:35e028b8c5237cb4a6050ca81d4569b98e3a34ad9175fa252f7b1d13e6a9ad26
|
3 |
-
size 1746844161
|
|
|
|
|
|
|
|
pretrained_models/giga830M.pth
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:2454b51575822a04d24a00f8ba78f201f916439ffa62a3c1ac0ffa5220f429e3
|
3 |
-
size 3358342977
|
|
|
|
|
|
|
|