pyp1 committed on
Commit
4a359f0
1 Parent(s): eb0f340

better handle numbers 2

Browse files
app.py CHANGED
@@ -271,6 +271,7 @@ def run(seed, left_margin, right_margin, codec_audio_sr, codec_sr, top_k, top_p,
271
  inference_transcript += target_transcript + "\n"
272
 
273
  prompt_end_frame = int(min(audio_dur, prompt_end_time) * info.sample_rate)
 
274
  _, gen_audio = inference_one_sample(voicecraft_model["model"],
275
  voicecraft_model["config"],
276
  voicecraft_model["phn2num"],
@@ -299,7 +300,7 @@ def run(seed, left_margin, right_margin, codec_audio_sr, codec_sr, top_k, top_p,
299
  morphed_span = (max(edit_start_time - left_margin, 1 / codec_sr), min(edit_end_time + right_margin, audio_dur))
300
  mask_interval = [[round(morphed_span[0]*codec_sr), round(morphed_span[1]*codec_sr)]]
301
  mask_interval = torch.LongTensor(mask_interval)
302
-
303
  _, gen_audio = inference_one_sample(voicecraft_model["model"],
304
  voicecraft_model["config"],
305
  voicecraft_model["phn2num"],
 
271
  inference_transcript += target_transcript + "\n"
272
 
273
  prompt_end_frame = int(min(audio_dur, prompt_end_time) * info.sample_rate)
274
+ target_transcript = replace_numbers_with_words(target_transcript).replace(" ", " ").replace(" ", " ") # replace numbers with words, so that the phonemizer can do a better job
275
  _, gen_audio = inference_one_sample(voicecraft_model["model"],
276
  voicecraft_model["config"],
277
  voicecraft_model["phn2num"],
 
300
  morphed_span = (max(edit_start_time - left_margin, 1 / codec_sr), min(edit_end_time + right_margin, audio_dur))
301
  mask_interval = [[round(morphed_span[0]*codec_sr), round(morphed_span[1]*codec_sr)]]
302
  mask_interval = torch.LongTensor(mask_interval)
303
+ target_transcript = replace_numbers_with_words(target_transcript).replace(" ", " ").replace(" ", " ") # replace numbers with words, so that the phonemizer can do a better job
304
  _, gen_audio = inference_one_sample(voicecraft_model["model"],
305
  voicecraft_model["config"],
306
  voicecraft_model["phn2num"],
pretrained_models/giga330M.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:35e028b8c5237cb4a6050ca81d4569b98e3a34ad9175fa252f7b1d13e6a9ad26
3
- size 1746844161
 
 
 
 
pretrained_models/giga830M.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:2454b51575822a04d24a00f8ba78f201f916439ffa62a3c1ac0ffa5220f429e3
3
- size 3358342977