thuyentruong
committed on
Commit
•
8fe55e6
1
Parent(s):
7f7e6cf
Update app.py
Browse files
app.py
CHANGED
@@ -25,7 +25,7 @@ speaker_embeddings = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze
|
|
25 |
def translate(audio):
|
26 |
# Trick Whisper to translate from any language to Dutch.
|
27 |
# Note that using task=translate will translate to English instead.
|
28 |
-
outputs = asr_pipe(audio, max_new_tokens=256, generate_kwargs={"task": "
|
29 |
return outputs["text"]
|
30 |
|
31 |
|
@@ -34,9 +34,10 @@ def synthesise(text):
|
|
34 |
# Otherwise model.generate_speech will throw errors.
|
35 |
inputs = processor(
|
36 |
text=text,
|
|
|
37 |
max_length=598,
|
38 |
truncation=True,
|
39 |
-
padding=True,
|
40 |
return_tensors="pt"
|
41 |
)
|
42 |
# inputs = processor(text=text, return_tensors="pt")
|
@@ -57,7 +58,6 @@ Demo for cascaded speech-to-speech translation (STST), mapping from source speec
|
|
57 |
[SpeechT5 TTS](https://huggingface.co/sanchit-gandhi/speecht5_tts_vox_nl) model for text-to-speech:
|
58 |
![Cascaded STST](https://huggingface.co/datasets/huggingface-course/audio-course-images/resolve/main/s2st_cascaded.png "Diagram of cascaded speech to speech translation")
|
59 |
"""
|
60 |
-
|
61 |
demo = gr.Blocks()
|
62 |
|
63 |
mic_translate = gr.Interface(
|
|
|
25 |
def translate(audio):
|
26 |
# Trick Whisper to translate from any language to Dutch.
|
27 |
# Note that using task=translate will translate to English instead.
|
28 |
+
outputs = asr_pipe(audio, max_new_tokens=256, generate_kwargs={"task": "transcribe", "language": "dutch"})
|
29 |
return outputs["text"]
|
30 |
|
31 |
|
|
|
34 |
# Otherwise model.generate_speech will throw errors.
|
35 |
inputs = processor(
|
36 |
text=text,
|
37 |
+
# max_length=200,
|
38 |
max_length=598,
|
39 |
truncation=True,
|
40 |
+
# padding=True,
|
41 |
return_tensors="pt"
|
42 |
)
|
43 |
# inputs = processor(text=text, return_tensors="pt")
|
|
|
58 |
[SpeechT5 TTS](https://huggingface.co/sanchit-gandhi/speecht5_tts_vox_nl) model for text-to-speech:
|
59 |
![Cascaded STST](https://huggingface.co/datasets/huggingface-course/audio-course-images/resolve/main/s2st_cascaded.png "Diagram of cascaded speech to speech translation")
|
60 |
"""
|
|
|
61 |
demo = gr.Blocks()
|
62 |
|
63 |
mic_translate = gr.Interface(
|