speech-to-speech-translation

Sleeping

Joserzapata committed on Jul 28, 2023

Commit

301c110

•

1 Parent(s): e8edcc6

add de translation

update translate and synthesise

Files changed (1) hide show

app.py CHANGED Viewed

@@ -20,13 +20,19 @@ vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan").to(devic
 embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
 speaker_embeddings = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(0)
 def translate(audio):
-    outputs = asr_pipe(audio, max_new_tokens=256, generate_kwargs={"task": "translate"})
     return outputs["text"]
 def synthesise(text):
     inputs = processor(text=text, return_tensors="pt")
     speech = model.generate_speech(inputs["input_ids"].to(device), speaker_embeddings.to(device), vocoder=vocoder)
     return speech.cpu()

 embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
 speaker_embeddings = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(0)
+model_mms = VitsModel.from_pretrained("Matthijs/mms-tts-deu").to(device)
+tokenizer_mms = VitsTokenizer.from_pretrained("Matthijs/mms-tts-deu")
 def translate(audio):
+    outputs = asr_pipe(audio, max_new_tokens=256, generate_kwargs={"task": "transcribe", "language": "de"})
     return outputs["text"]
 def synthesise(text):
+    inputs = tokenizer_mms(text, return_tensors="pt")
+    input_ids = inputs["input_ids"]
+    with torch.no_grad():
+        outputs = model_mms(input_ids)
     inputs = processor(text=text, return_tensors="pt")
     speech = model.generate_speech(inputs["input_ids"].to(device), speaker_embeddings.to(device), vocoder=vocoder)
     return speech.cpu()