Files changed (1)
  1. app.py +8 -7
app.py CHANGED
@@ -27,21 +27,22 @@ embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validat
 speaker_embeddings = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(0)
 
 
+src = "en" # source language
+trg = "fr" # target language
+model_name = f"Helsinki-NLP/opus-mt-{src}-{trg}"
+
+model_translator = TFMarianMTModel.from_pretrained(model_name)
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+
 
 
 def translate(audio):
     outputs = asr_pipe(audio, max_new_tokens=256, generate_kwargs={"task": "translate"})
     english = outputs["text"]
 
-    src = "en" # source language
-    trg = "fr" # target language
-    model_name = f"Helsinki-NLP/opus-mt-{src}-{trg}"
-
-    model = TFMarianMTModel.from_pretrained(model_name)
-    tokenizer = AutoTokenizer.from_pretrained(model_name)
 
     batch = tokenizer([english], return_tensors="tf")
-    gen = model.generate(**batch)
+    gen = model_translator.generate(**batch)
     return tokenizer.batch_decode(gen, skip_special_tokens=True)[0]
 
 
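For context, a minimal standalone sketch of the relocated translation step: the Marian model is loaded once at module level and reused on every call, instead of being re-created inside translate(). This assumes transformers with TensorFlow installed; translate_text and the sample sentence are illustrative only and not part of app.py.

from transformers import AutoTokenizer, TFMarianMTModel

# Load the English->French Marian model and its tokenizer once, at import time.
model_name = "Helsinki-NLP/opus-mt-en-fr"
model_translator = TFMarianMTModel.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

def translate_text(english: str) -> str:
    # Tokenize the English text, generate the French translation, and decode it.
    batch = tokenizer([english], return_tensors="tf")
    gen = model_translator.generate(**batch)
    return tokenizer.batch_decode(gen, skip_special_tokens=True)[0]

print(translate_text("Hello, how are you?"))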