Spaces:

facebook
/

XLS-R-2B-22-16

Build error

patrickvonplaten committed on Nov 18, 2021

Commit

ec5489a

•

1 Parent(s): 2c93146

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,19 +1,21 @@
 import gradio as gr
 import librosa
 from transformers import AutoFeatureExtractor, AutoTokenizer, SpeechEncoderDecoderModel
 model_name = "facebook/wav2vec2-xls-r-2b-22-to-16"
-feature_extractor = AutoFeatureExtractor.from_pretrained(model_name, use_auth_token="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")
-tokenizer = AutoTokenizer.from_pretrained(model_name, use_auth_token="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", use_fast=False)
-model = SpeechEncoderDecoderModel.from_pretrained(model_name, use_auth_token="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")
 def process_audio_file(file):
     data, sr = librosa.load(file)
     if sr != 16000:
         data = librosa.resample(data, sr, 16000)
     print(data.shape)
-    input_values = feature_extractor(data, return_tensors="pt").input_values
     return input_values
 def transcribe(file, target_language):
@@ -75,7 +77,9 @@ iface = gr.Interface(
     outputs="text",
     layout="horizontal",
     theme="huggingface",
-    title="XLS-R 300M 22-to-16 Speech Translation",
     description="A simple interface to translate from 22 input spoken languages to 16 written languages.",
 )
 iface.launch()

 import gradio as gr
 import librosa
 from transformers import AutoFeatureExtractor, AutoTokenizer, SpeechEncoderDecoderModel
+import torch
 model_name = "facebook/wav2vec2-xls-r-2b-22-to-16"
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+feature_extractor = AutoFeatureExtractor.from_pretrained(model_name)
+tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)
+model = SpeechEncoderDecoderModel.from_pretrained(model_name).to(device)
 def process_audio_file(file):
     data, sr = librosa.load(file)
     if sr != 16000:
         data = librosa.resample(data, sr, 16000)
     print(data.shape)
+    input_values = feature_extractor(data, return_tensors="pt").input_values.to(device)
     return input_values
 def transcribe(file, target_language):
     outputs="text",
     layout="horizontal",
     theme="huggingface",
+    title="XLS-R 2B 22-to-16 Speech Translation",
     description="A simple interface to translate from 22 input spoken languages to 16 written languages.",
+    article = "<p style='text-align: center'><a href='https://huggingface.co/facebook/wav2vec2-xls-r-2b-22-to-16' target='_blank'>Click to learn more about XLS-R-2B-22-16 </a> | <a href='https://arxiv.org/abs/2111.09296' target='_blank'> With 🎙️ from Facebook XLS-R </a></p>",
 )
 iface.launch()