yaya-sy committed on
Commit
da142ce
1 Parent(s): 1b64461

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -16
app.py CHANGED
@@ -1,13 +1,25 @@
1
  import gradio as gr
2
- from transformers import Wav2Vec2ForCTC, AutoProcessor, pipeline
3
- from optimum.bettertransformer import BetterTransformer
4
  import torch
5
  import librosa
6
  import json
7
 
8
- model_id = "cawoylel/windanam_mms-1b-tts_v2"
9
- processor = AutoProcessor.from_pretrained(model_id)
10
- model = Wav2Vec2ForCTC.from_pretrained(model_id)
 
 
 
 
 
 
 
 
 
 
 
 
 
11
 
12
  def transcribe(audio_file_mic=None, audio_file_upload=None):
13
  if audio_file_mic:
@@ -22,17 +34,7 @@ def transcribe(audio_file_mic=None, audio_file_upload=None):
22
  if sample_rate != 16000:
23
  speech = librosa.resample(speech, orig_sr=sample_rate, target_sr=16000)
24
 
25
- # Keep the same model in memory and simply switch out the language adapters by calling load_adapter() for the model and set_target_lang() for the tokenizer
26
- processor.tokenizer.set_target_lang("ful")
27
-
28
- inputs = processor(speech, sampling_rate=16_000, return_tensors="pt")
29
-
30
- with torch.no_grad():
31
- outputs = model(**inputs).logits
32
-
33
- ids = torch.argmax(outputs, dim=-1)[0]
34
- transcription = processor.decode(ids)
35
- return transcription
36
 
37
 
38
  description = '''Automatic Speech Recognition with [MMS](https://ai.facebook.com/blog/multilingual-model-speech-recognition/) (Massively Multilingual Speech) by Meta.
 
1
  import gradio as gr
2
+ from pipeline
 
3
  import torch
4
  import librosa
5
  import json
6
 
7
def load_model(model_name="cawoylel/windanam_mms-1b-tts_v2"):
    """Build the speech-recognition pipeline for a Hugging Face model.

    Args:
        model_name: Hub identifier of the checkpoint to load.

    Returns:
        The object returned by ``transformers.pipeline`` for the
        ``"automatic-speech-recognition"`` task.
    """
    # Imported locally under an alias: the module-level name ``pipeline`` is
    # rebound just below to the loaded pipeline object, so relying on the
    # global factory would break any later call to this function.
    from transformers import pipeline as build_pipeline

    return build_pipeline("automatic-speech-recognition", model=model_name)


# Loaded once at import time; ``transcribe_audio`` reads this global.
pipeline = load_model()
15
+
16
def transcribe_audio(sample):
    """Transcribe one audio sample with the module-level ASR pipeline.

    A bare ``st.cache_data(...)`` call used to sit above this function.
    ``st`` is not imported anywhere in the visible file, so that statement
    raised ``NameError`` at import time, and without a leading ``@`` it did
    not decorate this function anyway. It has been removed.

    Args:
        sample: Audio input forwarded unchanged to ``pipeline``. The visible
            caller passes the speech array after resampling it to 16 kHz.

    Returns:
        The value stored under the ``"text"`` key of the pipeline result.
    """
    result = pipeline(sample)
    return result["text"]
23
 
24
  def transcribe(audio_file_mic=None, audio_file_upload=None):
25
  if audio_file_mic:
 
34
  if sample_rate != 16000:
35
  speech = librosa.resample(speech, orig_sr=sample_rate, target_sr=16000)
36
 
37
+ return transcribe_audio(speech)
 
 
 
 
 
 
 
 
 
 
38
 
39
 
40
  description = '''Automatic Speech Recognition with [MMS](https://ai.facebook.com/blog/multilingual-model-speech-recognition/) (Massively Multilingual Speech) by Meta.