Spaces: Runtime error
anyantudre committed
Commit • 5c30e04
1 Parent(s): 1ffb58d

Rename speech_to_text.py to goai_stt.py

Files changed:
- goai_stt.py         +49 -0
- speech_to_text.py   +0 -46
goai_stt.py
ADDED
@@ -0,0 +1,49 @@
+import librosa
+import torch
+import time
+from transformers import set_seed, Wav2Vec2ForCTC, AutoProcessor
+
+
+def goai_stt(fichier, device):
+    """
+    Transcrire un fichier audio donné.
+
+    Paramètres
+    ----------
+    fichier: str
+        Le chemin d'accès au fichier audio.
+
+    device: str
+        GPU ou CPU
+
+    Return
+    ----------
+    transcript: str
+        Le texte transcrit.
+    """
+
+
+    ### assurer reproducibilité
+    set_seed(2024)
+
+    start_time = time.time()
+
+    ### charger le modèle de transcription
+    model_id = "anyantudre/wav2vec2-large-mms-1b-mos-V1"
+
+    processor = AutoProcessor.from_pretrained(model_id)
+    model = Wav2Vec2ForCTC.from_pretrained(model_id, target_lang="mos", device=device, ignore_mismatched_sizes=True)
+
+    ### preprocessing de l'audio
+    signal, sampling_rate = librosa.load(fichier, sr=16000)
+    inputs = processor(signal, sampling_rate=16_000, return_tensors="pt", padding=True)
+
+    ### faire l'inference
+    with torch.no_grad():
+        outputs = model(**inputs).logits
+
+    pred_ids = torch.argmax(outputs, dim=-1)[0]
+    transcription = processor.decode(pred_ids)
+
+    print("Temps écoulé: ", int(time.time() - start_time), " secondes")
+    return transcription
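A note on the new function's device argument: as committed, it is passed to Wav2Vec2ForCTC.from_pretrained rather than used to move the model or the processed inputs, which is not the usual placement pattern in transformers. Below is a minimal, hedged sketch of the more conventional .to(device) approach around the same model calls; the audio path is a placeholder and the snippet is illustrative, not part of the commit.

# Illustrative sketch (not from the repo): same model calls with explicit device placement.
import librosa
import torch
from transformers import AutoProcessor, Wav2Vec2ForCTC

model_id = "anyantudre/wav2vec2-large-mms-1b-mos-V1"
device = "cuda:0" if torch.cuda.is_available() else "cpu"

processor = AutoProcessor.from_pretrained(model_id)
model = Wav2Vec2ForCTC.from_pretrained(
    model_id, target_lang="mos", ignore_mismatched_sizes=True
).to(device)  # move the weights explicitly instead of passing device= to from_pretrained

signal, _ = librosa.load("audio/sample_moore.wav", sr=16000)  # placeholder path, not a file from the repo
inputs = processor(signal, sampling_rate=16_000, return_tensors="pt", padding=True)
inputs = {k: v.to(device) for k, v in inputs.items()}  # move the input tensors as well

with torch.no_grad():
    logits = model(**inputs).logits
pred_ids = torch.argmax(logits, dim=-1)[0]
print(processor.decode(pred_ids))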
speech_to_text.py
DELETED
@@ -1,46 +0,0 @@
-import librosa
-import torch
-from transformers import Wav2Vec2ForCTC, AutoProcessor
-from transformers import set_seed
-import time
-
-
-def transcribe(fp:str, target_lang:str) -> str:
-    '''
-    For given audio file, transcribe it.
-
-    Parameters
-    ----------
-    fp: str
-        The file path to the audio file.
-    target_lang:str
-        The ISO-3 code of the target language.
-
-    Returns
-    ----------
-    transcript:str
-        The transcribed text.
-    '''
-    # Ensure replicability
-    set_seed(555)
-    start_time = time.time()
-
-    # Load transcription model
-    model_id = "facebook/mms-1b-all"
-
-    processor = AutoProcessor.from_pretrained(model_id, target_lang=target_lang)
-    model = Wav2Vec2ForCTC.from_pretrained(model_id, target_lang=target_lang, ignore_mismatched_sizes=True)
-
-    # Process the audio
-    signal, sampling_rate = librosa.load(fp, sr=16000)
-    inputs = processor(signal, sampling_rate=16_000, return_tensors="pt")
-
-    # Inference
-    with torch.no_grad():
-        outputs = model(**inputs).logits
-
-    ids = torch.argmax(outputs, dim=-1)[0]
-    transcript = processor.decode(ids)
-
-    print("Time elapsed: ", int(time.time() - start_time), " seconds")
-    return transcript
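For comparison, the removed function exposed target_lang as a parameter on the multilingual facebook/mms-1b-all checkpoint instead of hard-coding Mooré. A minimal usage sketch, assuming the old module were still importable; the audio path is a placeholder.

# Illustrative usage of the removed transcribe() (not part of the current repo state).
from speech_to_text import transcribe

# "mos" is the ISO 639-3 code for Mooré; the path below is a placeholder.
transcript = transcribe("audio/sample.wav", target_lang="mos")
print(transcript)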