Spaces:

DrAliGomaa
/

sidiali_project

Paused

App Files Files Community

Muhammed_Kotb1 committed on Aug 17

Commit

5dd8287

•

1 Parent(s): 214097c

test ziad model

Browse files

Files changed (1) hide show

app.py +53 -5

app.py CHANGED Viewed

@@ -1,7 +1,55 @@
-import gradio as gr
-def greet(name):
-    return "Hello " + name + "!!"
-demo = gr.Interface(fn=greet, inputs="text", outputs="text")
-demo.launch()

+import torch
+import torchaudio
+from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC, Wav2Vec2CTCTokenizer
+# Load the Arabic-specific processor and model
+# Checkpoint name passed to every from_pretrained call below.
+model_name = "Zaid/wav2vec2-large-xlsr-53-arabic-egyptian"
+# The tokenizer is loaded on its own and handed to the processor on the next line.
+tokenizer = Wav2Vec2CTCTokenizer.from_pretrained(model_name)
+# NOTE(review): the processor is loaded from the same checkpoint as the tokenizer;
+# confirm the explicit tokenizer= argument is actually required here.
+processor = Wav2Vec2Processor.from_pretrained(model_name, tokenizer=tokenizer)
+# CTC model whose logits transcribe() decodes; loaded once when the module runs.
+model = Wav2Vec2ForCTC.from_pretrained(model_name)
+def transcribe(audio_file):
+    """Transcribe the audio at ``audio_file`` using the module-level processor and model.
+
+    The waveform is loaded with torchaudio, resampled to 16 kHz when its
+    sample rate differs, and only its first channel is passed on for
+    recognition. Progress messages are printed at every stage.
+
+    Args:
+        audio_file: Path (or other source accepted by ``torchaudio.load``)
+            of the recording to transcribe.
+
+    Returns:
+        The first decoded transcription string, or ``None`` when any step
+        raised; in that case the error is printed instead of propagated.
+    """
+    target_rate = 16000
+    try:
+        print("Loading audio file...")
+        audio_input, sr = torchaudio.load(audio_file)
+        print(f"Audio loaded: {audio_input.shape}, Sample rate: {sr}")
+
+        # Only resample when the file is not already at the target rate.
+        if sr != target_rate:
+            print("Resampling audio...")
+            audio_input = torchaudio.transforms.Resample(
+                orig_freq=sr, new_freq=target_rate
+            )(audio_input)
+            sr = target_rate
+        print(f"Audio shape after resampling: {audio_input.shape}, Sample rate: {sr}")
+
+        # First channel only, converted from tensor to numpy array.
+        samples = audio_input[0].numpy()
+
+        print("Processing audio input...")
+        features = processor(samples, return_tensors="pt", sampling_rate=sr)
+        input_values = features.input_values
+
+        print("Running model inference...")
+        with torch.no_grad():
+            logits = model(input_values).logits
+
+        print("Decoding transcription...")
+        predicted_ids = logits.argmax(dim=-1)
+        decoded = processor.batch_decode(
+            predicted_ids,
+            skip_special_tokens=True,
+            clean_up_tokenization_spaces=True,
+        )
+        return decoded[0]
+    except Exception as e:
+        print(f"An error occurred: {e}")
+        return None
+# Transcribe the audio file
+transcription = transcribe("sidiali.wav")
+# transcribe() returns None on failure; a falsy (None or empty) result is skipped.
+if transcription:
+    # A Python str is already Unicode text, so it is printed directly; an
+    # encode/decode round trip through UTF-8 would yield the same string.
+    print(transcription)
+    # Save the transcription to a file
+    with open("transcription.txt", "w", encoding="utf-8") as f:
+        f.write(transcription)
+    print("Transcription saved to transcription.txt")