Muhammed_Kotb1 committed on
Commit
5dd8287
1 Parent(s): 214097c

test ziad model

Browse files
Files changed (1) hide show
  1. app.py +53 -5
app.py CHANGED
@@ -1,7 +1,55 @@
1
- import gradio as gr
 
 
2
 
3
- def greet(name):
4
- return "Hello " + name + "!!"
 
 
 
5
 
6
- demo = gr.Interface(fn=greet, inputs="text", outputs="text")
7
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torchaudio
3
+ from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC, Wav2Vec2CTCTokenizer
4
 
5
# Checkpoint identifier shared by the tokenizer, processor and model loads
# below, so all three come from the same pretrained Arabic (Egyptian) repo.
model_name = "Zaid/wav2vec2-large-xlsr-53-arabic-egyptian"

# The CTC tokenizer is loaded explicitly and handed to the processor load.
tokenizer = Wav2Vec2CTCTokenizer.from_pretrained(model_name)
processor = Wav2Vec2Processor.from_pretrained(model_name, tokenizer=tokenizer)

# Acoustic model with a CTC head; `transcribe` reads its `.logits` output.
model = Wav2Vec2ForCTC.from_pretrained(model_name)
10
 
11
def transcribe(audio_file):
    """Transcribe a speech recording using the module-level processor and model.

    The file is loaded with ``torchaudio``, resampled to 16 kHz when its
    native rate differs, reduced to a single channel, run through the
    module-level ``model`` and greedily decoded with ``processor``.

    Args:
        audio_file: Path (or other source accepted by ``torchaudio.load``)
            of the recording to transcribe.

    Returns:
        The decoded text of the first batch item, or ``None`` if any step
        raised an exception (the error is printed, not re-raised).
    """
    # Deliberate best-effort boundary: callers rely on ``None`` signalling
    # failure, so every error is reported and swallowed here.
    try:
        # Load the audio file
        print("Loading audio file...")
        audio_input, sr = torchaudio.load(audio_file)
        print(f"Audio loaded: {audio_input.shape}, Sample rate: {sr}")

        # Resample if needed
        if sr != 16000:
            print("Resampling audio...")
            resampler = torchaudio.transforms.Resample(orig_freq=sr, new_freq=16000)
            audio_input = resampler(audio_input)
            sr = 16000

        print(f"Audio shape after resampling: {audio_input.shape}, Sample rate: {sr}")

        # Reduce to one channel. Previously only channel 0 was kept, which
        # silently dropped speech recorded on any other channel; multi-channel
        # input is now averaged. Single-channel input takes the original path.
        # Assumes the channels-first layout torchaudio.load returns by default.
        if audio_input.shape[0] > 1:
            mono = audio_input.mean(dim=0)
        else:
            mono = audio_input[0]

        # Convert tensor to numpy array
        audio_input = mono.numpy()

        # Process audio input
        print("Processing audio input...")
        input_values = processor(audio_input, return_tensors="pt", sampling_rate=sr).input_values

        # Run model inference (no gradients are needed for decoding)
        print("Running model inference...")
        with torch.no_grad():
            logits = model(input_values).logits

        # Decode transcription: pick the highest-scoring token id per frame
        print("Decoding transcription...")
        predicted_ids = torch.argmax(logits, dim=-1)
        transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True, clean_up_tokenization_spaces=True)
        return transcription[0]
    except Exception as e:
        print(f"An error occurred: {e}")
        return None
47
+
48
# Transcribe the bundled sample recording. `transcribe` returns None on
# failure, and an empty transcript is also skipped by the truthiness check.
transcription = transcribe("sidiali.wav")
if transcription:
    # The text is already a str, so it is printed directly; the former
    # encode/decode UTF-8 round-trip produced the same string.
    print(transcription)
    # Save the transcription to a file (explicit UTF-8 for the Arabic text)
    with open("transcription.txt", "w", encoding="utf-8") as f:
        f.write(transcription)
    print("Transcription saved to transcription.txt")