Spaces:
Paused
Paused
File size: 2,106 Bytes
5dd8287 214097c 5dd8287 214097c 5dd8287 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 |
import torch
import torchaudio
from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC, Wav2Vec2CTCTokenizer
# Load the tokenizer, processor and CTC model for one pretrained checkpoint.
# This happens once, at import time, so that transcribe() can reuse the
# module-level `processor` and `model` on every call.
# Checkpoint identifier passed to every from_pretrained() call below.
model_name = "Zaid/wav2vec2-large-xlsr-53-arabic-egyptian"
tokenizer = Wav2Vec2CTCTokenizer.from_pretrained(model_name)
# NOTE(review): the separately loaded tokenizer is handed over as a keyword
# argument here; confirm the installed transformers version actually uses it
# rather than loading its own copy from the checkpoint.
processor = Wav2Vec2Processor.from_pretrained(model_name, tokenizer=tokenizer)
model = Wav2Vec2ForCTC.from_pretrained(model_name)
def transcribe(audio_file):
    """Transcribe an audio file with the module-level wav2vec2 CTC model.

    The audio is loaded with torchaudio, resampled to 16 kHz when its native
    rate differs, reduced to a single channel and decoded greedily (argmax
    over the logits of every frame).

    Args:
        audio_file: Path (or any other source accepted by
            ``torchaudio.load``) of the recording to transcribe.

    Returns:
        The decoded text, or ``None`` if any step raised an exception. The
        error is printed instead of being propagated, so callers must check
        for ``None``.
    """
    target_sr = 16000  # sampling rate the audio is converted to before inference

    try:
        print("Loading audio file...")
        waveform, sr = torchaudio.load(audio_file)
        print(f"Audio loaded: {waveform.shape}, Sample rate: {sr}")

        if sr != target_sr:
            print("Resampling audio...")
            resampler = torchaudio.transforms.Resample(orig_freq=sr, new_freq=target_sr)
            waveform = resampler(waveform)
            sr = target_sr
            print(f"Audio shape after resampling: {waveform.shape}, Sample rate: {sr}")

        # torchaudio returns (channels, frames). Average the channels instead
        # of keeping only the first one, so speech that lives on another
        # channel of a multi-channel file is not silently discarded. Mono
        # input keeps the exact samples it had before.
        if waveform.shape[0] > 1:
            mono = waveform.mean(dim=0)
        else:
            mono = waveform[0]
        samples = mono.numpy()

        print("Processing audio input...")
        features = processor(samples, return_tensors="pt", sampling_rate=sr)
        input_values = features.input_values

        print("Running model inference...")
        with torch.no_grad():  # inference only, no gradients needed
            logits = model(input_values).logits

        print("Decoding transcription...")
        predicted_ids = torch.argmax(logits, dim=-1)
        decoded = processor.batch_decode(
            predicted_ids,
            skip_special_tokens=True,
            clean_up_tokenization_spaces=True,
        )
        # A single recording was passed in, so the batch has one entry.
        return decoded[0]
    except Exception as e:
        # Deliberate best-effort boundary: report the failure and signal it
        # to the caller with None instead of crashing the script.
        print(f"An error occurred: {e}")
        return None
# Transcribe the sample recording; transcribe() returns None on failure.
transcription = transcribe("sidiali.wav")
if transcription:
    # The text is already a str, so it is printed directly; encoding it to
    # UTF-8 and decoding it straight back produced the very same string.
    print(transcription)
    # Save the transcription to a file, written as UTF-8 regardless of the
    # platform's default encoding.
    with open("transcription.txt", "w", encoding="utf-8") as f:
        f.write(transcription)
    print("Transcription saved to transcription.txt")
|