muzammil-eds committed on
Commit
8835c0c
1 Parent(s): 9f89cc5

Upload 2 files

Browse files
Files changed (2) hide show
  1. app_v4.py +85 -0
  2. requirements.txt +6 -0
app_v4.py ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import requests
3
+ import Levenshtein
4
+ import librosa
5
+ import torch
6
+ from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
7
+
8
def load_model(model_id="jonatasgrosman/wav2vec2-large-xlsr-53-arabic"):
    """Load the Wav2Vec2 processor and CTC model for a checkpoint.

    Args:
        model_id (str): Identifier of the pretrained checkpoint passed to
            ``from_pretrained``. Defaults to the Arabic XLSR-53 checkpoint,
            so calling ``load_model()`` with no arguments behaves as before.

    Returns:
        tuple: ``(processor, model)`` where ``processor`` is a
        ``Wav2Vec2Processor`` and ``model`` is a ``Wav2Vec2ForCTC``, both
        loaded from the same checkpoint.
    """
    processor = Wav2Vec2Processor.from_pretrained(model_id)
    model = Wav2Vec2ForCTC.from_pretrained(model_id)
    return processor, model


# Loaded once at import time; transcribe_audio_hf() reads these module-level
# names, so every request reuses the same processor and model instances.
processor, model = load_model()
15
+
16
def transcribe_audio_hf(audio_path):
    """Transcribe one audio file with the module-level Wav2Vec2 model.

    Args:
        audio_path (str): Path to the audio file.

    Returns:
        str: The decoded text for the file, with leading and trailing
        whitespace stripped.
    """
    # Load the waveform at a fixed 16 kHz rate (sr=16000).
    waveform, sample_rate = librosa.load(audio_path, sr=16000)
    features = processor(
        waveform,
        sampling_rate=sample_rate,
        return_tensors="pt",
        padding=True,
    )

    # Inference only, so skip building the autograd graph.
    with torch.no_grad():
        output = model(features.input_values)

    # Greedy decoding: take the highest-scoring token id at every frame.
    token_ids = output.logits.argmax(dim=-1)
    decoded = processor.batch_decode(token_ids)
    return decoded[0].strip()
31
+
32
def levenshtein_similarity(transcription1, transcription2):
    """Return the normalized Levenshtein similarity of two transcriptions.

    The edit distance is divided by the length of the longer string and
    subtracted from 1, so identical strings score 1 and strings with nothing
    in common score 0.

    Args:
        transcription1 (str): The first transcription.
        transcription2 (str): The second transcription.

    Returns:
        float: Similarity score between 0 and 1. Two empty transcriptions
        are treated as identical and score 1.0.
    """
    max_len = max(len(transcription1), len(transcription2))
    if max_len == 0:
        # Both strings are empty (e.g. silent audio on both sides): they are
        # identical, and dividing by max_len would raise ZeroDivisionError.
        return 1.0
    distance = Levenshtein.distance(transcription1, transcription2)
    return 1 - distance / max_len  # Normalize to get similarity score
44
+
45
def evaluate_audio_similarity(original_audio, user_audio):
    """Transcribe both recordings and score how closely the texts match.

    Args:
        original_audio (str): Path to the original audio file.
        user_audio (str): Path to the user's audio file.

    Returns:
        tuple: ``(original transcription, user transcription, similarity)``
        where the similarity is the value returned by
        ``levenshtein_similarity`` for the two transcriptions.
    """
    reference_text = transcribe_audio_hf(original_audio)
    spoken_text = transcribe_audio_hf(user_audio)
    return (
        reference_text,
        spoken_text,
        levenshtein_similarity(reference_text, spoken_text),
    )
58
+
59
def perform_testing(original_audio, user_audio):
    """Build the three Markdown result strings shown by the UI.

    Args:
        original_audio (str | None): Path to the original audio file, or
            None when nothing was provided.
        user_audio (str | None): Path to the user's audio file, or None when
            nothing was provided.

    Returns:
        tuple: Three strings, one per Markdown output component: the original
        transcription, the user transcription, and the similarity score
        formatted to two decimals. When either input is missing, the first
        string asks for both files and the other two are empty.
    """
    if original_audio is None or user_audio is None:
        # Always return one value per output component; falling through and
        # returning None would not match the three outputs of the click event.
        return (
            "Please provide both an original and a user audio file.",
            "",
            "",
        )

    transcription_original, transcription_user, similarity_score = evaluate_audio_similarity(
        original_audio, user_audio
    )
    return (
        f"**Original Transcription:** {transcription_original}",
        f"**User Transcription:** {transcription_user}",
        f"**Levenshtein Similarity Score:** {similarity_score:.2f}",
    )
67
+
68
# Gradio UI: two audio inputs, one trigger button, three Markdown result panes.
with gr.Blocks() as app:
    gr.Markdown("# Audio Transcription and Similarity Checker")

    # Both audio components hand the handler a file path (type="filepath").
    original_audio_upload = gr.Audio(type="filepath", label="Upload Original Audio")
    user_audio_upload = gr.Audio(type="filepath", label="Upload User Audio")
    upload_button = gr.Button("Perform Testing")

    output_original_transcription = gr.Markdown()
    output_user_transcription = gr.Markdown()
    output_similarity_score = gr.Markdown()

    # perform_testing returns one string per output component, in this order.
    upload_button.click(
        fn=perform_testing,
        inputs=[original_audio_upload, user_audio_upload],
        outputs=[
            output_original_transcription,
            output_user_transcription,
            output_similarity_score,
        ],
    )

app.launch()
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ transformers[torch]
2
+ pydub
3
+ Levenshtein
4
+ av
5
+ librosa
6
+ gradio