Hammad712 committed on
Commit
913fbb4
1 Parent(s): 548b9a1

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +149 -0
app.py ADDED
@@ -0,0 +1,149 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import torch
3
+ import librosa
4
+ from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
5
+ import Levenshtein
6
+ from io import BytesIO
7
+ from audio_recorder_streamlit import audio_recorder
8
+
9
# Build the Wav2Vec2 processor/model pair through Streamlit's resource cache
# so the checkpoint is loaded once rather than on every script rerun.
@st.cache_resource
def load_model():
    """Return the pretrained Arabic Wav2Vec2 processor and CTC model.

    Returns:
        tuple: ``(processor, model)``, both loaded from the same checkpoint.
    """
    checkpoint = "jonatasgrosman/wav2vec2-large-xlsr-53-arabic"
    return (
        Wav2Vec2Processor.from_pretrained(checkpoint),
        Wav2Vec2ForCTC.from_pretrained(checkpoint),
    )


# Module-level handles used by transcribe_audio().
processor, model = load_model()
18
+
19
def transcribe_audio(audio_bytes):
    """
    Run speech recognition on in-memory audio with the shared Wav2Vec2 model.

    Args:
        audio_bytes (bytes): Encoded audio data (the contents of an audio file).

    Returns:
        str: The decoded transcription with surrounding whitespace removed.
    """
    # Decode the in-memory file, asking librosa for a 16 kHz signal.
    waveform, rate = librosa.load(BytesIO(audio_bytes), sr=16000)
    features = processor(
        waveform,
        sampling_rate=rate,
        return_tensors="pt",
        padding=True,
    )
    # Inference only, so gradient tracking is switched off.
    with torch.no_grad():
        logits = model(features.input_values).logits
    # Take the highest-scoring token id at every frame, then decode to text.
    best_ids = logits.argmax(dim=-1)
    decoded = processor.batch_decode(best_ids)
    return decoded[0].strip()
36
+
37
def levenshtein_similarity(transcription1, transcription2):
    """
    Calculate the Levenshtein similarity between two transcriptions.

    The edit distance is normalized by the length of the longer string, so
    identical strings score 1. Two empty strings are treated as identical.

    Args:
        transcription1 (str): The first transcription.
        transcription2 (str): The second transcription.

    Returns:
        float: A normalized similarity score between 0 and 1, where 1 indicates identical transcriptions.
    """
    max_len = max(len(transcription1), len(transcription2))
    if max_len == 0:
        # Both transcriptions are empty (e.g. two silent clips): they are
        # identical, and the normalization below would divide by zero.
        return 1.0
    distance = Levenshtein.distance(transcription1, transcription2)
    return 1 - distance / max_len  # Normalize to get similarity score
51
+
52
def evaluate_audio_similarity(original_audio_bytes, user_audio_bytes):
    """
    Transcribe a reference clip and a user clip, then score how closely the texts match.

    Args:
        original_audio_bytes (bytes): Bytes of the original audio file.
        user_audio_bytes (bytes): Bytes of the user's audio file.

    Returns:
        tuple: ``(original transcription, user transcription, Levenshtein similarity score)``.
    """
    # The reference clip is transcribed first, then the user's attempt.
    reference_text, attempt_text = (
        transcribe_audio(original_audio_bytes),
        transcribe_audio(user_audio_bytes),
    )
    score = levenshtein_similarity(reference_text, attempt_text)
    return reference_text, attempt_text, score
67
+
68
# ---------------------------------------------------------------------------
# Streamlit page: collect an original clip and a user clip (uploaded or
# recorded), then transcribe both and report how similar the texts are.
# ---------------------------------------------------------------------------
st.title("Audio Transcription and Similarity Checker")

# Choose between upload or record
st.sidebar.header("Input Method")
input_method = st.sidebar.selectbox("Choose Input Method", ["Upload", "Record"])

# Scores strictly above this value are reported as a likely-correct
# pronunciation. Adjust the threshold as needed.
SIMILARITY_THRESHOLD = 0.8

# Raw bytes of each clip, plus the MIME type handed to the audio player.
# Recorded clips keep the "audio/wav" default; uploaded files use the MIME
# type reported for the upload so MP3 files are not labelled as WAV.
original_audio_bytes = None
user_audio_bytes = None
original_audio_format = "audio/wav"
user_audio_format = "audio/wav"

if input_method == "Upload":
    # Upload original audio file
    original_audio = st.file_uploader("Upload Original Audio", type=["wav", "mp3"])
    # Upload user audio file
    user_audio = st.file_uploader("Upload User Audio", type=["wav", "mp3"])

    if original_audio:
        original_audio_bytes = original_audio.read()
        original_audio_format = original_audio.type or original_audio_format
        st.audio(original_audio_bytes, format=original_audio_format)
    if user_audio:
        user_audio_bytes = user_audio.read()
        user_audio_format = user_audio.type or user_audio_format
        st.audio(user_audio_bytes, format=user_audio_format)

elif input_method == "Record":
    st.write("Record or Upload Original Audio")
    original_audio_bytes = audio_recorder(key="original_audio_recorder")

    # Offer a file upload only while nothing has been recorded.
    if not original_audio_bytes:
        original_audio = st.file_uploader("Or Upload Original Audio", type=["wav", "mp3"])
        if original_audio:
            original_audio_bytes = original_audio.read()
            original_audio_format = original_audio.type or original_audio_format

    if original_audio_bytes:
        with st.spinner("Processing original audio..."):
            st.audio(original_audio_bytes, format=original_audio_format)

    st.write("Record or Upload User Audio")
    user_audio_bytes = audio_recorder(key="user_audio_recorder")

    # Offer a file upload only while nothing has been recorded.
    if not user_audio_bytes:
        user_audio = st.file_uploader("Or Upload User Audio", type=["wav", "mp3"])
        if user_audio:
            user_audio_bytes = user_audio.read()
            user_audio_format = user_audio.type or user_audio_format

    if user_audio_bytes:
        with st.spinner("Processing user audio..."):
            st.audio(user_audio_bytes, format=user_audio_format)

# Shared by both input methods: the test button appears once both clips are
# available, and pressing it runs the comparison and renders the results.
if original_audio_bytes and user_audio_bytes:
    if st.button("Perform Testing"):
        with st.spinner("Performing transcription and similarity testing..."):
            transcription_original, transcription_user, similarity_score = evaluate_audio_similarity(
                original_audio_bytes, user_audio_bytes
            )

        # Display results
        st.markdown("---")
        st.subheader("Transcriptions and Similarity Score")
        st.write(f"**Original Transcription:** {transcription_original}")
        st.write(f"**User Transcription:** {transcription_user}")
        st.write(f"**Levenshtein Similarity Score:** {similarity_score:.2f}")

        if similarity_score > SIMILARITY_THRESHOLD:
            st.success("The pronunciation is likely correct based on transcription similarity.")
        else:
            st.error("The pronunciation may be incorrect based on transcription similarity.")