muzammil-eds committed on
Commit
af71291
1 Parent(s): 473a050

Files added

Browse files
Files changed (6) hide show
  1. Dockerfile +18 -0
  2. app.py +81 -0
  3. requirements.txt +7 -0
  4. static/script.js +103 -0
  5. static/style.css +173 -0
  6. templates/index.html +52 -0
Dockerfile ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Use an official Python runtime as the base image
FROM python:3.9-slim
LABEL authors="muzammil"

# pydub shells out to ffmpeg to decode anything that is not plain WAV (for
# example the WebM blobs produced by the browser recorder), and the slim base
# image does not ship it. libsndfile1 covers soundfile builds that do not
# bundle the library themselves.
RUN apt-get update \
    && apt-get install -y --no-install-recommends ffmpeg libsndfile1 \
    && rm -rf /var/lib/apt/lists/*

# Set the working directory inside the container
WORKDIR /app

# Install dependencies before copying the sources so this layer stays cached
# until requirements.txt itself changes
COPY requirements.txt /app/requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

# Copy the rest of the application into the container at /app
COPY . /app

# Expose the port Flask will run on
EXPOSE 7860

# Command to run the Flask app
CMD ["python", "app.py"]
app.py ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from flask import Flask, request, jsonify, render_template
2
+ import librosa
3
+ import torch
4
+ import Levenshtein
5
+ from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
6
+ from io import BytesIO
7
+ from flask_cors import CORS
8
+ from pydub import AudioSegment # NEW
9
+
10
# Flask application object; CORS(app) applies flask-cors with its defaults.
app = Flask(__name__)
CORS(app)

# Hugging Face checkpoint used for speech recognition. The processor and the
# model are created once at import time and reused by every request.
MODEL_ID = "jonatasgrosman/wav2vec2-large-xlsr-53-arabic"
processor = Wav2Vec2Processor.from_pretrained(MODEL_ID)
model = Wav2Vec2ForCTC.from_pretrained(MODEL_ID)
16
+
17
+
18
def convert_to_wav(audio_bytes):
    """Re-encode raw audio bytes as WAV using pydub.

    Args:
        audio_bytes: Encoded audio content; pydub is left to detect the format.

    Returns:
        A ``BytesIO`` rewound to offset 0 that holds the WAV data, or ``None``
        when decoding or exporting raises (the error is printed).
    """
    try:
        segment = AudioSegment.from_file(BytesIO(audio_bytes))
        wav_buffer = BytesIO()
        segment.export(wav_buffer, format="wav")
        # Rewind so the caller reads the WAV from its first byte.
        wav_buffer.seek(0)
    except Exception as e:
        print(f"Error converting audio: {e}")
        return None
    else:
        return wav_buffer
29
+
30
+
31
def transcribe_audio_hf(audio_bytes):
    """Transcribe encoded audio with the module-level Wav2Vec2 model.

    Args:
        audio_bytes: Encoded audio content in any format ``convert_to_wav``
            can decode.

    Returns:
        The decoded text of the first (only) batch item, stripped of
        surrounding whitespace.

    Raises:
        Exception: If the audio could not be converted to WAV.
    """
    wav_buffer = convert_to_wav(audio_bytes)
    if wav_buffer is None:
        raise Exception("Could not convert audio to WAV format")

    # Load the waveform at a fixed 16 kHz rate and hand the same rate to the
    # processor.
    waveform, rate = librosa.load(wav_buffer, sr=16000)
    features = processor(
        waveform,
        sampling_rate=rate,
        return_tensors="pt",
        padding=True,
    )

    # Inference only: skip gradient tracking.
    with torch.no_grad():
        logits = model(features.input_values).logits

    # Take the highest-scoring token id at every position, then decode.
    token_ids = logits.argmax(dim=-1)
    decoded = processor.batch_decode(token_ids)
    return decoded[0].strip()
44
+
45
+
46
def levenshtein_similarity(transcription1, transcription2):
    """Return a length-normalised Levenshtein similarity of two strings.

    The edit distance is divided by the length of the longer string and
    subtracted from 1, so identical strings score 1.0 and the score decreases
    as more edits are needed.

    Args:
        transcription1: First transcription.
        transcription2: Second transcription.

    Returns:
        The similarity score. Two empty transcriptions are treated as
        identical and score 1.0.
    """
    max_len = max(len(transcription1), len(transcription2))
    if max_len == 0:
        # Both transcriptions are empty (e.g. silent recordings); without this
        # guard the normalisation below would divide by zero.
        return 1.0
    distance = Levenshtein.distance(transcription1, transcription2)
    return 1 - distance / max_len
50
+
51
+
52
@app.route("/")
def index():
    """Serve the front-end page rendered from the ``index.html`` template."""
    page = render_template("index.html")
    return page
55
+
56
+
57
@app.route('/transcribe', methods=['POST'])
def transcribe():
    """Transcribe two uploaded recordings and score how similar they are.

    Expects a multipart form with the file fields ``original_audio`` and
    ``user_audio``.

    Returns:
        A JSON response with ``transcription_original``,
        ``transcription_user`` and ``similarity_score`` on success; a JSON
        ``{"error": ...}`` body with status 400 when a file field is missing,
        or with status 500 when transcription raises.
    """
    original_audio = request.files.get('original_audio')
    user_audio = request.files.get('user_audio')
    if original_audio is None or user_audio is None:
        # Answer in JSON: the front end always parses the response body as
        # JSON and cannot handle the default HTML error page.
        return jsonify({
            "error": "Both 'original_audio' and 'user_audio' files are required"
        }), 400

    original_audio_bytes = original_audio.read()
    user_audio_bytes = user_audio.read()

    try:
        transcription_original = transcribe_audio_hf(original_audio_bytes)
        transcription_user = transcribe_audio_hf(user_audio_bytes)
    except Exception as e:
        return jsonify({"error": str(e)}), 500

    similarity_score = levenshtein_similarity(transcription_original, transcription_user)

    return jsonify({
        "transcription_original": transcription_original,
        "transcription_user": transcription_user,
        "similarity_score": similarity_score
    })
78
+
79
+
80
if __name__ == '__main__':
    # Listen on all interfaces on port 7860, the port the container exposes;
    # Flask's default of 127.0.0.1:5000 cannot be reached from outside the
    # container. Debug mode is left off because the interactive debugger lets
    # anyone who can reach the server execute arbitrary code.
    app.run(host="0.0.0.0", port=7860)
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ Flask==2.1.1
2
+ Flask-Cors==3.0.10
3
+ librosa==0.8.1
4
+ torch==1.9.0
5
+ transformers==4.5.1
6
+ pydub==0.25.1
7
+ Levenshtein==0.12.0
static/script.js ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
// State shared by the upload, recording and submit handlers in this file.
let mediaRecorder,             // MediaRecorder created by startRecording()
    audioChunks = [],          // data chunks collected for the current recording
    originalAudioBlob = null,  // reference audio (uploaded file or recording)
    userAudioBlob = null;      // user audio (uploaded file or recording)
5
+
6
// Preview the uploaded reference audio and remember it for submission.
document.getElementById('originalAudio').addEventListener('change', function (e) {
    const file = e.target.files[0];
    // `change` can fire with an empty FileList (e.g. the picker was cancelled
    // after an earlier selection); URL.createObjectURL(undefined) would throw.
    if (!file) {
        return;
    }

    const audioPlayer = document.getElementById('originalAudioPlayer');
    audioPlayer.src = URL.createObjectURL(file);
    audioPlayer.play();
    originalAudioBlob = file;
});
14
+
15
// Preview the uploaded user audio and remember it for submission.
document.getElementById('userAudio').addEventListener('change', function (e) {
    const file = e.target.files[0];
    // `change` can fire with an empty FileList (e.g. the picker was cancelled
    // after an earlier selection); URL.createObjectURL(undefined) would throw.
    if (!file) {
        return;
    }

    const audioPlayer = document.getElementById('userAudioPlayer');
    audioPlayer.src = URL.createObjectURL(file);
    audioPlayer.play();
    userAudioBlob = file;
});
23
+
24
/**
 * Start recording microphone audio for one of the two audio slots.
 *
 * When the recording is stopped, the captured audio is loaded into the
 * matching <audio> player and stored in originalAudioBlob / userAudioBlob.
 *
 * @param {string} type - 'original' for the reference audio; any other value
 *     drives the user-audio buttons, and only 'user' stores the result.
 */
function startRecording(type) {
    audioChunks = [];

    const isOriginal = type === 'original';
    const recordButton = document.getElementById(isOriginal ? 'recordOriginalAudio' : 'recordUserAudio');
    const stopButton = document.getElementById(isOriginal ? 'stopOriginalAudio' : 'stopUserAudio');

    // Add recording animation and disable the start button
    recordButton.classList.add('recording-active');
    recordButton.disabled = true;
    stopButton.disabled = false;

    navigator.mediaDevices.getUserMedia({ audio: true }).then(stream => {
        // Prefer WebM, but let the browser pick its own default container
        // where WebM is unsupported instead of throwing in the constructor.
        const options = MediaRecorder.isTypeSupported('audio/webm')
            ? { mimeType: 'audio/webm' }
            : undefined;
        const recorder = new MediaRecorder(stream, options);
        mediaRecorder = recorder;

        recorder.addEventListener("dataavailable", event => {
            audioChunks.push(event.data);
        });

        recorder.addEventListener("stop", () => {
            // Release the microphone so the browser's recording indicator
            // goes away once the recording is finished.
            stream.getTracks().forEach(track => track.stop());

            // Label the blob with the container the recorder really produced;
            // the chunks are not WAV data.
            const audioBlob = new Blob(audioChunks, { type: recorder.mimeType });
            const audioURL = URL.createObjectURL(audioBlob);

            if (type === 'original') {
                document.getElementById('originalAudioPlayer').src = audioURL;
                originalAudioBlob = audioBlob;
            } else if (type === 'user') {
                document.getElementById('userAudioPlayer').src = audioURL;
                userAudioBlob = audioBlob;
            }
        });

        recorder.start();
    }).catch(error => {
        // Microphone access was denied or the recorder could not be created:
        // put the buttons back so the UI is not stuck in the recording state.
        recordButton.classList.remove('recording-active');
        recordButton.disabled = false;
        stopButton.disabled = true;
        alert(`Could not start recording: ${error.message}`);
    });
}
60
+
61
/**
 * Stop the active recording and restore the buttons of the given slot.
 *
 * @param {string} type - 'original' for the reference audio; any other value
 *     is treated as the user audio slot.
 */
function stopRecording(type) {
    // The recorder only exists once microphone access has been granted, and
    // stop() throws on a recorder that is already inactive.
    if (mediaRecorder && mediaRecorder.state !== 'inactive') {
        mediaRecorder.stop();
    }

    // Remove recording animation and enable the start button
    if (type === 'original') {
        document.getElementById('recordOriginalAudio').classList.remove('recording-active');
        document.getElementById('recordOriginalAudio').disabled = false;
        document.getElementById('stopOriginalAudio').disabled = true;
    } else {
        document.getElementById('recordUserAudio').classList.remove('recording-active');
        document.getElementById('recordUserAudio').disabled = false;
        document.getElementById('stopUserAudio').disabled = true;
    }
}
75
+
76
// Upload both recordings to /transcribe and display the transcriptions and
// their similarity score.
document.getElementById('performTesting').addEventListener('click', function () {
    if (!(originalAudioBlob && userAudioBlob)) {
        alert('Please provide both original and user audio files.');
        return;
    }

    const formData = new FormData();
    formData.append('original_audio', originalAudioBlob, 'original_audio.wav');
    formData.append('user_audio', userAudioBlob, 'user_audio.wav');

    const loader = document.getElementById('loader');
    const results = document.getElementById('results');

    // Show loader
    loader.style.display = 'block';
    results.style.display = 'none';

    fetch('/transcribe', {
        method: 'POST',
        body: formData
    })
        .then(response => response.json().then(data => ({ ok: response.ok, data })))
        .then(({ ok, data }) => {
            // On failure the server answers with {"error": ...} and no score,
            // so surface that message instead of reading missing fields.
            if (!ok || data.error) {
                throw new Error(data.error || 'Transcription request failed.');
            }

            results.style.display = 'block';
            document.getElementById('transcriptionOriginal').innerText = `Original Transcription: ${data.transcription_original}`;
            document.getElementById('transcriptionUser').innerText = `User Transcription: ${data.transcription_user}`;
            document.getElementById('similarityScore').innerText = `Similarity Score: ${data.similarity_score.toFixed(2)}`;
        })
        .catch(error => {
            alert(`Transcription failed: ${error.message}`);
        })
        .finally(() => {
            // Hide the loader whether the request succeeded or failed.
            loader.style.display = 'none';
        });
});
static/style.css ADDED
@@ -0,0 +1,173 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /* Reset some default browser styles */
2
+ * {
3
+ margin: 0;
4
+ padding: 0;
5
+ box-sizing: border-box;
6
+ }
7
+
8
+ /* Make the body take up the full viewport height */
9
+ body {
10
+ font-family: 'Arial', sans-serif;
11
+ background-color: #2c2f33;
12
+ color: white;
13
+ height: 100vh;
14
+ display: flex;
15
+ flex-direction: column;
16
+ justify-content: center;
17
+ align-items: center;
18
+ }
19
+
20
+ /* Center container and add padding for mobile devices */
21
+ .container {
22
+ width: 90%;
23
+ max-width: 1200px;
24
+ margin: auto;
25
+ text-align: center;
26
+ padding: 20px;
27
+ background-color: #1c1e22;
28
+ border-radius: 12px;
29
+ box-shadow: 0px 4px 20px rgba(0, 0, 0, 0.4);
30
+ transition: all 0.3s ease;
31
+ }
32
+
33
+ /* Add hover effect for container */
34
+ .container:hover {
35
+ box-shadow: 0px 6px 24px rgba(0, 0, 0, 0.6);
36
+ }
37
+
38
+ /* Style the headings */
39
+ h1 {
40
+ margin: 20px 0;
41
+ font-size: 2.5rem;
42
+ color: #7289da;
43
+ }
44
+
45
+ /* Make the audio-panel responsive using flexbox */
46
+ .audio-panel {
47
+ display: flex;
48
+ flex-wrap: wrap;
49
+ justify-content: space-around;
50
+ margin: 20px 0;
51
+ }
52
+
53
+ .audio-upload {
54
+ width: 45%;
55
+ min-width: 300px;
56
+ padding: 10px;
57
+ margin-bottom: 20px;
58
+ background-color: #40444b;
59
+ border-radius: 10px;
60
+ transition: transform 0.2s;
61
+ }
62
+
63
+ .audio-upload:hover {
64
+ transform: translateY(-5px);
65
+ }
66
+
67
+ h2 {
68
+ font-size: 1.25rem;
69
+ margin-bottom: 10px;
70
+ color: #99aab5;
71
+ }
72
+
73
+ /* Style for file input */
74
+ input[type="file"] {
75
+ display: block;
76
+ margin: 10px 0;
77
+ background-color: #7289da;
78
+ color: white;
79
+ padding: 10px;
80
+ border-radius: 5px;
81
+ cursor: pointer;
82
+ transition: background-color 0.3s;
83
+ }
84
+
85
+ input[type="file"]:hover {
86
+ background-color: #5b6bb0;
87
+ }
88
+
89
+ /* Style the audio players */
90
+ audio {
91
+ width: 100%;
92
+ margin: 10px 0;
93
+ }
94
+
95
+ /* Style buttons with consistent design */
96
+ button {
97
+ padding: 12px 25px;
98
+ font-size: 1rem;
99
+ background-color: #7289da;
100
+ color: white;
101
+ border: none;
102
+ border-radius: 5px;
103
+ cursor: pointer;
104
+ transition: background-color 0.3s, transform 0.2s;
105
+ margin: 10px 5px;
106
+ }
107
+
108
+ button:hover {
109
+ background-color: #5b6bb0;
110
+ transform: translateY(-3px);
111
+ }
112
+
113
+ /* Loader and result display */
114
+ #loader {
115
+ font-size: 1.25rem;
116
+ color: #7289da;
117
+ margin: 20px 0;
118
+ }
119
+
120
+ .results {
121
+ margin-top: 20px;
122
+ background-color: #40444b;
123
+ padding: 20px;
124
+ border-radius: 10px;
125
+ color: #99aab5;
126
+ text-align: left;
127
+ }
128
+
129
+ .results h3 {
130
+ margin-bottom: 10px;
131
+ color: #7289da;
132
+ }
133
+
134
+ #results p {
135
+ font-size: 1.1rem;
136
+ margin: 5px 0;
137
+ }
138
+
139
+ /* Media query to ensure responsiveness on smaller screens */
140
+ @media (max-width: 768px) {
141
+ .audio-upload {
142
+ width: 100%;
143
+ margin-bottom: 20px;
144
+ }
145
+
146
+ h1 {
147
+ font-size: 2rem;
148
+ }
149
+
150
+ button {
151
+ width: 100%;
152
+ padding: 15px;
153
+ }
154
+ }
155
+
156
+ /* Add recording animation style */
157
+ .recording-active {
158
+ animation: pulse 1s infinite;
159
+ background-color: red;
160
+ color: white;
161
+ }
162
+
163
+ @keyframes pulse {
164
+ 0% {
165
+ box-shadow: 0 0 0 0 rgba(255, 0, 0, 0.7);
166
+ }
167
+ 70% {
168
+ box-shadow: 0 0 0 10px rgba(255, 0, 0, 0);
169
+ }
170
+ 100% {
171
+ box-shadow: 0 0 0 0 rgba(255, 0, 0, 0);
172
+ }
173
+ }
templates/index.html ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>Audio Transcription and Similarity Checker</title>
7
+ <link rel="stylesheet" href="/static/style.css">
8
+ </head>
9
+ <body>
10
+ <div class="container">
11
+ <h1>Audio Transcription and Similarity Checker</h1>
12
+
13
+ <!-- Audio upload/record panel -->
14
+ <div class="audio-panel">
15
+ <div class="audio-upload">
16
+ <h2>Upload or Record Original Audio</h2>
17
+ <input type="file" id="originalAudio" accept="audio/*">
18
+ <audio id="originalAudioPlayer" controls></audio>
19
+ <br>
20
+ <button id="recordOriginalAudio" onclick="startRecording('original')">Start Recording</button>
21
+ <button id="stopOriginalAudio" onclick="stopRecording('original')" disabled>Stop Recording</button>
22
+ </div>
23
+ <div class="audio-upload">
24
+ <h2>Upload or Record User Audio</h2>
25
+ <input type="file" id="userAudio" accept="audio/*">
26
+ <audio id="userAudioPlayer" controls></audio>
27
+ <br>
28
+ <button id="recordUserAudio" onclick="startRecording('user')">Start Recording</button>
29
+ <button id="stopUserAudio" onclick="stopRecording('user')" disabled>Stop Recording</button>
30
+ </div>
31
+ </div>
32
+
33
+ <!-- Button to perform similarity check -->
34
+ <button id="performTesting">Perform Testing</button>
35
+
36
+ <!-- Loader while processing -->
37
+ <div id="loader" style="display: none;">
38
+ <p>Processing... Please wait</p>
39
+ </div>
40
+
41
+ <!-- Results section -->
42
+ <div id="results" class="results" style="display: none;">
43
+ <h3>Results</h3>
44
+ <p id="transcriptionOriginal"></p>
45
+ <p id="transcriptionUser"></p>
46
+ <p id="similarityScore"></p>
47
+ </div>
48
+ </div>
49
+
50
+ <script src="/static/script.js"></script>
51
+ </body>
52
+ </html>