Spaces:

EnDevSols
/

Arabic-ASR-Flask

Building

App Files Files Community

muzammil-eds committed on Sep 7

Commit

af71291

•

1 Parent(s): 473a050

Files added

Browse files

Files changed (6) hide show

Dockerfile +18 -0
app.py +81 -0
requirements.txt +7 -0
static/script.js +103 -0
static/style.css +173 -0
templates/index.html +52 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,18 @@

+# Use an official Python runtime as the base image
+FROM python:3.9-slim
+LABEL authors="muzammil"
+# pydub shells out to ffmpeg to decode anything that is not plain WAV (for example
+# the WebM recordings produced by the browser); the slim image does not ship it.
+RUN apt-get update \
+    && apt-get install -y --no-install-recommends ffmpeg \
+    && rm -rf /var/lib/apt/lists/*
+# Set the working directory inside the container
+WORKDIR /app
+# Install dependencies first so this layer stays cached when only the source changes
+COPY requirements.txt /app/requirements.txt
+RUN pip install --no-cache-dir -r requirements.txt
+# Copy the rest of the application into the container at /app
+COPY . /app
+# Expose the port Flask will run on
+EXPOSE 7860
+# Command to run the Flask app
+CMD ["python", "app.py"]

app.py ADDED Viewed

	@@ -0,0 +1,81 @@

+from flask import Flask, request, jsonify, render_template
+import librosa
+import torch
+import Levenshtein
+from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
+from io import BytesIO
+from flask_cors import CORS
+from pydub import AudioSegment  # NEW
+# Flask application object; CORS(app) turns on cross-origin support for it.
+app = Flask(__name__)
+CORS(app)
+# Hugging Face identifier of the Arabic Wav2Vec2 checkpoint loaded below.
+MODEL_ID = "jonatasgrosman/wav2vec2-large-xlsr-53-arabic"
+# Processor and model are loaded once at import time and reused by transcribe_audio_hf.
+processor = Wav2Vec2Processor.from_pretrained(MODEL_ID)
+model = Wav2Vec2ForCTC.from_pretrained(MODEL_ID)
+def convert_to_wav(audio_bytes):
+    """Convert audio bytes to wav format using pydub"""
+    try:
+        audio = AudioSegment.from_file(BytesIO(audio_bytes))  # Auto-detect format
+        wav_io = BytesIO()
+        audio.export(wav_io, format="wav")
+        wav_io.seek(0)
+        return wav_io
+    except Exception as e:
+        print(f"Error converting audio: {e}")
+        return None
+def transcribe_audio_hf(audio_bytes):
+    """Transcribes the audio using a pretrained Wav2Vec2 model."""
+    wav_io = convert_to_wav(audio_bytes)  # Convert to wav
+    if wav_io is None:
+        raise Exception("Could not convert audio to WAV format")
+    speech_array, sampling_rate = librosa.load(wav_io, sr=16000)
+    input_values = processor(speech_array, sampling_rate=sampling_rate, return_tensors="pt", padding=True).input_values
+    with torch.no_grad():
+        logits = model(input_values).logits
+    predicted_ids = torch.argmax(logits, dim=-1)
+    transcription = processor.batch_decode(predicted_ids)[0].strip()
+    return transcription
+def levenshtein_similarity(transcription1, transcription2):
+    distance = Levenshtein.distance(transcription1, transcription2)
+    max_len = max(len(transcription1), len(transcription2))
+    return 1 - distance / max_len
+@app.route('/')
+def index():
+    return render_template('index.html')
+@app.route('/transcribe', methods=['POST'])
+def transcribe():
+    original_audio = request.files['original_audio']
+    user_audio = request.files['user_audio']
+    original_audio_bytes = original_audio.read()
+    user_audio_bytes = user_audio.read()
+    try:
+        transcription_original = transcribe_audio_hf(original_audio_bytes)
+        transcription_user = transcribe_audio_hf(user_audio_bytes)
+    except Exception as e:
+        return jsonify({"error": str(e)}), 500
+    similarity_score = levenshtein_similarity(transcription_original, transcription_user)
+    return jsonify({
+        "transcription_original": transcription_original,
+        "transcription_user": transcription_user,
+        "similarity_score": similarity_score
+    })
+if __name__ == '__main__':
+    app.run(debug=True)

requirements.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+Flask==2.1.1
+Flask-Cors==3.0.10
+librosa==0.8.1
+torch==1.9.0
+transformers==4.5.1
+pydub==0.25.1
+Levenshtein==0.12.0

static/script.js ADDED Viewed

	@@ -0,0 +1,103 @@

+// Recorder created by startRecording(); stopRecording() calls stop() on it.
+let mediaRecorder;
+// Data chunks collected from the recorder's "dataavailable" events.
+let audioChunks = [];
+// Audio currently selected or recorded for each slot; both are required before upload.
+let originalAudioBlob = null;
+let userAudioBlob = null;
+document.getElementById('originalAudio').addEventListener('change', function (e) {
+    const file = e.target.files[0];
+    const audioPlayer = document.getElementById('originalAudioPlayer');
+    const fileURL = URL.createObjectURL(file);
+    audioPlayer.src = fileURL;
+    audioPlayer.play();
+    originalAudioBlob = file;
+});
+document.getElementById('userAudio').addEventListener('change', function (e) {
+    const file = e.target.files[0];
+    const audioPlayer = document.getElementById('userAudioPlayer');
+    const fileURL = URL.createObjectURL(file);
+    audioPlayer.src = fileURL;
+    audioPlayer.play();
+    userAudioBlob = file;
+});
+function startRecording(type) {
+    audioChunks = [];
+    navigator.mediaDevices.getUserMedia({ audio: true }).then(stream => {
+        mediaRecorder = new MediaRecorder(stream, { mimeType: 'audio/webm' }); // Default format is webm
+        mediaRecorder.start();
+        mediaRecorder.addEventListener("dataavailable", event => {
+            audioChunks.push(event.data);
+        });
+        mediaRecorder.addEventListener("stop", () => {
+            const audioBlob = new Blob(audioChunks, { type: 'audio/wav' }); // Save as .wav
+            const audioURL = URL.createObjectURL(audioBlob);
+            if (type === 'original') {
+                document.getElementById('originalAudioPlayer').src = audioURL;
+                originalAudioBlob = audioBlob;
+            } else if (type === 'user') {
+                document.getElementById('userAudioPlayer').src = audioURL;
+                userAudioBlob = audioBlob;
+            }
+        });
+    });
+    // Add recording animation and disable the start button
+    if (type === 'original') {
+        document.getElementById('recordOriginalAudio').classList.add('recording-active');
+        document.getElementById('recordOriginalAudio').disabled = true;
+        document.getElementById('stopOriginalAudio').disabled = false;
+    } else {
+        document.getElementById('recordUserAudio').classList.add('recording-active');
+        document.getElementById('recordUserAudio').disabled = true;
+        document.getElementById('stopUserAudio').disabled = false;
+    }
+}
+function stopRecording(type) {
+    mediaRecorder.stop();
+    // Remove recording animation and enable the start button
+    if (type === 'original') {
+        document.getElementById('recordOriginalAudio').classList.remove('recording-active');
+        document.getElementById('recordOriginalAudio').disabled = false;
+        document.getElementById('stopOriginalAudio').disabled = true;
+    } else {
+        document.getElementById('recordUserAudio').classList.remove('recording-active');
+        document.getElementById('recordUserAudio').disabled = false;
+        document.getElementById('stopUserAudio').disabled = true;
+    }
+}
+document.getElementById('performTesting').addEventListener('click', function () {
+    if (originalAudioBlob && userAudioBlob) {
+        const formData = new FormData();
+        formData.append('original_audio', originalAudioBlob, 'original_audio.wav');
+        formData.append('user_audio', userAudioBlob, 'user_audio.wav');
+        // Show loader
+        document.getElementById('loader').style.display = 'block';
+        document.getElementById('results').style.display = 'none';
+        fetch('/transcribe', {
+            method: 'POST',
+            body: formData
+        })
+        .then(response => response.json())
+        .then(data => {
+            // Hide loader and show results
+            document.getElementById('loader').style.display = 'none';
+            document.getElementById('results').style.display = 'block';
+            document.getElementById('transcriptionOriginal').innerText = `Original Transcription: ${data.transcription_original}`;
+            document.getElementById('transcriptionUser').innerText = `User Transcription: ${data.transcription_user}`;
+            document.getElementById('similarityScore').innerText = `Similarity Score: ${data.similarity_score.toFixed(2)}`;
+        });
+    } else {
+        alert('Please provide both original and user audio files.');
+    }
+});

static/style.css ADDED Viewed

	@@ -0,0 +1,173 @@

+/* Reset some default browser styles */
+* {
+    margin: 0;
+    padding: 0;
+    box-sizing: border-box;
+}
+/* Make the body at least as tall as the viewport. A fixed height combined with
+   vertically centred content would push the top of taller content off-screen. */
+body {
+    font-family: 'Arial', sans-serif;
+    background-color: #2c2f33;
+    color: white;
+    min-height: 100vh;
+    display: flex;
+    flex-direction: column;
+    justify-content: center;
+    align-items: center;
+}
+/* Center container and add padding for mobile devices */
+.container {
+    width: 90%;
+    max-width: 1200px;
+    margin: auto;
+    text-align: center;
+    padding: 20px;
+    background-color: #1c1e22;
+    border-radius: 12px;
+    box-shadow: 0px 4px 20px rgba(0, 0, 0, 0.4);
+    transition: all 0.3s ease;
+}
+/* Add hover effect for container */
+.container:hover {
+    box-shadow: 0px 6px 24px rgba(0, 0, 0, 0.6);
+}
+/* Style the headings */
+h1 {
+    margin: 20px 0;
+    font-size: 2.5rem;
+    color: #7289da;
+}
+/* Make the audio-panel responsive using flexbox */
+.audio-panel {
+    display: flex;
+    flex-wrap: wrap;
+    justify-content: space-around;
+    margin: 20px 0;
+}
+.audio-upload {
+    width: 45%;
+    min-width: 300px;
+    padding: 10px;
+    margin-bottom: 20px;
+    background-color: #40444b;
+    border-radius: 10px;
+    transition: transform 0.2s;
+}
+.audio-upload:hover {
+    transform: translateY(-5px);
+}
+h2 {
+    font-size: 1.25rem;
+    margin-bottom: 10px;
+    color: #99aab5;
+}
+/* Style for file input */
+input[type="file"] {
+    display: block;
+    margin: 10px 0;
+    background-color: #7289da;
+    color: white;
+    padding: 10px;
+    border-radius: 5px;
+    cursor: pointer;
+    transition: background-color 0.3s;
+}
+input[type="file"]:hover {
+    background-color: #5b6bb0;
+}
+/* Style the audio players */
+audio {
+    width: 100%;
+    margin: 10px 0;
+}
+/* Style buttons with consistent design */
+button {
+    padding: 12px 25px;
+    font-size: 1rem;
+    background-color: #7289da;
+    color: white;
+    border: none;
+    border-radius: 5px;
+    cursor: pointer;
+    transition: background-color 0.3s, transform 0.2s;
+    margin: 10px 5px;
+}
+button:hover {
+    background-color: #5b6bb0;
+    transform: translateY(-3px);
+}
+/* Loader and result display */
+#loader {
+    font-size: 1.25rem;
+    color: #7289da;
+    margin: 20px 0;
+}
+.results {
+    margin-top: 20px;
+    background-color: #40444b;
+    padding: 20px;
+    border-radius: 10px;
+    color: #99aab5;
+    text-align: left;
+}
+.results h3 {
+    margin-bottom: 10px;
+    color: #7289da;
+}
+#results p {
+    font-size: 1.1rem;
+    margin: 5px 0;
+}
+/* Media query to ensure responsiveness on smaller screens */
+@media (max-width: 768px) {
+    .audio-upload {
+        width: 100%;
+        margin-bottom: 20px;
+    }
+    h1 {
+        font-size: 2rem;
+    }
+    button {
+        width: 100%;
+        padding: 15px;
+    }
+}
+/* Add recording animation style */
+.recording-active {
+    animation: pulse 1s infinite;
+    background-color: red;
+    color: white;
+}
+@keyframes pulse {
+    0% {
+        box-shadow: 0 0 0 0 rgba(255, 0, 0, 0.7);
+    }
+    70% {
+        box-shadow: 0 0 0 10px rgba(255, 0, 0, 0);
+    }
+    100% {
+        box-shadow: 0 0 0 0 rgba(255, 0, 0, 0);
+    }
+}

templates/index.html ADDED Viewed

	@@ -0,0 +1,52 @@

+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>Audio Transcription and Similarity Checker</title>
+    <link rel="stylesheet" href="/static/style.css">
+</head>
+<body>
+    <div class="container">
+        <h1>Audio Transcription and Similarity Checker</h1>
+        <!-- Audio upload/record panel -->
+        <div class="audio-panel">
+            <div class="audio-upload">
+                <h2>Upload or Record Original Audio</h2>
+                <input type="file" id="originalAudio" accept="audio/*">
+                <audio id="originalAudioPlayer" controls></audio>
+                <br>
+                <button id="recordOriginalAudio" onclick="startRecording('original')">Start Recording</button>
+                <button id="stopOriginalAudio" onclick="stopRecording('original')" disabled>Stop Recording</button>
+            </div>
+            <div class="audio-upload">
+                <h2>Upload or Record User Audio</h2>
+                <input type="file" id="userAudio" accept="audio/*">
+                <audio id="userAudioPlayer" controls></audio>
+                <br>
+                <button id="recordUserAudio" onclick="startRecording('user')">Start Recording</button>
+                <button id="stopUserAudio" onclick="stopRecording('user')" disabled>Stop Recording</button>
+            </div>
+        </div>
+        <!-- Button to perform similarity check -->
+        <button id="performTesting">Perform Testing</button>
+        <!-- Loader while processing -->
+        <div id="loader" style="display: none;">
+            <p>Processing... Please wait</p>
+        </div>
+        <!-- Results section -->
+        <div id="results" class="results" style="display: none;">
+            <h3>Results</h3>
+            <p id="transcriptionOriginal"></p>
+            <p id="transcriptionUser"></p>
+            <p id="similarityScore"></p>
+        </div>
+    </div>
+    <script src="/static/script.js"></script>
+</body>
+</html>