"""Gradio app that compares two audio clips through their transcriptions.

Both clips are transcribed with the Hugging Face Inference API and the two
transcriptions are compared with a normalized Levenshtein similarity.
"""
import os
from typing import Optional, Tuple

import gradio as gr
import requests

# API information for Hugging Face Inference API
API_URL = "https://api-inference.huggingface.co/models/jonatasgrosman/wav2vec2-large-xlsr-53-arabic"

# Seconds to wait for the Inference API. Without an explicit timeout,
# `requests` can block forever on a stalled connection and freeze the UI.
REQUEST_TIMEOUT = 120

# Fetch the API token from Hugging Face Secrets.
# NOTE: when HF_API_TOKEN is unset, os.getenv returns None and the header
# value below becomes the literal string "Bearer None".
hf_api_token = os.getenv("HF_API_TOKEN")
headers = {"Authorization": f"Bearer {hf_api_token}"}


def query(filename: str):
    """
    Queries the Hugging Face API to transcribe audio from a file.

    Args:
        filename (str): Path to the audio file.

    Returns:
        The decoded JSON body of the API response (a dict with a ``text``
        key on success).

    Raises:
        OSError: If the audio file cannot be opened or read.
        requests.RequestException: On network failure or timeout.
    """
    with open(filename, "rb") as f:
        data = f.read()
    response = requests.post(
        API_URL,
        headers=headers,
        data=data,
        timeout=REQUEST_TIMEOUT,
    )
    return response.json()


def transcribe_audio_hf(audio_path: str) -> str:
    """
    Transcribes the audio using the Hugging Face Inference API.

    Args:
        audio_path (str): Path to the audio file.

    Returns:
        str: The transcription from the API, stripped of surrounding
        whitespace ('' when the response carries no text).

    Raises:
        RuntimeError: If the API answers with an error payload or with a
            body that is not a JSON object.
    """
    result = query(audio_path)
    if not isinstance(result, dict):
        raise RuntimeError(
            f"Unexpected response from the Hugging Face API: {result!r}"
        )
    # Surface API failures (e.g. model still loading, bad token) instead of
    # silently treating them as an empty transcription.
    if "text" not in result and "error" in result:
        raise RuntimeError(f"Hugging Face API error: {result['error']}")
    return (result.get("text") or "").strip()


def levenshtein_similarity(transcription1: str, transcription2: str) -> float:
    """
    Calculate the Levenshtein similarity between two transcriptions.

    Args:
        transcription1 (str): The first transcription.
        transcription2 (str): The second transcription.

    Returns:
        float: A normalized similarity score between 0 and 1, where 1
        indicates identical transcriptions. Two empty transcriptions are
        identical and score 1.0.
    """
    import Levenshtein  # third-party; kept as a function-local import

    max_len = max(len(transcription1), len(transcription2))
    if max_len == 0:
        # Both strings are empty: avoid dividing by zero below.
        return 1.0
    distance = Levenshtein.distance(transcription1, transcription2)
    return 1 - distance / max_len  # Normalize to get similarity score


def evaluate_audio_similarity(
    original_audio: str, user_audio: str
) -> Tuple[str, str, float]:
    """
    Compares the similarity between the transcription of an original audio
    file and a user's audio file.

    Args:
        original_audio (str): Path to the original audio file.
        user_audio (str): Path to the user's audio file.

    Returns:
        tuple: (original transcription, user transcription, Levenshtein
        similarity score).
    """
    transcription_original = transcribe_audio_hf(original_audio)
    transcription_user = transcribe_audio_hf(user_audio)
    similarity_score_levenshtein = levenshtein_similarity(
        transcription_original, transcription_user
    )
    return transcription_original, transcription_user, similarity_score_levenshtein


def perform_testing(original_audio: Optional[str], user_audio: Optional[str]):
    """
    Gradio click handler: transcribe both clips and format the results.

    Args:
        original_audio: Path to the original audio file, or None when no
            file was provided.
        user_audio: Path to the user's audio file, or None when no file
            was provided.

    Returns:
        tuple: Three Markdown strings (original transcription, user
        transcription, similarity score). When either input is missing, a
        prompt message followed by two empty strings, so the three wired
        outputs always receive a value.

    Raises:
        gr.Error: If transcription fails, so the message is shown in the UI.
    """
    if original_audio is None or user_audio is None:
        return (
            "**Please provide both the original and the user audio.**",
            "",
            "",
        )

    try:
        transcription_original, transcription_user, similarity_score = (
            evaluate_audio_similarity(original_audio, user_audio)
        )
    except (RuntimeError, requests.RequestException) as err:
        # gr.Error displays the message to the user instead of a bare
        # "Error" badge.
        raise gr.Error(str(err)) from err

    return (
        f"**Original Transcription:** {transcription_original}",
        f"**User Transcription:** {transcription_user}",
        f"**Levenshtein Similarity Score:** {similarity_score:.2f}",
    )


# Gradio Interface
with gr.Blocks() as app:
    gr.Markdown("# Audio Transcription and Similarity Checker using Hugging Face Inference API")

    with gr.Tab("Upload"):
        original_audio_upload = gr.Audio(label="Upload Original Audio", type="filepath")
        user_audio_upload = gr.Audio(label="Upload User Audio", type="filepath")
        upload_button = gr.Button("Perform Testing")
        output_original_transcription = gr.Markdown()
        output_user_transcription = gr.Markdown()
        output_similarity_score = gr.Markdown()

        upload_button.click(
            perform_testing,
            inputs=[original_audio_upload, user_audio_upload],
            outputs=[
                output_original_transcription,
                output_user_transcription,
                output_similarity_score,
            ],
        )

# Launched at module level, as in the original script.
app.launch()