"""Gradio demo that transcribes speech via the Hugging Face Inference API."""

import json
import os
import traceback

import gradio as gr  # Gradio provides the web UI components used below.
import requests

# Optional access token, read from the environment; requests are sent
# unauthenticated when it is not set.
HF_TOKEN = os.environ.get("HF_TOKEN", None)
API_URL = "https://api-inference.huggingface.co/models/"

# (connect, read) timeouts in seconds. Without a timeout, `requests` waits
# forever and a stalled call would permanently occupy a concurrency slot.
REQUEST_TIMEOUT = (10, 300)

WHISPER_MODELS = [
    "openai/whisper-large-v3",
    "openai/whisper-large-v2",
    "openai/whisper-large",
    "openai/whisper-medium",
    "openai/whisper-small",
    "openai/whisper-base",
    "openai/whisper-tiny",
]


def speech_recognize(audio, model_name):
    """Transcribe an audio file with a hosted model and yield the result.

    Args:
        audio: Path to the audio file on disk (the Audio component below is
            configured with ``type="filepath"``).
        model_name: Model identifier appended to ``API_URL``.

    Yields:
        A single string: the ``text`` field of the JSON response, or a
        message starting with ``"Transcription failed with error:"``
        followed by the traceback when anything goes wrong.
    """
    try:
        if not audio:
            # Reported through the same failure message as any other error.
            raise ValueError("No audio was provided.")

        with open(audio, "rb") as f:
            data = f.read()

        # Only send the Authorization header when a token is configured;
        # otherwise the literal string "Bearer None" would be sent.
        headers = {"Authorization": f"Bearer {HF_TOKEN}"} if HF_TOKEN else {}
        response = requests.post(
            API_URL + model_name,
            headers=headers,
            data=data,
            timeout=REQUEST_TIMEOUT,
        )
        payload = json.loads(response.content.decode("utf-8"))

        if not isinstance(payload, dict) or "text" not in payload:
            # Surface whatever the server sent instead of a bare KeyError.
            raise RuntimeError(
                f"Unexpected response (HTTP {response.status_code}): {payload}"
            )
        text = payload["text"]
    except Exception:
        # Deliberate best-effort: show the failure in the output textbox
        # rather than raising out of the event handler.
        text = f"Transcription failed with error:\n{traceback.format_exc()}"

    yield text


with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            audio = gr.Audio(sources=["microphone", "upload"], type="filepath")
            model_name = gr.Dropdown(
                label="Models:",
                choices=WHISPER_MODELS,
                value="openai/whisper-large-v3",
            )
        with gr.Column():
            output = gr.Textbox(label="Transcription results")

    # Run the same handler when a recording stops and when a file is uploaded.
    for register in (audio.stop_recording, audio.upload):
        register(
            speech_recognize,
            inputs=[audio, model_name],
            outputs=[output],
            concurrency_limit=4,
        )

demo.queue(max_size=4).launch()