import json
import os
import traceback

import requests
import gradio as gr  # Gradio builds the web UI for the demo.

# Hugging Face access token and the serverless Inference API endpoint.
HF_TOKEN = os.environ.get("HF_TOKEN", None)
API_URL = "https://api-inference.huggingface.co/models/"


def s2t(audio, model_name):
    """Send an audio file to the Inference API and yield the transcription."""
    with open(audio, "rb") as f:
        data = f.read()
    try:
        url = API_URL + model_name
        headers = {"Authorization": f"Bearer {HF_TOKEN}"}
        response = requests.post(url, headers=headers, data=data)
        result = json.loads(response.content.decode("utf-8"))
        text = result["text"]
    except Exception:
        text = f"Transcription failed with error:\n{traceback.format_exc()}"
    yield text


with gr.Blocks() as demo:
    with gr.Column():
        voice = gr.Audio(sources=["microphone"], type="filepath", label="Voice")
        audio = gr.Audio(sources=["upload"], type="filepath", label="Audio file")
        model_name = gr.Dropdown(
            label="Models:",
            choices=[
                "openai/whisper-large-v3",
                "openai/whisper-large-v2",
                "openai/whisper-large",
                "openai/whisper-medium",
                "openai/whisper-small",
                "openai/whisper-base",
                "openai/whisper-tiny",
            ],
            value="openai/whisper-large-v3",
        )
    with gr.Column():
        output = gr.Textbox(label="Transcription results")

    # Transcribe when a recording stops or when a file is uploaded.
    voice.stop_recording(s2t, inputs=[voice, model_name], outputs=output)
    audio.upload(s2t, inputs=[audio, model_name], outputs=output)

demo.queue().launch(max_threads=4)
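
# A minimal sketch of exercising the transcription helper directly, without the
# Gradio UI. It assumes HF_TOKEN is set in the environment and that "sample.wav"
# is a local audio file; both names are hypothetical. Because s2t is a
# generator, next() retrieves its single yielded result:
#
#     text = next(s2t("sample.wav", "openai/whisper-tiny"))
#     print(text)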