```py
from transformers import pipeline
import gradio as gr

# Load the default automatic speech recognition pipeline
model = pipeline("automatic-speech-recognition")


def transcribe_audio(mic=None, file=None):
    # Use the microphone recording if provided, otherwise fall back to the uploaded file
    if mic is not None:
        audio = mic
    elif file is not None:
        audio = file
    else:
        return "You must either provide a mic recording or a file"
    transcription = model(audio)["text"]
    return transcription


gr.Interface(
    fn=transcribe_audio,
    inputs=[
        gr.inputs.Audio(source="microphone", type="filepath", optional=True),
        gr.inputs.Audio(source="upload", type="filepath", optional=True),
    ],
    outputs="text",
).launch()
```