# Requires the ffmpeg binary on PATH for audio decoding
# (e.g. `apt install ffmpeg` / `brew install ffmpeg` — the PyPI packages
# named "ffmpeg"/"ffprobe" are NOT the real tools and will not work).
import os

import gradio as gr
from transformers import WhisperProcessor, pipeline

# Load the tiny Whisper model once at startup; reused for every request.
pipe = pipeline(task="automatic-speech-recognition", model="openai/whisper-tiny")
processor = WhisperProcessor.from_pretrained("openai/whisper-tiny")

# Force English transcription so the model does not auto-detect language.
pipe.model.config.forced_decoder_ids = processor.get_decoder_prompt_ids(
    language="english", task="transcribe"
)


def recognize(input_audio):
    """Transcribe a recorded audio file to English text.

    Args:
        input_audio: Filesystem path to the recorded audio clip (Gradio
            passes a filepath because the Audio component uses
            type="filepath"), or None when nothing was recorded.

    Returns:
        The transcribed text, or an empty string when no audio was given.
    """
    # Gradio sends None if the button is clicked before any recording exists;
    # guard so pipe() is never called with None.
    if input_audio is None:
        return ""
    return pipe(input_audio)["text"]


with gr.Blocks() as demo:
    gr.Markdown("""
""")
    # Microphone recording widget.
    # NOTE(review): `source=` is the gradio 3.x kwarg; gradio 4.x renamed it
    # to `sources=["microphone"]` — confirm the installed gradio version.
    input_audio = gr.Audio(source="microphone", type="filepath", label='Audio')
    audio_button = gr.Button(value="Recognize!")
    text = gr.Textbox(lines=2, label='你要不要看看你说了啥???')
    audio_button.click(recognize, inputs=[input_audio], outputs=[text], api_name='predict')

# Guard the launch so importing this module does not start the server.
if __name__ == "__main__":
    demo.launch()