"""Gradio demo: Persian speech-to-text using a fine-tuned Wav2Vec2 CTC model."""

import os
import tempfile

import gradio as gr
import numpy as np
import torch
import torchaudio
from transformers import AutoModelForCTC
from transformers import AutoProcessor
from transformers import AutoTokenizer
from transformers import VitsModel
from transformers import pipeline

MODEL_ID = "SeyedAli/Persian-Speech-Transcription-Wav2Vec2-V1"

# Load the checkpoint once at import time so every request reuses the same
# weights instead of re-reading them on each call.
processor = AutoProcessor.from_pretrained(MODEL_ID)
model = AutoModelForCTC.from_pretrained(MODEL_ID)

# Build the pipeline from the already-loaded objects rather than from the
# model id, which would load a second copy of the weights.
# NOTE(review): assumes the processor exposes `.tokenizer` and
# `.feature_extractor` (true for Wav2Vec2Processor) - confirm for this repo.
_asr_pipeline = pipeline(
    "automatic-speech-recognition",
    model=model,
    tokenizer=processor.tokenizer,
    feature_extractor=processor.feature_extractor,
)


def ASR(audio):
    """Transcribe an audio file to text.

    Args:
        audio: Path to an audio file, as supplied by
            ``gr.Audio(type="filepath")``. ``None`` (nothing recorded or
            uploaded) is accepted.

    Returns:
        The transcription as a string; an empty string when ``audio`` is
        ``None``.
    """
    if audio is None:
        return ""

    # torchaudio.load returns (waveform, sample_rate); the waveform is
    # (channels, frames) by default.
    waveform, sample_rate = torchaudio.load(audio)

    # The pipeline expects single-channel 1-D audio, so down-mix by averaging.
    mono = waveform.mean(dim=0).numpy()

    # Passing the sampling rate alongside the raw samples lets the pipeline
    # resample to the rate the model's feature extractor expects.
    result = _asr_pipeline({"raw": mono, "sampling_rate": sample_rate})

    # The pipeline returns a dict; the text output only needs the string.
    return result["text"]


# type="filepath" makes Gradio hand ASR a path on disk, which is what
# torchaudio.load needs (the default would be a (sample_rate, ndarray) tuple).
iface = gr.Interface(fn=ASR, inputs=gr.Audio(type="filepath"), outputs="text")
iface.launch(share=False)