"""Gradio demo that transcribes Chichewa voice notes with a fine-tuned Whisper model.

Running this module loads the processor and model once at import time and
then launches a Gradio interface offering two audio inputs (microphone and
file upload) and a single text output.
"""

from typing import Optional

import gradio as gr
import librosa
from transformers import pipeline
from transformers import (
    WhisperFeatureExtractor,
    WhisperForConditionalGeneration,
    WhisperProcessor,
)

# Prepare model for prediction
MODEL_SPECS_ID = "dmatekenya/whisper-small_finetuned_sh_chich"
MODEL_SPECS_BASE_ID = "openai/whisper-small"
MODEL_SPECS_BASE_LAN_SW = "swahili"  # not referenced elsewhere in this script
MODEL_SPECS_BASE_LAN_SH = "shona"

# Sampling rate (Hz) the audio is resampled to before feature extraction.
TARGET_SAMPLE_RATE = 16000

# Loaded at import time; not referenced elsewhere in this script.
FEATURE_EXTRACTOR = WhisperFeatureExtractor.from_pretrained(MODEL_SPECS_ID)

# The processor comes from the base checkpoint and is configured for the
# Shona language with the "transcribe" task.
# NOTE(review): presumably Shona is used as a stand-in language token for
# Chichewa - confirm against how the model was fine-tuned.
PROCESSOR_SH = WhisperProcessor.from_pretrained(
    MODEL_SPECS_BASE_ID,
    language=MODEL_SPECS_BASE_LAN_SH,
    task="transcribe",
)
MODEL = WhisperForConditionalGeneration.from_pretrained(MODEL_SPECS_ID)


def transcribe(audio_file: str) -> str:
    """Return the transcription of the audio stored at *audio_file*.

    Args:
        audio_file: Path to an audio file readable by ``librosa.load``.

    Returns:
        The decoded text of the first (and only) generated sequence, with
        special tokens removed.
    """
    # Ask librosa to resample while loading so the processor always
    # receives audio at TARGET_SAMPLE_RATE.
    waveform, sample_rate = librosa.load(audio_file, sr=TARGET_SAMPLE_RATE)
    input_features = PROCESSOR_SH(
        waveform,
        return_tensors="pt",
        sampling_rate=sample_rate,
    ).input_features
    generated_ids = MODEL.generate(inputs=input_features)
    # A single clip is passed in, so only the first decoded entry is used.
    decoded = PROCESSOR_SH.batch_decode(generated_ids, skip_special_tokens=True)
    return decoded[0]


def transcribe_audio(mic: Optional[str] = None, file: Optional[str] = None) -> str:
    """Transcribe whichever audio input was supplied by the interface.

    The microphone recording takes precedence when both inputs are given.

    Args:
        mic: Path to the microphone recording, or ``None`` if absent.
        file: Path to the uploaded audio file, or ``None`` if absent.

    Returns:
        The transcription, or an explanatory message when neither input
        was provided.
    """
    audio = mic if mic is not None else file
    if audio is None:
        return "You must either provide a mic recording or a file"
    return transcribe(audio_file=audio)


title = "Transcribe Chichewa Audio"

description = """
IN THIS DEMO, TEST THE FIRST AUTOMATED SPEECH RECOGNITION (ASR) MODEL FOR CHICHEWA BY TRANSCRIBING YOUR CHICHEWA VOICE NOTES.
FOR AUDIO FILES, PLEASE UPLOAD SHORT VOICE NOTES ONLY (NO LONGER THAN 30 SEC).
"""

# Built with implicit string concatenation instead of backslash line
# continuations, so no stray backslashes can end up in the rendered text.
article = (
    "Read more about the [ChichewaSpeech2Text]"
    "(https://dmatekenya.github.io/Chichewa-Speech2Text/README.html) project "
    "and make sure to sign-up for our first [voice note donation event]"
    "(https://forms.gle/fHLESutofVvb2YFM9) on July 22. "
    "You stand a chance to win Airtel or TNM units if you choose to "
    "participate in the raffle after the event"
)

# NOTE(review): the `theme='grass'`, `source=` and `optional=` arguments look
# tied to a specific (older) Gradio release - confirm the pinned version
# before upgrading Gradio.
demo = gr.Interface(
    fn=transcribe_audio,
    theme='grass',
    title=title,
    description=description,
    article=article,
    inputs=[
        gr.Audio(source="microphone", type="filepath", optional=True),
        gr.Audio(source="upload", type="filepath", optional=True),
    ],
    outputs="text",
)
demo.launch()