"""Gradio demo: a Ukrainian voice chatbot.

Pipeline per request: microphone audio -> speech recognition -> conversational
model -> text-to-speech. The module builds the models at import time, defines
the Gradio callback ``transcribe`` and launches the interface.
"""

import tempfile
from enum import Enum

import gradio as gr
from transformers import (
    AlbertTokenizerFast,
    Conversation,
    ConversationalPipeline,
    pipeline,
)
from ukrainian_tts.tts import TTS, Stress, Voices

# Number of tokens added on top of the encoded chat history when computing
# ``max_length`` for the conversational model.
_RESPONSE_TOKEN_BUDGET = 60

# Delimiter line that opens and closes the metadata header of README.md.
_FRONT_MATTER_DELIMITER = "---\n"

tts = TTS()  # can try device=cpu|gpu|mps

# Speech-to-text pipeline.
p = pipeline(
    "automatic-speech-recognition",
    "robinhad/wav2vec2-xls-r-300m-uk",
)

tokenizer = AlbertTokenizerFast.from_pretrained("robinhad/gpt2-uk-conversational")

# Dialogue model; shares the tokenizer used below to measure history length.
conv: ConversationalPipeline = pipeline(
    "conversational",
    "robinhad/gpt2-uk-conversational",
    tokenizer=tokenizer,
)


class VoiceOption(Enum):
    """Voice labels shown to the user in the voice selector."""

    Tetiana = "Тетяна (жіночий) 👩"
    Mykyta = "Микита (чоловічий) 👨"
    Lada = "Лада (жіночий) 👩"
    Dmytro = "Дмитро (чоловічий) 👨"


# Maps the UI label picked by the user to the voice identifier passed to TTS.
voice_mapping = {
    VoiceOption.Tetiana.value: Voices.Tetiana.value,
    VoiceOption.Mykyta.value: Voices.Mykyta.value,
    VoiceOption.Lada.value: Voices.Lada.value,
    VoiceOption.Dmytro.value: Voices.Dmytro.value,
}


def transcribe(audio, selected_voice, history):
    """Run one conversation turn: recognize speech, reply, synthesize audio.

    Args:
        audio: Audio input handed to the speech-recognition pipeline (the
            interface is configured with ``type="filepath"``).
        selected_voice: One of the ``VoiceOption`` label strings; must be a
            key of ``voice_mapping``.
        history: List of ``(user_text, bot_response)`` tuples from previous
            turns, or a falsy value on the first turn.

    Returns:
        A 4-tuple ``(recognized_text, wav_path, history, history)`` matching
        the interface outputs: recognized text, path of the synthesized
        reply, chat log, and the state carried to the next call.

    Raises:
        KeyError: If ``selected_voice`` is not a key of ``voice_mapping``.
    """
    text = p(audio)["text"]
    history = history or []
    selected_voice = voice_mapping[selected_voice]

    past_user_inputs = [user_text for user_text, _ in history]
    generated_responses = [bot_text for _, bot_text in history]

    # Let max_length grow with the conversation so far plus a fixed budget.
    # NOTE(review): the current utterance ``text`` is not counted here —
    # confirm that is intended.
    history_tokens = tokenizer.encode("".join(generated_responses + past_user_inputs))
    next_output_length = len(history_tokens) + _RESPONSE_TOKEN_BUDGET

    conversation = conv(
        Conversation(text, past_user_inputs, generated_responses),
        max_length=next_output_length,
    )
    response = conversation.generated_responses[-1]
    history.append((text, response))

    # delete=False: the file path is returned to the caller, so the file must
    # still exist after this function has closed it.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
        tts.tts(response, selected_voice, Stress.Dictionary.value, fp)

    return text, fp.name, history, history


def _strip_front_matter(markdown):
    """Return ``markdown`` without its leading ``---`` delimited header.

    The text is returned unchanged when it does not start with a delimiter
    line or when no closing delimiter line is found.
    """
    if not markdown.startswith(_FRONT_MATTER_DELIMITER):
        return markdown
    closing = markdown.find(_FRONT_MATTER_DELIMITER, len(_FRONT_MATTER_DELIMITER))
    if closing == -1:
        return markdown
    body = markdown[closing + len(_FRONT_MATTER_DELIMITER):]
    # Drop the blank line(s) that separate the header from the content.
    return body.lstrip("\n")


# Explicit encoding: do not depend on the platform's default locale.
with open("README.md", encoding="utf-8") as readme_file:
    article = _strip_front_matter(readme_file.read())

iface = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.inputs.Audio(source="microphone", type="filepath"),
        gr.components.Radio(
            label="Голос",
            choices=[option.value for option in VoiceOption],
            value=VoiceOption.Tetiana.value,
        ),
        "state",
    ],
    outputs=[
        gr.outputs.Textbox(label="Recognized text"),
        gr.outputs.Audio(label="Output", type="filepath"),
        gr.outputs.Chatbot(label="Chat"),
        "state",
    ],
    description="""Це альфа-версія end-to-end розмовного бота, з яким можна поспілкуватися голосом. Перейдіть сюди для доступу до текстової версії: [https://huggingface.co/robinhad/gpt2-uk-conversational](https://huggingface.co/robinhad/gpt2-uk-conversational) """,
    article=article,
)

iface.launch()