ukrainian-ai / app.py
Yurii Paniv
Change gen parameters
ce406e3
raw
history blame contribute delete
No virus
2.8 kB
import gradio as gr
from transformers import Conversation, ConversationalPipeline, pipeline, AlbertTokenizerFast
import tempfile
import gradio as gr
from ukrainian_tts.tts import TTS, Voices, Stress
from enum import Enum
# Hugging Face model identifiers used below. The conversational checkpoint
# supplies both the tokenizer and the dialogue model, so it is named once.
_ASR_MODEL = "robinhad/wav2vec2-xls-r-300m-uk"
_CHAT_MODEL = "robinhad/gpt2-uk-conversational"

# Text-to-speech engine (can try device=cpu|gpu|mps).
tts = TTS()

# Speech-to-text pipeline; transcribe() reads the "text" key of its result.
p = pipeline(task="automatic-speech-recognition", model=_ASR_MODEL)

# The tokenizer is loaded explicitly so it can be shared between the
# conversational pipeline and the token counting done in transcribe().
tokenizer = AlbertTokenizerFast.from_pretrained(_CHAT_MODEL)

# Dialogue model that produces the bot's replies.
conv: ConversationalPipeline = pipeline(
    task="conversational",
    model=_CHAT_MODEL,
    tokenizer=tokenizer,
)
class VoiceOption(Enum):
    """Voices offered in the UI.

    Each member's value is the human-readable label displayed to the user
    (speaker name, grammatical gender in Ukrainian, and an emoji).
    """

    # Female voices are labelled "жіночий", male voices "чоловічий".
    Tetiana = "Тетяна (жіночий) 👩"
    Mykyta = "Микита (чоловічий) 👨"
    Lada = "Лада (жіночий) 👩"
    Dmytro = "Дмитро (чоловічий) 👨"
# Maps the UI label of each VoiceOption to the value of the identically
# named member of the TTS library's Voices enum (Tetiana, Mykyta, Lada,
# Dmytro), in VoiceOption definition order.
voice_mapping = {
    option.value: Voices[option.name].value
    for option in VoiceOption
}
def transcribe(audio, selected_voice, history):
    """Run one voice-chat turn: speech -> text -> reply -> synthesized speech.

    Args:
        audio: Recording handed to the speech-recognition pipeline ``p``.
            May be ``None`` or empty, e.g. when the form is submitted
            without a recording.
        selected_voice: UI label of the chosen voice; must be a key of
            ``voice_mapping``.
        history: List of ``(user_text, bot_reply)`` pairs from earlier
            turns, or ``None`` on the first call.

    Returns:
        A 4-tuple ``(recognized_text, wav_path, history, history)``. The
        history is returned twice because the caller wires it to two
        outputs. When ``audio`` is empty the result is
        ``("", None, history, history)`` and no model is invoked.

    Raises:
        KeyError: If ``selected_voice`` is not a key of ``voice_mapping``.
    """
    history = history or []

    # Nothing was recorded: skip recognition instead of letting the ASR
    # pipeline fail on a missing input, and leave the conversation untouched.
    if not audio:
        return "", None, history, history

    text = p(audio)["text"]
    voice = voice_mapping[selected_voice]

    past_user_inputs = [turn[0] for turn in history]
    generated_responses = [turn[1] for turn in history]

    # The generation limit grows with the conversation: tokenized length of
    # all previous turns plus a fixed allowance of 60 tokens.
    history_token_count = len(
        tokenizer.encode("".join(generated_responses + past_user_inputs))
    )
    conversation = conv(
        Conversation(text, past_user_inputs, generated_responses),
        max_length=history_token_count + 60,
    )
    response = conversation.generated_responses[-1]
    history.append((text, response))

    # delete=False keeps the file on disk after the handle is closed so the
    # caller can still read the synthesized audio from the returned path.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
        tts.tts(response, voice, Stress.Dictionary.value, fp)

    # Return only after the handle is closed so the file is fully written.
    return text, fp.name, history, history
def _strip_front_matter(text):
    """Return *text* without a leading ``---``-delimited front-matter block.

    The block is removed only when *text* starts with a ``---`` line and a
    second ``---`` line closes it; otherwise *text* is returned unchanged.
    Blank lines directly after the closing delimiter are dropped as well.
    """
    delimiter = "---\n"
    if not text.startswith(delimiter):
        return text
    # Search past the opening delimiter for the closing one.
    closing_at = text.find(delimiter, len(delimiter))
    if closing_at == -1:
        # str.find signals "not found" with -1; without this guard the
        # slice below would silently chop the start of the text.
        return text
    return text[closing_at + len(delimiter):].lstrip("\n")


# The README body (minus its front matter) is shown as the article below the
# interface. The encoding is explicit because the text is not ASCII-only.
with open("README.md", encoding="utf-8") as file:
    article = _strip_front_matter(file.read())
# Input components, in the positional order of transcribe's parameters:
# microphone recording, voice selector, conversation state.
_inputs = [
    gr.inputs.Audio(source="microphone", type="filepath"),
    gr.components.Radio(
        label="Голос",
        choices=[voice.value for voice in VoiceOption],
        value=VoiceOption.Tetiana.value,
    ),
    "state",
]

# Output components, in the order of transcribe's returned tuple:
# recognized text, synthesized reply audio, chat log, conversation state.
_outputs = [
    gr.outputs.Textbox(label="Recognized text"),
    gr.outputs.Audio(label="Output", type="filepath"),
    gr.outputs.Chatbot(label="Chat"),
    "state",
]

# User-facing description (Ukrainian); it links to the text-only model page.
_description = """Це альфа-версія end-to-end розмовного бота, з яким можна поспілкуватися голосом.
Перейдіть сюди для доступу до текстової версії: [https://huggingface.co/robinhad/gpt2-uk-conversational](https://huggingface.co/robinhad/gpt2-uk-conversational)
"""

iface = gr.Interface(
    fn=transcribe,
    inputs=_inputs,
    outputs=_outputs,
    description=_description,
    article=article,
)
iface.launch()