# ukrainian-ai / app.py
# Author: Yurii Paniv
# Latest commit: "Update TTS to new version" (ab18fd9)
# File viewer: raw | history | blame
# Size: 2.83 kB
import gradio as gr
from transformers import Conversation, ConversationalPipeline, pipeline, AlbertTokenizerFast
import tempfile
import gradio as gr
from ukrainian_tts.tts import TTS, Voices, Stress
from enum import Enum
# Hugging Face Hub identifiers of the models used by this app.
_ASR_MODEL_ID = "robinhad/wav2vec2-xls-r-300m-uk"
_CHAT_MODEL_ID = "robinhad/gpt2-uk-conversational"

# Text-to-speech engine; can try device=cpu|gpu|mps
tts = TTS()

# Speech recognition pipeline; `transcribe` feeds it the recorded audio.
p = pipeline("automatic-speech-recognition", model=_ASR_MODEL_ID)

# The conversational pipeline is given an explicitly loaded tokenizer; the
# same tokenizer is reused in `transcribe` to count tokens of the history.
tokenizer = AlbertTokenizerFast.from_pretrained(_CHAT_MODEL_ID)
conv: ConversationalPipeline = pipeline(
    "conversational",
    model=_CHAT_MODEL_ID,
    tokenizer=tokenizer,
)
class VoiceOption(Enum):
    """Voices selectable in the UI.

    Each member's value is the (Ukrainian) label shown to the user in the
    radio group; the labels read "<name> (female|male)" plus an emoji.
    """

    # Female voices are marked "жіночий", male voices "чоловічий".
    Tetiana = "Тетяна (жіночий) 👩"
    Mykyta = "Микита (чоловічий) 👨"
    Lada = "Лада (жіночий) 👩"
    Dmytro = "Дмитро (чоловічий) 👨"
# Maps the label chosen in the UI to the value of the same-named member of
# `Voices`. Every `VoiceOption` member name (Tetiana, Mykyta, Lada, Dmytro)
# is looked up as an attribute of `Voices`, so the two must stay in sync.
voice_mapping = {
    option.value: getattr(Voices, option.name).value for option in VoiceOption
}
def transcribe(audio, selected_voice, history):
    """Handle one spoken turn: recognise, generate a reply, synthesise it.

    Args:
        audio: Input passed straight to the speech recognition pipeline `p`
            (the interface supplies a file path to the recording).
        selected_voice: UI label of the voice; must be a key of
            `voice_mapping`, otherwise a KeyError is raised.
        history: List of ``(user_text, bot_reply)`` pairs from earlier turns,
            or a falsy value when the conversation is just starting. A
            non-empty list is extended in place.

    Returns:
        A 4-tuple ``(recognized_text, wav_path, history, history)``: the
        recognised user text, the name of the temporary ``.wav`` file handed
        to the TTS engine, and the updated history twice (once for the chat
        display, once for the state).
    """
    recognized = p(audio)["text"]
    turns = history or []
    voice = voice_mapping[selected_voice]

    user_inputs = [turn[0] for turn in turns]
    bot_replies = [turn[1] for turn in turns]

    # Generation budget: token count of all previous turns plus 60.
    # NOTE(review): the freshly recognised text is not counted here; confirm
    # that this is intended.
    previous_text = "".join(bot_replies + user_inputs)
    token_budget = len(tokenizer.encode(previous_text)) + 60

    conversation = Conversation(recognized, user_inputs, bot_replies)
    result = conv(
        conversation,
        max_length=token_budget,
        penalty_alpha=0.6,
        top_k=4,
    )
    reply = result.generated_responses[-1]
    turns.append((recognized, reply))

    # delete=False keeps the file on disk after it is closed, so its path can
    # still be returned to the caller.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as wav_file:
        # The returned pair is not used; presumably the audio is written
        # into `wav_file` by the TTS engine.
        _, _tts_text = tts.tts(reply, voice, Stress.Dictionary.value, wav_file)
    return recognized, wav_file.name, turns, turns
# Load the README and use everything after its leading "---" delimited
# header as the article text of the interface.
# The encoding is given explicitly so decoding does not depend on the
# locale of the machine running the app.
with open("README.md", encoding="utf-8") as file:
    article = file.read()

# The search starts at index 4 to skip an opening "---\n" line. The slice
# begins 5 characters after the match: the 4-character delimiter plus the
# one character that follows it.
_header_end = article.find("---\n", 4)
if _header_end != -1:
    article = article[_header_end + 5:]
# When no closing delimiter is found the text is kept whole, instead of
# silently losing its first characters (find() returns -1 in that case).
# Gradio UI around `transcribe`: microphone recording + voice choice + state
# in; recognised text, synthesised audio, chat log and updated state out.
iface = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.inputs.Audio(source="microphone", type="filepath"),
        gr.components.Radio(
            label="Голос",
            choices=[option.value for option in VoiceOption],
            # The default must be an existing VoiceOption member; the
            # previously referenced `Olena` is not defined in the enum and
            # raised AttributeError on start-up.
            value=VoiceOption.Tetiana.value,
        ),
        "state",
    ],
    outputs=[
        gr.outputs.Textbox(label="Recognized text"),
        gr.outputs.Audio(label="Output", type="filepath"),
        gr.outputs.Chatbot(label="Chat"),
        "state",
    ],
    description="""Це альфа-версія end-to-end розмовного бота, з яким можна поспілкуватися голосом.
Перейдіть сюди для доступу до текстової версії: [https://huggingface.co/robinhad/gpt2-uk-conversational](https://huggingface.co/robinhad/gpt2-uk-conversational)
""",
    article=article,
)
iface.launch()