Spaces:
Configuration error
Configuration error
File size: 2,804 Bytes
03ce9f7 eeaef84 19ae03c eeaef84 19ae03c ab18fd9 03ce9f7 9ce0232 eeaef84 9ce0232 eeaef84 9ce0232 eeaef84 9ce0232 19ae03c eeaef84 ab18fd9 eeaef84 9ce0232 eeaef84 ab18fd9 eeaef84 19ae03c 6ede7a5 eeaef84 19ae03c 9ce0232 eeaef84 9ce0232 eeaef84 ce406e3 9ce0232 19ae03c eeaef84 9ce0232 03ce9f7 e2a6612 03ce9f7 9ce0232 eeaef84 7037773 eeaef84 9ce0232 eeaef84 9ce0232 e2a6612 9ce0232 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 |
import gradio as gr
from transformers import Conversation, ConversationalPipeline, pipeline, AlbertTokenizerFast
import tempfile
import gradio as gr
from ukrainian_tts.tts import TTS, Voices, Stress
from enum import Enum
# Text-to-speech engine, constructed with its default arguments.
tts = TTS()  # can try device=cpu|gpu|mps

# Speech-to-text pipeline; called with an audio file path in transcribe().
p = pipeline(
    task="automatic-speech-recognition",
    model="robinhad/wav2vec2-xls-r-300m-uk",
)

# The tokenizer is loaded explicitly and handed to the conversational
# pipeline; transcribe() also uses it directly to count tokens.
tokenizer = AlbertTokenizerFast.from_pretrained("robinhad/gpt2-uk-conversational")
conv: ConversationalPipeline = pipeline(
    task="conversational",
    model="robinhad/gpt2-uk-conversational",
    tokenizer=tokenizer,
)
class VoiceOption(Enum):
    """Voices offered in the UI.

    Member names identify the voice; member values are the display labels
    (Ukrainian text plus an emoji) used as the radio-button choices.
    """

    # Labels marked "жіночий" (female).
    Tetiana = "Тетяна (жіночий) 👩"
    Lada = "Лада (жіночий) 👩"

    # Labels marked "чоловічий" (male).
    Mykyta = "Микита (чоловічий) 👨"
    Dmytro = "Дмитро (чоловічий) 👨"

    # NOTE: iteration order follows definition order, so the radio choices
    # built from this enum are listed as Tetiana, Lada, Mykyta, Dmytro.
# Lookup table from a UI label (a VoiceOption value) to the value of the
# same-named member of the TTS library's Voices enum.
voice_mapping = {
    option.value: Voices[option.name].value
    for option in VoiceOption
}
def transcribe(audio, selected_voice, history):
    """Run one voice-chat turn: recognise speech, generate a reply, speak it.

    The recording is transcribed with the module-level pipeline ``p``, the
    transcript is sent together with the earlier turns to the conversational
    pipeline ``conv``, and the generated reply is synthesised into a WAV file
    with ``tts``.

    Args:
        audio: Path of the recorded audio file, or ``None`` when nothing was
            recorded.
        selected_voice: UI label of the voice; must be a key of
            ``voice_mapping``.
        history: List of ``(user_text, bot_reply)`` pairs from earlier turns,
            or ``None``/empty for a fresh conversation. It is not modified.

    Returns:
        A 4-tuple ``(recognized_text, reply_wav_path, history, history)``.
        The updated history is returned twice, once for each of the last two
        outputs of the interface. When ``audio`` is ``None`` the tuple is
        ``("", None, history, history)`` with the history unchanged.

    Raises:
        KeyError: If ``selected_voice`` is not a key of ``voice_mapping``.
    """
    # Work on a copy so the caller's list is never mutated in place.
    history = list(history or [])

    if audio is None:
        # Nothing was recorded: skip all model calls instead of failing
        # inside the speech-recognition pipeline.
        # NOTE(review): assumes the audio output accepts None as "no file";
        # confirm against the installed gradio version.
        return "", None, history, history

    text = p(audio)["text"]
    voice = voice_mapping[selected_voice]

    past_user_inputs = [turn[0] for turn in history]
    generated_responses = [turn[1] for turn in history]

    # max_length limits the whole sequence handed to generation, so the
    # budget has to count the new utterance as well as the earlier turns.
    # Leaving it out lets a long utterance consume the reply allowance.
    reply_token_budget = 60
    prompt_text = "".join(generated_responses + past_user_inputs + [text])
    max_length = len(tokenizer.encode(prompt_text)) + reply_token_budget

    conversation = conv(
        Conversation(text, past_user_inputs, generated_responses),
        max_length=max_length,
    )
    response = conversation.generated_responses[-1]
    history.append((text, response))

    # delete=False: the file is returned by path, so it has to survive the
    # end of the ``with`` block. Nothing in this function removes it later.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
        tts.tts(response, voice, Stress.Dictionary.value, fp)
    return text, fp.name, history, history
# Load README.md as the article text shown with the interface.
# The encoding is given explicitly so reading does not depend on the
# platform's default locale encoding.
with open("README.md", encoding="utf-8") as file:
    article = file.read()

# Drop the leading header block (presumably YAML front matter delimited by
# "---" lines): the search starts at index 4 to skip an opening "---\n" and
# look for the closing one.
front_matter_end = article.find("---\n", 4)
if front_matter_end != -1:
    # +5 skips the 4-character marker plus the one character after it
    # (presumably the newline of a blank separator line).
    article = article[front_matter_end + 5 :]
# When no closing marker exists the text is kept whole; slicing from
# ``-1 + 5`` would silently cut off its first four characters.
# Input components, in the positional order transcribe() receives them:
# recorded audio (as a file path), chosen voice label, conversation state.
microphone_input = gr.inputs.Audio(source="microphone", type="filepath")
voice_selector = gr.components.Radio(
    label="Голос",
    choices=[option.value for option in VoiceOption],
    value=VoiceOption.Tetiana.value,
)

# Output components, matching the first three items of the 4-tuple that
# transcribe() returns; the fourth item goes back into "state".
recognized_text_output = gr.outputs.Textbox(label="Recognized text")
reply_audio_output = gr.outputs.Audio(label="Output", type="filepath")
chat_output = gr.outputs.Chatbot(label="Chat")

# Description passed to the interface (Ukrainian text with a markdown link).
interface_description = """Це альфа-версія end-to-end розмовного бота, з яким можна поспілкуватися голосом.
Перейдіть сюди для доступу до текстової версії: [https://huggingface.co/robinhad/gpt2-uk-conversational](https://huggingface.co/robinhad/gpt2-uk-conversational)
"""

iface = gr.Interface(
    fn=transcribe,
    inputs=[microphone_input, voice_selector, "state"],
    outputs=[
        recognized_text_output,
        reply_audio_output,
        chat_output,
        "state",
    ],
    description=interface_description,
    article=article,
)

# Start serving the app as soon as the module is executed.
iface.launch()
|