Spaces:

robinhad
/

ukrainian-ai

Configuration error

App Files Files Community

Yurii Paniv committed on Jun 14, 2022

Commit

9ce0232

•

1 Parent(s): e96206b

Add end-to-end speaking demo

Browse files

Files changed (4) hide show

README.md +8 -10
app.py +44 -36
gpt2-uk-conversational +1 -0
requirements.txt +2 -1

README.md CHANGED Viewed

@@ -13,19 +13,17 @@ This is a pet project with aim to provide an end-to-end voice chatbot with abili
 It's a project with an aim to demonstrate current state-of-the-art speech technologies for Ukrainian language.
 # Technologies used:
-- [ ] [Wav2Vec2 XLS-R 300M fine-tuned to Ukrainian language. WER: `31.56%`](https://huggingface.co/robinhad/wav2vec2-xls-r-300m-uk) for speech recognition.
-- [ ] [GlowTTS, trained on M-AILABS dataset](https://github.com/robinhad/ukrainian-tts).
-- [ ] Conversational pipeline (TBD)
-# Features
-- [ ] Ability to change backends
-- [ ] Support for Google Text-to-Speech/Speech-to-Text
-- [ ] Echo backend (speaks recognized phrase)
-- [ ] Other models
 # How to setup:
-TBD

 It's a project with an aim to demonstrate current state-of-the-art speech technologies for Ukrainian language.
+Link to speaking demo: [https://huggingface.co/spaces/robinhad/ukrainian-ai](https://huggingface.co/spaces/robinhad/ukrainian-ai)
+Link to text demo: [https://huggingface.co/robinhad/gpt2-uk-conversational](https://huggingface.co/robinhad/gpt2-uk-conversational)
 # Technologies used:
+- [Wav2Vec2 XLS-R 300M fine-tuned to Ukrainian language](https://huggingface.co/Yehor/wav2vec2-xls-r-300m-uk-with-small-lm) for speech recognition.
+- [Ukrainian VITS TTS](https://github.com/robinhad/ukrainian-tts) for text-to-speech generation.
+- Conversational pipeline (this repository)
+TODO: training scripts for conversational pipeline
 # How to setup:
+1. `pip install -r requirements.txt`
+2. `python app.py`

app.py CHANGED Viewed

@@ -1,6 +1,5 @@
-import random
 import gradio as gr
-from transformers import pipeline
 import tempfile
 import torch
 from os.path import exists
@@ -12,7 +11,7 @@ def download(url, file_name):
     if not exists(file_name):
         print(f"Downloading {file_name}")
         r = requests.get(url, allow_redirects=True)
-        with open(file_name, 'wb') as file:
             file.write(r.content)
     else:
         print(f"Found {file_name}. Skipping download...")
@@ -29,47 +28,56 @@ config_path = "config.json"
 download(model_link, model_path)
 download(config_link, config_path)
-p = pipeline("automatic-speech-recognition", "Yehor/wav2vec2-xls-r-300m-uk-with-small-lm")
 synthesizer = Synthesizer(
-    model_path, config_path, None, None, None,
 )
-badge = "https://visitor-badge-reloaded.herokuapp.com/badge?page_id=robinhad.ukrainian-ai"
-def transcribe(audio):
     text = p(audio)["text"]
     with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
         with torch.no_grad():
-            wavs = synthesizer.tts(text)
             synthesizer.save_wav(wavs, fp)
-        return text, fp.name
-gr.Interface(
-    fn=transcribe,
-    inputs=gr.inputs.Audio(source="microphone", type="filepath"),
-    outputs=[gr.outputs.Textbox(label="Recognized text"),gr.outputs.Audio(label="Output")],
-    article=f"<center><img src=\"{badge}\" alt=\"visitors badge\"/></center>",).launch()
-def chat(message, history):
-    history = history or []
-    #if message.startswith("How many"):
-    #    response = random.randint(1, 10)
-    #elif message.startswith("How"):
-    #    response = random.choice(["Great", "Good", "Okay", "Bad"])
-    #elif message.startswith("Where"):
-    #    response = random.choice(["Here", "There", "Somewhere"])
-    #else:
-    #    response = "I don't know"
-    #history.append((message, response))
-    return history, history
-#iface = gr.Interface(
-#    chat,
-#    ["audio", "state"],
-#    ["chatbot", "state"],
-#    allow_screenshot=False,
-#    allow_flagging="never",
-#)
-#iface.launch()

 import gradio as gr
+from transformers import Conversation, ConversationalPipeline, pipeline
 import tempfile
 import torch
 from os.path import exists
     if not exists(file_name):
         print(f"Downloading {file_name}")
         r = requests.get(url, allow_redirects=True)
+        with open(file_name, "wb") as file:
             file.write(r.content)
     else:
         print(f"Found {file_name}. Skipping download...")
 download(model_link, model_path)
 download(config_link, config_path)
+p = pipeline(
+    "automatic-speech-recognition", "Yehor/wav2vec2-xls-r-300m-uk-with-small-lm"
+)
+conv: ConversationalPipeline = pipeline(
+    "conversational", "robinhad/gpt2-uk-conversational", use_auth_token=True
+)
 synthesizer = Synthesizer(
+    model_path,
+    config_path,
+    None,
+    None,
+    None,
+)
+badge = (
+    "https://visitor-badge-reloaded.herokuapp.com/badge?page_id=robinhad.ukrainian-ai"
 )
+def transcribe(audio, history):
     text = p(audio)["text"]
+    history = history or []
+    past_user_inputs = [i[0] for i in history]
+    generated_responses = [i[1] for i in history]
+    response = conv(Conversation(text, past_user_inputs, generated_responses))
+    response = response.generated_responses[-1]
+    history.append((text, response))
     with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
         with torch.no_grad():
+            wavs = synthesizer.tts(response)
             synthesizer.save_wav(wavs, fp)
+        return text, fp.name, history, history
+iface = gr.Interface(
+    fn=transcribe,
+    inputs=[gr.inputs.Audio(source="microphone", type="filepath"), "state"],
+    outputs=[
+        gr.outputs.Textbox(label="Recognized text"),
+        gr.outputs.Audio(label="Output"),
+        gr.outputs.Chatbot(label="Chat"),
+        "state",
+    ],
+    description="""Це альфа-версія end-to-end розмовного бота, з яким можна поспілкуватися голосом.
+    Перейдіть сюди для доступу до текстової версії: [https://huggingface.co/robinhad/gpt2-uk-conversational](https://huggingface.co/robinhad/gpt2-uk-conversational)
+    """,
+    article=f"""Розпізнавання української: [https://huggingface.co/Yehor/wav2vec2-xls-r-300m-uk-with-small-lm](https://huggingface.co/Yehor/wav2vec2-xls-r-300m-uk-with-small-lm)
+    Синтез української: [https://huggingface.co/spaces/robinhad/ukrainian-tts](https://huggingface.co/spaces/robinhad/ukrainian-tts)
+    <center><img src="{badge}" alt="visitors badge"/></center>""",
+)
+iface.launch()

gpt2-uk-conversational ADDED Viewed

	@@ -0,0 +1 @@


1	+ Subproject commit c70fdb543d8bf0509e5787ce3a7e768ef52e6991

requirements.txt CHANGED Viewed

@@ -3,4 +3,5 @@ transformers==4.19.4
 TTS==0.6.2
 torch
 pyctcdecode
-https://github.com/kpu/kenlm/archive/master.zip

 TTS==0.6.2
 torch
 pyctcdecode
+https://github.com/kpu/kenlm/archive/master.zip
+sentencepiece==0.1.96