Spaces:
Sleeping
Sleeping
File size: 2,511 Bytes
0441ef7 788cf13 0441ef7 788cf13 0441ef7 788cf13 0441ef7 788cf13 ef1d0d3 0441ef7 788cf13 2895ffe 0441ef7 2895ffe 0441ef7 788cf13 0441ef7 788cf13 1918a73 0441ef7 1918a73 0441ef7 788cf13 ef1d0d3 788cf13 791889e 6d7626f fd7b0d8 791889e 788cf13 0441ef7 788cf13 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 |
import spaces
import tempfile
import gradio as gr
from streaming_stt_nemo import Model
from huggingface_hub import InferenceClient
import edge_tts
# Initialize default language and STT model
# Only English is preloaded; `engines` maps language code -> loaded STT model
# so additional languages could be cached here later.
default_lang = "en"
engines = {default_lang: Model(default_lang)}
# Function to transcribe audio to text
def transcribe(audio, lang=default_lang):
    """Transcribe an audio file to text with the cached STT engine.

    Args:
        audio: Filepath of the audio to transcribe (as provided by Gradio).
        lang: Language code selecting the engine; defaults to the module-level
            ``default_lang``. Raises KeyError if no engine is loaded for it.

    Returns:
        The best (first) transcription hypothesis as a string.
    """
    engine = engines[lang]
    # stt_file returns a list of hypotheses; keep only the top one.
    return engine.stt_file(audio)[0]
# Initialize Huggingface InferenceClient
# Remote text-generation endpoint; all LLM calls go through this client.
client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
# System instructions for the CrucialCoach
# Prepended verbatim to every user message in model(); the closing "[USER]"
# tag marks where the transcribed user text is appended.
system_instructions = "[SYSTEM] You are CrucialCoach, an AI-powered conversational coach. Guide the user through challenging workplace situations using the principles from 'Crucial Conversations'. Ask one question at a time and provide step-by-step guidance.\n\n[USER]"
# Decorator for using GPU with a duration of 120 seconds
@spaces.GPU(duration=120)
def model(text):
    """Generate a coaching reply for *text* via the Mixtral endpoint.

    The system prompt is prepended, the response is streamed token by token,
    and the end-of-sequence marker is dropped.

    Args:
        text: The user's (transcribed) message.

    Returns:
        The generated reply as a single string.
    """
    generate_kwargs = dict(
        temperature=0.7,
        max_new_tokens=512,
        top_p=0.95,
        repetition_penalty=1,
        do_sample=True,
        seed=42,  # fixed seed so repeated calls sample deterministically
    )
    formatted_prompt = system_instructions + text + "[CrucialCoach]"
    stream = client.text_generation(
        formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False
    )
    # Collect streamed tokens (skipping the EOS marker) and join once at the
    # end rather than concatenating strings inside the loop.
    tokens = [
        response.token.text
        for response in stream
        if response.token.text != "</s>"
    ]
    return "".join(tokens)
# Asynchronous function to handle audio input and provide response
async def respond(audio):
    """Full pipeline: STT -> LLM coaching reply -> TTS audio file.

    Args:
        audio: Filepath of the recorded user audio (from Gradio).

    Returns:
        Path to a temporary file containing the spoken reply, which Gradio
        serves back to the browser.
    """
    user = transcribe(audio)
    reply = model(user)
    communicate = edge_tts.Communicate(reply)
    # Reserve a temp filename, then close the handle BEFORE edge_tts writes
    # to it — re-opening a still-open NamedTemporaryFile by name fails on
    # Windows. delete=False keeps the file alive for Gradio to serve.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
        tmp_path = tmp_file.name
    await communicate.save(tmp_path)
    # NOTE(review): edge-tts emits MP3 data by default, so the ".wav" suffix
    # is only a label — confirm downstream playback tolerates it.
    return tmp_path
# Base Gradio theme for the app.
theme = gr.themes.Base()

# Voice-chat UI: a single tab with a microphone input wired live to the
# respond() pipeline, playing the synthesized answer automatically.
with gr.Blocks(theme=theme, css="footer {visibility: hidden} textbox {resize: none}", title="CrucialCoach DEMO") as demo:
    with gr.Tab("🗣️ Crucial Coach Chat"):
        voice_in = gr.Audio(sources=["microphone"], type="filepath", label="Voice Chat")
        voice_out = gr.Audio(type="filepath", label="CrucialCoach", interactive=False, autoplay=True, elem_classes="audio")
        gr.Interface(fn=respond, inputs=voice_in, outputs=voice_out, live=True)

# Bound the request queue, then start the server.
demo.queue(max_size=200)
demo.launch()
|