import spaces
import tempfile
import gradio as gr
from streaming_stt_nemo import Model
from huggingface_hub import InferenceClient
import edge_tts

# Initialize the default language and its speech-to-text model
default_lang = "en"
engines = {default_lang: Model(default_lang)}


# Transcribe an audio file to text using the preloaded STT engine
def transcribe(audio):
    lang = "en"
    stt_model = engines[lang]
    text = stt_model.stt_file(audio)[0]
    return text


# Initialize the Hugging Face InferenceClient for text generation
client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")

# System prompt for CrucialCoach
system_instructions = "[SYSTEM] You are CrucialCoach, an AI-powered conversational coach. Guide the user through challenging workplace situations using the principles from 'Crucial Conversations'. Ask one question at a time and provide step-by-step guidance.\n\n[USER]"


# Reserve ZeroGPU hardware for up to 120 seconds per call
@spaces.GPU(duration=120)
def model(text):
    generate_kwargs = dict(
        temperature=0.7,
        max_new_tokens=512,
        top_p=0.95,
        repetition_penalty=1.0,
        do_sample=True,
        seed=42,
    )
    formatted_prompt = system_instructions + text + "[CrucialCoach]"
    stream = client.text_generation(
        formatted_prompt,
        **generate_kwargs,
        stream=True,
        details=True,
        return_full_text=False,
    )
    # Accumulate streamed tokens into the final reply
    output = ""
    for response in stream:
        if response.token.text:
            output += response.token.text
    return output


# Handle one voice turn: transcribe, generate a reply, synthesize speech
async def respond(audio):
    user = transcribe(audio)
    reply = model(user)
    communicate = edge_tts.Communicate(reply)
    # edge-tts emits MP3 by default, so use a matching suffix; close the
    # handle before saving so edge-tts can write to the path itself
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
        tmp_path = tmp_file.name
    await communicate.save(tmp_path)
    return tmp_path


# Gradio theme
theme = gr.themes.Base()

# Gradio interface for voice chat
with gr.Blocks(theme=theme, css="footer {visibility: hidden} textbox {resize: none}", title="CrucialCoach DEMO") as demo:
    with gr.Tab("🗣️ Crucial Coach Chat"):
        input_audio = gr.Audio(sources=["microphone"], type="filepath", label="Voice Chat")
        output_audio = gr.Audio(type="filepath", label="CrucialCoach", interactive=False, autoplay=True, elem_classes="audio")
        # Wire the microphone input through the voice pipeline with live updates
        gr.Interface(
            fn=respond,
            inputs=input_audio,
            outputs=output_audio,
            live=True,
        )

# Queue setup and launch
demo.queue(max_size=200)
demo.launch()
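
# --- Optional local smoke test: a minimal sketch, not part of the original app ---
# Exercises the STT -> LLM -> TTS pipeline without the Gradio UI. It assumes a
# local recording at "sample.wav" (a hypothetical placeholder path) and valid
# Hugging Face credentials for the InferenceClient. It is left commented out
# because demo.launch() above blocks; to try it, comment out demo.launch() and
# uncomment the lines below.
#
# import asyncio
#
# async def _smoke_test():
#     # "sample.wav" is a placeholder; substitute any short speech recording
#     tts_path = await respond("sample.wav")
#     print("Coach reply audio written to:", tts_path)
#
# asyncio.run(_smoke_test())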