"""IMMY on-device alpha version with TTS."""

import tempfile

import gradio as gr
from gtts import gTTS
from huggingface_hub import InferenceClient

client = InferenceClient("Daemontatox/IMMY_1f")


def text_to_speech(text):
    """Convert text to speech and return the path to the audio file."""
    tts = gTTS(text=text, lang="en")
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as fp:
        tts.save(fp.name)
        return fp.name


def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Stream a chat completion, then yield the final text with its audio path."""
    messages = [{"role": "system", "content": system_message}]

    for user_turn, assistant_turn in history:
        if user_turn:
            messages.append({"role": "user", "content": user_turn})
        if assistant_turn:
            messages.append({"role": "assistant", "content": assistant_turn})

    messages.append({"role": "user", "content": message})

    response = ""
    # Note: the loop variable must not shadow the `message` parameter
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        token = chunk.choices[0].delta.content
        if token:  # the final stream chunk may carry no content
            response += token
            yield response

    # Generate audio after the complete response is ready
    audio_path = text_to_speech(response)
    yield (response, audio_path)


with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    msg = gr.Textbox(label="Message")
    audio_output = gr.Audio(label="Response Audio")
    system_message = gr.Textbox(
        value=(
            "You are Immy, a magical AI-powered teddy bear who loves chatting "
            "with children. You are kind, funny, and full of wonder, always "
            "ready to tell stories, answer questions, and offer friendly "
            "advice. Speak playfully and patiently, using simple, "
            "child-friendly language to encourage curiosity, learning, and "
            "imagination."
        ),
        label="System message",
    )
    max_tokens = gr.Slider(
        minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"
    )
    temperature = gr.Slider(
        minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"
    )
    top_p = gr.Slider(
        minimum=0.1, maximum=1.0, value=0.95, step=0.05,
        label="Top-p (nucleus sampling)"
    )

    def user(user_message, history):
        """Append the user's message to the history and clear the textbox."""
        return "", history + [[user_message, None]]

    def bot(history, system_message, max_tokens, temperature, top_p):
        """Stream the assistant's reply into the chat; attach audio at the end."""
        user_message = history[-1][0]
        for response in respond(
            user_message, history[:-1], system_message, max_tokens, temperature, top_p
        ):
            if isinstance(response, tuple):
                # Final yield: (full text, audio file path)
                history[-1][1] = response[0]
                yield history, response[1]
            else:
                # Intermediate text update while streaming
                history[-1][1] = response
                yield history, None

    msg.submit(
        user, [msg, chatbot], [msg, chatbot], queue=False
    ).then(
        bot,
        [chatbot, system_message, max_tokens, temperature, top_p],
        [chatbot, audio_output],
    )

if __name__ == "__main__":
    demo.launch()