import spaces
import gradio as gr
import torch
import subprocess
import aiohttp
from gradio import State
import asyncio
import json
import asyncio

# Function to start the ochat server
@spaces.GPU
def start_ochat_server():
    """Spawn the ochat ``openai_api_server`` module as a background process.

    Prints CUDA diagnostics, then launches
    ``-m ochat.serving.openai_api_server --model openchat/openchat_3.5``
    with the current Python interpreter. The child process is not waited on,
    and no readiness check is performed.

    Returns:
        str: A human-readable status message. A failure to spawn the process
        is reported through this message rather than raised.
    """
    import sys  # local import: only needed to locate the running interpreter

    cuda_available = torch.cuda.is_available()
    print(f"Is CUDA available: {cuda_available}")
    # Querying the device name raises when no CUDA device is present, so only
    # ask for it once availability has been confirmed.
    if cuda_available:
        print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")

    # Launch with the interpreter running this app so the child sees the same
    # installed packages; fall back to a PATH lookup if it is unknown.
    command = [
        sys.executable or "python", "-m", "ochat.serving.openai_api_server",
        "--model", "openchat/openchat_3.5",
    ]

    # Popen returns as soon as the process is spawned, so "started" below
    # means the process was created, not that the server is accepting requests.
    try:
        subprocess.Popen(command)
    except Exception as e:
        return f"Failed to start ochat server: {e}"
    return "ochat server started successfully"


# Launch the backend at import time and surface the outcome: the function
# reports a failed launch through its return value instead of raising, so
# discarding that value would hide the failure.
print(start_ochat_server())

# Function to send a message to the ochat server and get a response
async def chat_with_ochat(message):
    """Send a single user message to the local ochat server and return the reply.

    Posts a one-message chat-completion request for model ``openchat_3.5`` to
    ``http://localhost:18888/v1/chat/completions``.

    Args:
        message: Text to send as the sole ``user`` message.

    Returns:
        str: The ``content`` of the first choice on success; otherwise a
        string starting with ``"Error: "`` describing what went wrong. Errors
        are returned rather than raised so the caller can display them as the
        reply.
    """
    base_url = "http://localhost:18888"
    chat_url = f"{base_url}/v1/chat/completions"
    headers = {"Content-Type": "application/json"}
    data = {
        "model": "openchat_3.5",
        "messages": [{"role": "user", "content": message}],
    }

    async with aiohttp.ClientSession() as session:
        try:
            async with session.post(chat_url, headers=headers, json=data) as response:
                if response.status != 200:
                    return f"Error: Server responded with status code {response.status}"
                response_data = await response.json()
        except aiohttp.ClientError as e:
            return f"Error: {e}"
        except asyncio.TimeoutError:
            # A client-side timeout is not an aiohttp.ClientError, so it needs
            # its own handler to keep the "errors come back as strings" contract.
            return "Error: Request to the ochat server timed out"
        except ValueError as e:
            # Body was not valid JSON (json.JSONDecodeError is a ValueError).
            return f"Error: Server returned invalid JSON: {e}"

    # A 200 response can still lack the expected structure; report that
    # instead of letting a lookup error escape to the caller.
    try:
        return response_data['choices'][0]['message']['content']
    except (KeyError, IndexError, TypeError):
        return "Error: Unexpected response format from the ochat server"

# Build the Gradio Blocks UI: a textbox feeding a chatbot, plus a clear button
with gr.Blocks(theme=gr.themes.Soft()) as app:
    # Static header and usage notes shown above the chat widgets.
    gr.Markdown("## vLLM OpenChat-3.5 Interface")
    gr.Markdown("### the vLLM server cannot handle concurrent users in spaces. If you get an error, run it on docker.")
    gr.Markdown("This will run better on your own machine: ```docker run -it -p 7860:7860 --platform=linux/amd64 --gpus all \
    registry.hf.space/macadeliccc-openchat-3-5-chatbot:latest python app.py```")

    # Interactive components, created in display order.
    message = gr.Textbox(label="Your Message", placeholder="Type your message here")
    chatbot = gr.Chatbot()
    clear = gr.Button("Clear")

    # Session state for chat history. Not referenced by the event wiring in
    # this block; the handlers receive the chatbot's value as their history.
    history = State([])

    async def user(message, history):
        """Queue the submitted text as a turn awaiting a reply; blank the textbox."""
        pending_turn = [message, None]
        return "", history + [pending_turn]

    async def bot(history):
        """Fill in the reply slot of the latest turn when it has user text."""
        has_prompt = bool(history and history[-1] and history[-1][0])
        if not has_prompt:
            return history
        latest_turn = history[-1]
        # Only the most recent user message is sent to the server.
        latest_turn[1] = await chat_with_ochat(latest_turn[0])
        return history

    # On submit: first record the user's turn, then ask the server for a reply.
    submit_event = message.submit(
        fn=user,
        inputs=[message, chatbot],
        outputs=[message, chatbot],
        queue=True,
    )
    submit_event.then(fn=bot, inputs=chatbot, outputs=chatbot)

    # Clearing sets the chatbot's value to None.
    clear.click(fn=lambda: None, inputs=None, outputs=chatbot, queue=False)

# Enable the request queue, then start serving the app.
app.queue()
app.launch()