import spaces
import gradio as gr
import torch
import subprocess
import aiohttp
from gradio import State
import asyncio
import json
import threading  # used to run the ochat server monitor in a background thread
# Function to start and monitor the ochat server
@spaces.GPU
async def start_ochat_server():
    print(f"Is CUDA available: {torch.cuda.is_available()}")
    if torch.cuda.is_available():
        print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")

    command = [
        "python", "-m", "ochat.serving.openai_api_server",
        "--model", "openchat/openchat_3.5"
    ]

    # Check whether the server is already answering on its docs endpoint
    async def is_server_running():
        async with aiohttp.ClientSession() as session:
            try:
                async with session.get("http://localhost:18888/docs") as response:
                    return response.status == 200
            except aiohttp.ClientError:
                return False

    while True:
        if not await is_server_running():
            try:
                # Start the server in a separate process
                subprocess.Popen(command)
                print("ochat server started successfully.")
            except Exception as e:
                print(f"Failed to start ochat server: {e}")
        await asyncio.sleep(60)  # Wait for 60 seconds before checking again

# Start the server monitor in a daemon thread so the Gradio app below can still launch
threading.Thread(target=lambda: asyncio.run(start_ochat_server()), daemon=True).start()
# Function to send a message to the ochat server and get a response
async def chat_with_ochat(message):
    base_url = "http://localhost:18888"
    chat_url = f"{base_url}/v1/chat/completions"

    headers = {"Content-Type": "application/json"}
    data = {
        "model": "openchat_3.5",
        "messages": [{"role": "user", "content": message}]
    }

    async with aiohttp.ClientSession() as session:
        try:
            async with session.post(chat_url, headers=headers, json=data) as response:
                if response.status == 200:
                    response_data = await response.json()
                    return response_data['choices'][0]['message']['content']
                else:
                    return f"Error: Server responded with status code {response.status}"
        except aiohttp.ClientError as e:
            return f"Error: {e}"
# Create a Gradio Blocks interface with session state
with gr.Blocks(theme=gr.themes.Soft()) as app:
    gr.Markdown("## vLLM OpenChat-3.5 Interface")
    gr.Markdown("### The vLLM server cannot handle concurrent users in Spaces. If you get an error, run it with Docker.")
    gr.Markdown("This will run better on your own machine: ```docker run -it -p 7860:7860 --platform=linux/amd64 --gpus all \
registry.hf.space/macadeliccc-openchat-3-5-chatbot:latest python app.py```")

    message = gr.Textbox(label="Your Message", placeholder="Type your message here")
    chatbot = gr.Chatbot()
    clear = gr.Button("Clear")
    history = State([])  # Session state for chat history

    async def user(message, history):
        # Append the new user message to the chat history and clear the textbox
        return "", history + [[message, None]]

    async def bot(history):
        # Generate a reply for the most recent user message
        if history and history[-1] and history[-1][0]:
            user_message = history[-1][0]
            bot_response = await chat_with_ochat(user_message)
            history[-1][1] = bot_response  # Update the last entry with the bot's response
        return history

    message.submit(user, [message, chatbot], [message, chatbot], queue=True).then(
        bot, chatbot, chatbot
    )
    clear.click(lambda: None, None, chatbot, queue=False)
app.queue()
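# Note (a sketch): when running inside the Docker image mentioned above, binding to all
# interfaces may be needed so the mapped port is reachable from the host, e.g.:
#     app.launch(server_name="0.0.0.0", server_port=7860)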
app.launch()