Spaces:

macadeliccc
/

laser-dolphin-mixtral-chat

Running on Zero

App Files Files Community

laser-dolphin-mixtral-chat / app.py

macadeliccc

test

9568922 about 1 year ago

raw

history blame

2.89 kB

	import spaces
	import gradio as gr
	import torch
	import subprocess
	import aiohttp
	from gradio import State
	import asyncio
	import json
	import asyncio

	# Function to start the ochat server
	@spaces.GPU
	def start_ochat_server():
	    """Spawn the ochat OpenAI-compatible API server as a background process.

	    Prints CUDA diagnostics, then launches
	    ``python -m ochat.serving.openai_api_server --model openchat/openchat_3.5``
	    without waiting for it to exit.

	    Returns:
	        A status string: a success message once the process has been
	        spawned, or a failure message that includes the error. Success only
	        means the process was started; it does not mean the server is ready
	        to accept requests.
	    """
	    cuda_available = torch.cuda.is_available()
	    print(f"Is CUDA available: {cuda_available}")
	    # Asking for the current device raises when no CUDA device is present,
	    # which would abort startup before the server is even spawned.
	    if cuda_available:
	        print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")

	    command = [
	        "python", "-m", "ochat.serving.openai_api_server",
	        "--model", "openchat/openchat_3.5",
	    ]

	    # Start the server in a separate process; Popen returns immediately.
	    try:
	        subprocess.Popen(command)
	    except (OSError, ValueError) as e:
	        # OSError: executable missing / not runnable; ValueError: bad Popen arguments.
	        return f"Failed to start ochat server: {e}"
	    return "ochat server started successfully"


	# Launch the model server when this module runs, before the UI is built.
	# The returned status string is not used here.
	start_ochat_server()

	# Function to send a message to the ochat server and get a response
	async def chat_with_ochat(message):
	    """Send a single user message to the local ochat server and return the reply.

	    Posts an OpenAI-style chat-completion request to
	    ``http://localhost:18888/v1/chat/completions`` for model ``openchat_3.5``.

	    Args:
	        message: Text of the user's message.

	    Returns:
	        The content of the first choice's message on success. On any failure
	        (non-200 status, connection problem, timeout, or a response body that
	        is not the expected shape) a string starting with ``"Error: "`` is
	        returned instead of raising, so the caller can display it directly.
	    """
	    base_url = "http://localhost:18888"
	    chat_url = f"{base_url}/v1/chat/completions"
	    headers = {"Content-Type": "application/json"}
	    data = {
	        "model": "openchat_3.5",
	        "messages": [{"role": "user", "content": message}],
	    }

	    async with aiohttp.ClientSession() as session:
	        try:
	            async with session.post(chat_url, headers=headers, json=data) as response:
	                if response.status != 200:
	                    return f"Error: Server responded with status code {response.status}"
	                response_data = await response.json()
	                return response_data['choices'][0]['message']['content']
	        except asyncio.TimeoutError:
	            # Timeouts are not guaranteed to be aiohttp.ClientError subclasses,
	            # so they need their own handler to avoid escaping to the UI.
	            return "Error: Request to the ochat server timed out"
	        except aiohttp.ClientError as e:
	            return f"Error: {e}"
	        except (KeyError, IndexError, TypeError, ValueError) as e:
	            # A 200 response whose body is not valid JSON or lacks
	            # choices[0].message.content.
	            return f"Error: Unexpected response from server ({e!r})"

	# Build the chat UI: informational banners, a message box, the chat window and a clear button
	with gr.Blocks(theme=gr.themes.Soft()) as app:
	    gr.Markdown("## vLLM OpenChat-3.5 Interface")
	    gr.Markdown("### the vLLM server cannot handle concurrent users in spaces. If you get an error, run it on docker.")
	    gr.Markdown("This will run better on your own machine: ```docker run -it -p 7860:7860 --platform=linux/amd64 --gpus all \
	registry.hf.space/macadeliccc-openchat-3-5-chatbot:latest python app.py```")


	    message = gr.Textbox(label="Your Message", placeholder="Type your message here")
	    chatbot = gr.Chatbot()
	    clear = gr.Button("Clear")

	    # State component created for chat history; the callbacks below are wired to the chatbot value instead
	    history = State([])

	    async def user(message, history):
	        """Append the submitted text as a new turn with no reply yet and empty the textbox."""
	        extended = history + [[message, None]]
	        return "", extended


	    async def bot(history):
	        """Fill in the reply slot of the latest turn with the server's answer."""
	        # Nothing to answer when there is no turn or the last turn has no user text
	        if not (history and history[-1] and history[-1][0]):
	            return history
	        latest_turn = history[-1]
	        latest_turn[1] = await chat_with_ochat(latest_turn[0])
	        return history

	    # On submit: record the user's turn first, then ask the server for the reply
	    submit_event = message.submit(user, [message, chatbot], [message, chatbot], queue=True)
	    submit_event.then(bot, chatbot, chatbot)
	    # The clear button resets the chat window
	    clear.click(lambda: None, None, chatbot, queue=False)

	app.queue()
	app.launch()