import spaces
import gradio as gr
import torch
import subprocess
import aiohttp
import asyncio
from gradio import State
# Function to start the ochat server
@spaces.GPU
def start_ochat_server():
    print(f"Is CUDA available: {torch.cuda.is_available()}")
    print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")
    command = [
        "python", "-m", "ochat.serving.openai_api_server",
        "--model", "openchat/openchat_3.5"
    ]
    # Start the server in a separate (non-blocking) process
    try:
        subprocess.Popen(command)
        return "ochat server started successfully"
    except Exception as e:
        return f"Failed to start ochat server: {e}"
start_ochat_server()
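# Because subprocess.Popen returns immediately, the API server may still be
# loading the model when the first chat request arrives. Below is a minimal
# readiness-check sketch (an illustrative addition, not part of the original
# Space): it polls the server port until a TCP connection succeeds or a
# timeout elapses.
import socket
import time

def wait_for_ochat_server(host="localhost", port=18888, timeout=300):
    """Poll until the ochat server accepts TCP connections, or time out."""
    deadline = time.time() + timeout
    while time.time() < deadline:
        try:
            with socket.create_connection((host, port), timeout=2):
                return True
        except OSError:
            time.sleep(2)
    return False

# e.g. wait_for_ochat_server() could be called here, before building the UI.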
# Function to send a message to the ochat server and get a response
async def chat_with_ochat(message):
    base_url = "http://localhost:18888"
    chat_url = f"{base_url}/v1/chat/completions"
    headers = {"Content-Type": "application/json"}
    data = {
        "model": "openchat_3.5",
        "messages": [{"role": "user", "content": message}]
    }
    async with aiohttp.ClientSession() as session:
        try:
            async with session.post(chat_url, headers=headers, json=data) as response:
                if response.status == 200:
                    response_data = await response.json()
                    return response_data['choices'][0]['message']['content']
                else:
                    return f"Error: Server responded with status code {response.status}"
        except aiohttp.ClientError as e:
            return f"Error: {e}"
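# A quick way to exercise the endpoint outside the UI (illustrative only;
# assumes the server is already up on localhost:18888):
#
#     print(asyncio.run(chat_with_ochat("Hello!")))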
# Create a Gradio Blocks interface with session state
with gr.Blocks(theme=gr.themes.Soft()) as app:
    gr.Markdown("## vLLM OpenChat-3.5 Interface")
    gr.Markdown("### The vLLM server cannot handle concurrent users in Spaces. If you get an error, run it in Docker.")
    gr.Markdown("This will run better on your own machine: "
                "```docker run -it -p 7860:7860 --platform=linux/amd64 --gpus all "
                "registry.hf.space/macadeliccc-openchat-3-5-chatbot:latest python app.py```")

    message = gr.Textbox(label="Your Message", placeholder="Type your message here")
    chatbot = gr.Chatbot()
    clear = gr.Button("Clear")
    history = State([])  # Session state for chat history

    async def user(message, history):
        # Clear the textbox and append the user message with a pending reply slot
        return "", history + [[message, None]]

    async def bot(history):
        if history and history[-1] and history[-1][0]:
            user_message = history[-1][0]
            bot_response = await chat_with_ochat(user_message)
            history[-1][1] = bot_response  # Update the last entry with the bot's response
        return history

    message.submit(user, [message, chatbot], [message, chatbot], queue=True).then(
        bot, chatbot, chatbot
    )
    clear.click(lambda: None, None, chatbot, queue=False)
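# Note on concurrency (an aside, not in the original Space): a single ochat
# server process serves every user, so the Gradio queue effectively serializes
# requests. On Gradio 3.x the worker pool could be pinned explicitly, e.g.
# app.queue(concurrency_count=1); this assumes the 3.x queue API, which
# changed in Gradio 4.x.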
app.queue()
app.launch()