import os
from openai import OpenAI
import gradio as gr

# The endpoint and credentials are read from the environment, so the same code
# works against any OpenAI-compatible server hosting the model.
api_key = os.environ.get("OPENAI_API_KEY")
base_url = os.environ.get("OPENAI_API_BASE")
client = OpenAI(api_key=api_key, base_url=base_url)


def predict(message, history):
    # Convert Gradio's (user, assistant) history tuples into OpenAI-style messages.
    history_openai_format = []
    for human, assistant in history:
        history_openai_format.append({"role": "user", "content": human})
        history_openai_format.append({"role": "assistant", "content": assistant})
    history_openai_format.append({"role": "user", "content": message})

    # Stream the completion and yield the growing reply so Gradio renders it incrementally.
    response = client.chat.completions.create(
        model="Llama-3-8B-UltraMedical",
        messages=history_openai_format,
        temperature=1.0,
        stop=["<|eot_id|>"],
        stream=True,
    )

    partial_message = ""
    for chunk in response:
        if chunk.choices[0].delta.content is not None:
            partial_message = partial_message + chunk.choices[0].delta.content
            yield partial_message


gr.ChatInterface(predict).launch()
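
Before launching the Gradio app, it can help to sanity-check the endpoint with a single non-streaming request. The sketch below reuses the same environment variables, model name, and stop token as the snippet above; the example question is only illustrative, and the server behind `OPENAI_API_BASE` is assumed to expose an OpenAI-compatible chat completions API.

```python
import os
from openai import OpenAI

# Same assumed setup as the Gradio demo: an OpenAI-compatible server
# (e.g. one hosting Llama-3-8B-UltraMedical) reachable via OPENAI_API_BASE.
client = OpenAI(
    api_key=os.environ.get("OPENAI_API_KEY"),
    base_url=os.environ.get("OPENAI_API_BASE"),
)

response = client.chat.completions.create(
    model="Llama-3-8B-UltraMedical",
    messages=[{"role": "user", "content": "What are common symptoms of iron-deficiency anemia?"}],  # illustrative question
    temperature=1.0,
    stop=["<|eot_id|>"],
    stream=False,  # one complete response instead of streamed chunks
)
print(response.choices[0].message.content)
```

If this prints a sensible answer, the streaming Gradio interface above should work against the same endpoint without further changes.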