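# Hugging Face Space app: a Gradio chat UI that streams replies from a
# GGUF-quantized OpenBuddy-Llama2-13B model served locally by llama-cpp-python.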
import os
import gradio as gr
from llama_cpp import Llama
from huggingface_hub import hf_hub_download

# Set the Hugging Face API token (guarded: assigning None to os.environ
# would raise a TypeError when the variable is unset).
hf_token = os.getenv("HUGGINGFACE_TOKEN")
if hf_token:
    os.environ["HUGGINGFACE_TOKEN"] = hf_token

model_name_or_path = "TheBloke/OpenBuddy-Llama2-13B-v11.1-GGUF"
model_basename = "openbuddy-llama2-13b-v11.1.Q2_K.gguf"

# hf_hub_download fetches the file once, caches it locally, and returns the local path.
model_path = hf_hub_download(repo_id=model_name_or_path, filename=model_basename, revision="main")
llama = Llama(model_path)
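# Note: Llama() is constructed here with library defaults; if long prompts get
# truncated, passing a larger n_ctx (llama-cpp-python's context-window
# parameter) to the Llama constructor may help.
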
def predict(message, history):
    # Rebuild the conversation in the OpenAI-style chat format expected by
    # create_chat_completion. Gradio's ChatInterface passes history as a list
    # of (user_message, assistant_reply) pairs, so the reply role is
    # "assistant", not "system".
    messages = []
    for user_content, assistant_content in history:
        messages.append({"role": "user", "content": user_content + "\n"})
        messages.append({"role": "assistant", "content": assistant_content + "\n"})
    messages.append({"role": "user", "content": message + "\n"})
    # Get the answer from Llama (streaming enabled). Each streamed chunk uses
    # the OpenAI-style schema, with incremental text under choices[0]['delta'].
    streamer = llama.create_chat_completion(messages, stream=True)
    partial_message = ""
    for chunk in streamer:
        delta = chunk['choices'][0]['delta']
        if 'content' in delta:
            partial_message += delta['content']
            yield partial_message

gr.ChatInterface(predict).queue().launch()
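
# Assumed dependencies for this Space (a hypothetical requirements.txt,
# inferred from the imports above; pin versions as appropriate):
#   gradio
#   llama-cpp-python
#   huggingface_hub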