Spaces:

kanhatakeyama
/

tanuki8x8bchat

Running

File size: 4,350 Bytes

import gradio as gr
# from huggingface_hub import InferenceClient
from openai import OpenAI
import os
openai_api_key = os.getenv('api_key')
openai_api_base = os.getenv('url')
model_name = "weblab-GENIAC/Tanuki-8x8B-dpo-v1.0"
"""
For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
"""
# client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
client = OpenAI(
    api_key=openai_api_key,
    base_url=openai_api_base,
)


def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    messages = [
        {"role": "system", "content": system_message}]

    for val in history:
        if val[0]:
            messages.append({"role": "user", "content": val[0]})
        if val[1]:
            messages.append({"role": "assistant", "content": val[1]})

    messages.append({"role": "user", "content": message})

    response = ""

    for message in client.chat.completions.create(
        model=model_name,
        messages=messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        token = message.choices[0].delta.content

        # response += token
        if token is not None:
            response += (token)
        if response.find("### 指示:")>0:
            break
        yield response


"""
For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
"""

description = """
### [Tanuki-8x8B-dpo-v1.0](https://huggingface.co/weblab-GENIAC/Tanuki-8x8B-dpo-v1.0)との会話(期間限定での公開)
- 人工知能開発のため､原則として**このChatBotの入出力データは全て著作権フリー(CC0)で公開予定です**ので､ご注意ください｡著作物､個人情報､機密情報､誹謗中傷などのデータを入力しないでください｡
- **上記の条件に同意する場合のみ**､以下のChatbotを利用してください｡
"""


HEADER = description
FOOTER = """### 注意
- コンテクスト長が4096までなので､あまり会話が長くなると､エラーで停止します｡ページを再読み込みしてください｡
- GPUサーバーが不安定なので､応答しないことがあるかもしれません｡"""


def run():
    chatbot = gr.Chatbot(
        elem_id="chatbot",
        scale=1,
        show_copy_button=True,
        height="70%",
        layout="panel",
    )
    with gr.Blocks(fill_height=True) as demo:
        gr.Markdown(HEADER)
        gr.ChatInterface(
            fn=respond,
            stop_btn="Stop Generation",
            cache_examples=False,
            multimodal=False,
            chatbot=chatbot,
            additional_inputs_accordion=gr.Accordion(
                label="Parameters", open=False, render=False
            ),
            additional_inputs=[
                gr.Textbox(value="以下は、タスクを説明する指示です。要求を適切に満たす応答を書きなさい。",
                           label="System message(試験用: 変えると出力が壊れる可能性)",
                                              render=False,),
                gr.Slider(
                    minimum=1,
                    maximum=4096,
                    step=1,
                    value=1024,
                    label="Max tokens",
                    visible=True,
                    render=False,
                ),
                gr.Slider(
                    minimum=0,
                    maximum=1,
                    step=0.1,
                    value=0.3,
                    label="Temperature",
                    visible=True,
                    render=False,
                ),
                gr.Slider(
                    minimum=0,
                    maximum=1,
                    step=0.1,
                    value=1.0,
                    label="Top-p",
                    visible=True,
                    render=False,
                ),
            ],
            analytics_enabled=False,
        )
        gr.Markdown(FOOTER)
    demo.queue(max_size=256, api_open=False)
    demo.launch(share=False, quiet=True)


if __name__ == "__main__":
    run()