# Source: Hugging Face Space "yutohub/japanese-chatbot-arena-leaderboard" (status: Running)
# File size: 12,946 Bytes

import json
import os
import random
import time

import pandas as pd
import requests
import streamlit as st


# Static configuration, read when the module is executed.
# MODELS_INFO maps each model key to a sequence that create_leaderboard_df
# reads as [0] display name, [1] license, [2] organization.
# The files are opened with an explicit encoding so the result does not depend
# on the platform's default locale encoding
# (assumes both files are stored as UTF-8 -- confirm).
with open("models_info.json", "r", encoding="utf-8") as json_file:
    MODELS_INFO = json.load(json_file)
with open("test.csv", "r", encoding="utf-8") as file:
    QUESTION_DF = pd.read_csv(file)
# Keys of every model taking part in the arena.
MODELS = list(MODELS_INFO.keys())
# Question ids are drawn from 1..NUM_QUESTION (see handle_init_question).
NUM_QUESTION = 100


# Fetch the ranking
def get_leaderboard():
    """Fetch the current leaderboard from the ranking endpoint.

    Sends a GET request to the URL stored in the ``DARABASE_URL`` environment
    variable and decodes the response body as JSON.

    NOTE(review): ``DARABASE_URL`` looks like a misspelling of "DATABASE", but
    it is the name this file reads everywhere; renaming it requires changing
    the deployment's environment as well.

    Returns:
        The decoded JSON payload on success, or the string ``"Error"`` when
        the request cannot be made, times out, answers with an HTTP error
        status, or the body is not valid JSON.
    """
    try:
        # requests waits indefinitely by default; a timeout keeps a stalled
        # server from hanging the page.
        response = requests.get(os.environ['DARABASE_URL'], timeout=30)
        # Treat 4xx/5xx answers as failures instead of parsing an error body
        # as if it were leaderboard data.
        response.raise_for_status()
        return response.json()
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
        return "Error"

# Build the leaderboard table
def create_leaderboard_df():
    """Return the leaderboard as a DataFrame ready for display.

    The ranking is obtained from ``get_leaderboard()``; each entry is read for
    its ``"model"`` key and ``"rating"`` value, and the model key is looked up
    in ``MODELS_INFO`` for the display name ([0]), license ([1]) and
    organization ([2]). Ranks are assigned 1, 2, 3, ... in the order the
    entries arrive.

    Returns:
        A DataFrame with one row per ranking entry, or an empty DataFrame
        (after showing an error in the UI) when the ranking is unavailable.
    """
    entries = get_leaderboard()

    # Failure path: report it in the UI and in the log, then give up early
    if entries == "Error":
        st.error("リーダーボードを取得できませんでした。")
        print("リーダーボードを取得できませんでした。")  # log output
        return pd.DataFrame()

    # One list per output column, filled row by row
    columns = {
        "ランク": [],
        "🤖 モデル": [],
        "⭐️ Eloレーティング": [],
        "🏢 組織": [],
        "📃 ライセンス": [],
    }
    for position, entry in enumerate(entries, start=1):
        info = MODELS_INFO[entry["model"]]
        columns["ランク"].append(position)
        columns["🤖 モデル"].append(info[0])
        columns["⭐️ Eloレーティング"].append(entry["rating"])
        columns["🏢 組織"].append(info[2])
        columns["📃 ライセンス"].append(info[1])

    return pd.DataFrame(columns)

# Fetch an answer from the server
@st.cache_data
def _fetch_answer(model_name, question_id):
    """Request one model's answer to one question; raise on any failure.

    Only successful results are stored by ``st.cache_data``: an exception
    propagates to the caller and leaves nothing in the cache.
    """
    params = {'modelName': model_name, 'questionId': question_id}
    # requests waits indefinitely by default; bound the wait explicitly.
    response = requests.get(os.environ['ANSWER_URL'], params=params, timeout=30)
    # Surface 4xx/5xx answers as exceptions rather than parsing an error body.
    response.raise_for_status()
    return response.json()["answer"]


def get_answer(model_name, question_id):
    """Return the answer of ``model_name`` to question ``question_id``.

    The answer is requested from the URL stored in the ``ANSWER_URL``
    environment variable, with ``modelName`` and ``questionId`` as query
    parameters, and read from the ``"answer"`` field of the JSON response.
    Successful answers are cached per (model_name, question_id); failures are
    deliberately not cached so that a transient outage does not stick to
    that pair for as long as the cache entry lives.

    Args:
        model_name: Key of the model whose answer is wanted.
        question_id: Identifier of the question.

    Returns:
        The answer taken from the response, or the string ``"Error"`` when
        the request or the decoding of its response fails.
    """
    try:
        return _fetch_answer(model_name, question_id)
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
        return "Error"

# Send the user's vote to the server
def send_choice(question_id, model_a, model_b, winner, language):
    """POST one vote to the endpoint stored in ``DARABASE_URL``.

    The vote is sent as a JSON object with the keys ``question_id``,
    ``model_a``, ``model_b``, ``winner``, ``language`` and ``tstamp`` (the
    current ``time.time()`` value).

    Args:
        question_id: Identifier of the question that was shown.
        model_a: Key of the model displayed as "A".
        model_b: Key of the model displayed as "B".
        winner: The user's verdict (the callers in this file pass
            ``"model_a"``, ``"model_b"``, ``"tie"`` or ``"tie (bothbad)"``).
        language: Language label stored with the vote.

    Returns:
        The response body as text on success, or the string ``"Error"`` when
        any argument is missing/falsy, the request fails or times out, or the
        server answers with an HTTP error status.
    """
    # Refuse to submit an incomplete vote
    if not all((question_id, model_a, model_b, winner, language)):
        st.error("データが入力されていないため、回答を送信できませんでした。")
        print("質問と回答を取得してください。")  # log output
        return "Error"
    try:
        data = {
            "question_id": question_id,
            "model_a": model_a,
            "model_b": model_b,
            "winner": winner,
            "language": language,
            "tstamp": time.time(),
        }
        headers = {
            'Content-Type': 'application/json'
        }
        # requests waits indefinitely by default; bound the wait explicitly.
        response = requests.post(
            os.environ['DARABASE_URL'],
            headers=headers,
            data=json.dumps(data),
            timeout=30,
        )
        # Without this check a 4xx/5xx answer would be returned as text and
        # the caller would report the vote as successfully stored.
        response.raise_for_status()
        return response.text
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
        return "Error"

        
### Callback Functions ###
# Initialize the session state
def handle_init_state():
    """Create every session-state entry the app reads, if not present yet.

    Entries that already exist are left untouched, so calling this function
    again does not reset them.
    """
    # Built on every call so each history gets its own fresh list object
    initial_values = {
        "chat_history_a": [],
        "chat_history_b": [],
        "question_id": None,
        "model_a": None,
        "model_b": None,
        "question": None,
        # True once a question and its answers have been loaded
        "question_loaded": False,
        # True once the user's vote has been submitted
        "answer_sent": False,
    }
    for key, value in initial_values.items():
        if key not in st.session_state:
            st.session_state[key] = value
    
# Fetch a question and both models' answers
def handle_init_question():
    """Button callback: load a random question answered by two random models.

    On success the session state receives the question id (1-based), the
    question text (row ``question_id - 1`` of ``QUESTION_DF["input"]``), the
    two model keys, and one user turn plus one assistant turn appended to each
    chat history.

    If a question is already loaded, the click is treated as a repeated press:
    the loaded flag and both histories are reset and an error is shown.

    If either answer cannot be fetched (``get_answer`` returned ``"Error"``),
    nothing is added to the histories and the loaded flag is cleared, so the
    failure text is never presented as a model answer that could be voted on.

    Raises:
        ValueError: If fewer than two models are configured
            (from ``random.sample``).
    """
    state = st.session_state

    # Repeated click while a question is already loaded
    if state.question_loaded:
        state.question_loaded = False
        state.chat_history_a = []
        state.chat_history_b = []
        st.error("ボタンを連打しないでください。")
        print("既に質問と回答を取得しています。")  # log output
        return

    # Mark the question as loaded before the (slow) network calls
    state.question_loaded = True
    st.success("質問と回答を取得しています。しばらくお待ちください。")

    # Pick the question
    state.question_id = random.randint(1, NUM_QUESTION)
    state.question = QUESTION_DF["input"][state.question_id - 1]

    # Pick two distinct models; sample() leaves the shared MODELS list in its
    # original order, unlike an in-place shuffle.
    state.model_a, state.model_b = random.sample(MODELS, 2)
    answer_a = get_answer(state.model_a, state.question_id)
    answer_b = get_answer(state.model_b, state.question_id)

    # Do not show a failed fetch as if it were a model's answer
    if "Error" in (answer_a, answer_b):
        state.question_loaded = False
        st.error("回答を取得できませんでした。もう一度お試しください。")
        print("回答を取得できませんでした。")  # log output
        return

    # Record the exchange in both chat histories
    state.chat_history_a.append({"role": "user", "content": state.question})
    state.chat_history_b.append({"role": "user", "content": state.question})
    state.chat_history_a.append({"role": "assistant", "content": answer_a})
    state.chat_history_b.append({"role": "assistant", "content": answer_b})
    st.success("質問と回答を取得しました。回答を選択してください。")
    print("質問と回答を取得しました。")  # log output

# Submit the user's vote
def handle_send_choice(winner):
    """Button callback: send the vote for the currently loaded question.

    Args:
        winner: The verdict to record; forwarded unchanged to ``send_choice``.

    The ``answer_sent`` flag is raised before the request is made and stays
    raised whatever the outcome; ``question_loaded`` is cleared afterwards.
    A second call while ``answer_sent`` is set only reports an error.
    """
    state = st.session_state

    # A vote was already submitted in this session
    if state.answer_sent:
        st.error("既に回答を送信しています。")
        print("既に回答を送信しています。")  # log output
        return

    # Flag the vote as sent, then submit it
    state.answer_sent = True
    result = send_choice(
        question_id=state.question_id,
        model_a=state.model_a,
        model_b=state.model_b,
        winner=winner,
        language="Japanese",
    )

    # Tell the user how the submission went
    if result != "Error":
        st.success("選択肢は正常に送信されました。")
    else:
        st.error("予期せぬエラーが発生しました。")

    # Reset the loaded flag
    state.question_loaded = False


# Page rendering
def _render_chat_column(title, chat_history, model_name):
    """Render one model's heading and chat transcript in the current container.

    Args:
        title: Markdown heading for the column.
        chat_history: List of ``{"role", "content"}`` messages to display.
        model_name: Model key revealed once the vote has been sent.
    """
    st.markdown(title)
    if not chat_history:
        st.markdown("質問を取得してください。")
        return
    for message in chat_history:
        with st.chat_message(message["role"]):
            st.write(message["content"])
    # Reveal which model answered only after the vote has been submitted
    if st.session_state.answer_sent:
        with st.chat_message("assistant"):
            st.markdown(f"`{model_name}` が回答しました。")


def main():
    """Render the whole page: description, arena, vote buttons, leaderboard.

    The leaderboard table is only fetched and shown after a vote has been
    sent (``st.session_state.answer_sent``).
    """
    # page config
    st.set_page_config(
        page_title="日本語チャットボットアリーナ",
        page_icon="🏆",
        layout="wide",
    )

    # Make sure every session-state entry exists before anything reads it
    handle_init_state()

    # Description
    st.markdown("# 🏆 日本語チャットボットアリーナ")
    st.markdown("## 📖 説明")
    st.markdown("| [Twitter](https://twitter.com/yutohub) | [GitHub](https://github.com/yutohub) | [ブログ](https://zenn.dev/yutohub) |")
    st.markdown("日本語チャットボットアリーナは、日本語に対応しているLLMの評価のためのクラウドソーシングプラットフォームです。[LMSYS Chatbot Arena](https://huggingface.co/spaces/lmsys/chatbot-arena-leaderboard) を参考に、日本語に対応しているLLMのリーダーボードを作成することを目的としています。また、一部の質問と回答は、 [ELYZA-tasks-100](https://huggingface.co/elyza/ELYZA-tasks-100) を利用しています。")
    st.markdown(""" > **注意事項:**
    > 
    > 日本語チャットボットアリーナが提供する情報によって生じたいかなる損害についても、サービス提供者は一切の責任を負いません。
    > 日本語チャットボットアリーナは開発中であり、予告なく停止または終了する可能性があります。
    > また、ユーザーの回答を収集し、Creative Commons Attribution (CC-BY) または同様のライセンスの下で配布する権利を留保しています。
    """)

    # Chat transcripts of the two anonymous models
    st.markdown("## ⚔️ チャットボットアリーナ ⚔️")
    st.markdown(" 2つの匿名モデル (ChatGPT、Llama など) の回答を見て、より良いモデルに投票してください。")
    with st.expander(f"🔍 展開するとアリーナに参加している {len(MODELS)} 個のモデルの一覧が表示されます。"):
        st.write(MODELS)
    column_a, column_b = st.columns([1, 1])
    with column_a:
        _render_chat_column(
            "### モデル A",
            st.session_state.chat_history_a,
            st.session_state.model_a,
        )
    with column_b:
        _render_chat_column(
            "### モデル B",
            st.session_state.chat_history_b,
            st.session_state.model_b,
        )

    # Button that loads a question; disabled once a question is loaded or a
    # vote has already been sent
    st.button(
        label="質問を取得",
        on_click=handle_init_question,
        disabled=st.session_state.answer_sent or st.session_state.question_loaded,
        type="primary",
        use_container_width=True,
    )

    # Vote buttons: (label, verdict passed to handle_send_choice); enabled
    # only while a question is loaded
    choices = (
        ("👈 Aの方が良い", "model_a"),
        ("👉 Bの方が良い", "model_b"),
        ("🤝 どちらも良い", "tie"),
        ("👎 どちらも悪い", "tie (bothbad)"),
    )
    for column, (label, verdict) in zip(st.columns([1, 1, 1, 1]), choices):
        with column:
            st.button(
                label=label,
                on_click=handle_send_choice,
                args=(verdict,),
                disabled=not st.session_state.question_loaded,
                use_container_width=True,
            )

    # Leaderboard
    st.markdown("## 🏆 リーダーボード")
    st.markdown(f"合計で {len(MODELS)} 個のモデルがアリーナに参加しています。30 分毎にリーダーボードが更新されます。")
    # Shown only after the user has submitted a vote
    if st.session_state.answer_sent:
        leaderboard = create_leaderboard_df()
        st.dataframe(
            data=leaderboard,
            # Height derived from the number of models; presumably ~35 px per
            # row plus a header row so the table needs no scrolling -- confirm
            height=(len(MODELS) + 1) * 35 + 3,
            use_container_width=True,
            hide_index=True,
        )
    else:
        st.markdown("""
        > まずは、「⚔️ チャットボットアリーナ ⚔️」に回答を送信してください。
        > 回答を送信すると、リーダーボードが表示されます。
        """)

    # Citation
    st.markdown("## 📚 引用")
    st.markdown("""
    ```
    @misc{elyzatasks100,
        title={ELYZA-tasks-100: 日本語instructionモデル評価データセット},
        url={https://huggingface.co/elyza/ELYZA-tasks-100},
        author={Akira Sasaki and Masato Hirakawa and Shintaro Horie and Tomoaki Nakamura},
        year={2023},
    }
    ```
    """)


if __name__ == "__main__":
    main()