Spaces:

yutohub
/

japanese-chatbot-arena-leaderboard

Running

App Files Files Community

yutohub committed on Jan 13

Commit

c624d50

•

1 Parent(s): da86125

Create app.py

Browse files

Files changed (1) hide show

app.py +307 -0

app.py ADDED Viewed

	@@ -0,0 +1,307 @@

+import json
+import os
+import random
+import time
+import pandas as pd
+import requests
+import streamlit as st
+# 環境変数
+with open("models_info.json", "r") as json_file:
+    MODELS_INFO = json.load(json_file)
+with open("test.csv", "r") as file:
+    QUESTION_DF = pd.read_csv(file)
+MODELS = list(MODELS_INFO.keys())
+NUM_QUESTION = 100
+# ランキングを取得
+@st.cache_data
+def get_leaderboard():
+    try:
+        response = requests.get(os.environ['DARABASE_URL'])
+        response_data = response.json()
+        return response_data
+    except Exception as e:
+        print(f"An unexpected error occurred: {e}")
+        return "Error"
+# リーダーボードを作成
+@st.cache_data
+def create_leaderboard_df():
+    # リーダーボードを取得
+    ranking = get_leaderboard()
+    # エラー処理
+    if ranking == "Error":
+        st.error("リーダーボードを取得できませんでした。")
+        print("リーダーボードを取得できませんでした。") # ログを表示
+        return pd.DataFrame()
+    else:
+        # データの初期化
+        ranks, model_names, ratings, organizations, licenses = [], [], [], [], []
+        # リーダーボードの作成
+        for i in range(len(ranking)):
+            ranks.append(i + 1)
+            model_names.append(MODELS_INFO[ranking[i]["model"]][0])
+            ratings.append(ranking[i]["rating"])
+            organizations.append(MODELS_INFO[ranking[i]["model"]][2])
+            licenses.append(MODELS_INFO[ranking[i]["model"]][1])
+        # データフレームを返す
+        return pd.DataFrame({
+            "ランク" : ranks,
+            "🤖 モデル" : model_names,
+            "⭐️ Eloレーティング" : ratings,
+            "🏢 組織" : organizations,
+            "📃 ライセンス" : licenses
+        })
+# サーバーから回答を取得
+def get_answer(model_name, question_id):
+    try:
+        params = {'modelName': model_name, 'questionId': question_id}
+        response = requests.get(os.environ['ANSWER_URL'], params=params)
+        response_data = response.json()
+        return response_data["answer"]
+    except Exception as e:
+        print(f"An unexpected error occurred: {e}")
+        return "Error"
+# サーバーに回答を送信
+def send_choice(question_id, model_a, model_b, winner, language):
+    # エラー処理 (データが入力されていない場合)
+    if not question_id or not model_a or not model_b or not winner or not language:
+        st.error("データが入力されていないため、回答を送信できませんでした。")
+        print("質問と回答を取得してください。") # ログを表示
+        return "Error"
+    try:
+        data = {
+            "question_id": question_id,
+            "model_a": model_a,
+            "model_b": model_b,
+            "winner": winner,
+            "language": language,
+            "tstamp": time.time(),
+        }
+        headers = {
+            'Content-Type': 'application/json'
+        }
+        response = requests.post(os.environ['DARABASE_URL'], headers=headers, data=json.dumps(data))
+        response_data = response.text
+        return response_data
+    except Exception as e:
+        print(f"An unexpected error occurred: {e}")
+        return "Error"
+### Callback Functions ###
+# ステートの初期化を行う
+def handle_init_state():
+    if "chat_history_a" not in st.session_state:
+        st.session_state["chat_history_a"] = []
+    if "chat_history_b" not in st.session_state:
+        st.session_state["chat_history_b"] = []
+    if "question_id" not in st.session_state:
+        st.session_state["question_id"] = None
+    if "model_a" not in st.session_state:
+        st.session_state["model_a"] = None
+    if "model_b" not in st.session_state:
+        st.session_state["model_b"] = None
+    if "question" not in st.session_state:
+        st.session_state["question"] = None
+    # ボタンの状態を初期化
+    if "question_loaded" not in st.session_state:
+        st.session_state["question_loaded"] = False
+    # 送信を状態を初期化
+    if "answer_sent" not in st.session_state:
+        st.session_state["answer_sent"] = False
+# 質問と回答を取得する
+def handle_init_question():
+    # エラー処理
+    if st.session_state.question_loaded:
+        st.session_state.question_loaded = False
+        st.session_state.chat_history_a = []
+        st.session_state.chat_history_b = []
+        st.error("ボタンを連打しないでください。")
+        print("既に質問と回答を取得しています。") # ログを表示
+    else:
+        # ボタンの状態を更新
+        st.session_state.question_loaded = True
+        st.success("質問と回答を取得しています。しばらくお待ちください。")
+        # 質問を取得
+        st.session_state.question_id = random.randint(1, NUM_QUESTION)
+        st.session_state.question = QUESTION_DF["input"][st.session_state.question_id - 1]
+        st.session_state.chat_history_a.append({"role": "user", "content": st.session_state.question})
+        st.session_state.chat_history_b.append({"role": "user", "content": st.session_state.question})
+        # 回答を取得
+        random.shuffle(MODELS)
+        st.session_state.model_a = MODELS[0]
+        st.session_state.model_b = MODELS[1]
+        answer_a = get_answer(st.session_state.model_a, st.session_state.question_id)
+        answer_b = get_answer(st.session_state.model_b, st.session_state.question_id)
+        # チャット履歴を更新
+        st.session_state.chat_history_a.append({"role": "assistant", "content": answer_a})
+        st.session_state.chat_history_b.append({"role": "assistant", "content": answer_b})
+        st.success("質問と回答を取得しました。回答を選択してください。")
+        print("質問と回答を取得しました。") # ログを表示
+# ユーザーの回答を送信する
+def handle_send_choice(winner):
+    # エラー処理
+    if st.session_state.answer_sent:
+        st.error("既に回答を送信しています。")
+        print("既に回答を送信しています。") # ログを表示
+    else:
+        # ボタンの状態を更新
+        st.session_state.answer_sent = True
+        # ユーザーの回答を送信
+        response = send_choice(
+            question_id=st.session_state.question_id,
+            model_a=st.session_state.model_a,
+            model_b=st.session_state.model_b,
+            winner=winner,
+            language="Japanese"
+            )
+        # エラーが発生した場合
+        if response == "Error":
+            st.error("予期せぬエラーが発生しました。")
+        else:
+            st.success("選択肢は正常に送信されました。")
+        # 初期化
+        st.session_state.question_loaded = False
+# 表示部分
+def main():
+    # page config
+    st.set_page_config(
+        page_title="日本語チャットボットアリーナ",
+        page_icon="🏆",
+        layout="wide",
+    )
+    # ステートの初期化
+    handle_init_state()
+    # 説明を表示
+    st.markdown("# 🏆 日本語チャットボットアリーナ")
+    st.markdown("## 📖 説明")
+    st.markdown("| [Twitter](https://twitter.com/yutohub) | [GitHub](https://github.com/yutohub) | [ブログ](https://zenn.dev/yutohub) |")
+    st.markdown("日本語チャットボットアリーナは、日本語に対応しているLLMの評価のためのクラウドソーシングプラットフォームです。[LMSYS Chatbot Arena](https://huggingface.co/spaces/lmsys/chatbot-arena-leaderboard) を参考に、日本語に対応しているLLMのリーダーボードを作成することを目的としています。また、一部の質問と回答は、 [ELYZA-tasks-100](https://huggingface.co/elyza/ELYZA-tasks-100) を利用しています。")
+    st.markdown(""" > **注意事項:**
+    >
+    > 日本語チャットボットアリーナが提供する情報によって生じたいかなる損害についても、サービス提供者は一切の責任を負いません。
+    > 日本語チャットボットアリーナは開発中であり、予告なく停止または終了する可能性があります。
+    > また、ユーザーの回答を収集し、Creative Commons Attribution (CC-BY) または同様のライセンスの下で配布する権利を留保しています。
+    """)
+    # チャット履歴の表示部分
+    st.markdown("## ⚔️ チャットボットアリーナ ⚔️")
+    st.markdown(" 2つの匿名モデル (ChatGPT、Llama など) の回答を見て、より良いモデルに投票してください。")
+    with st.expander(f"🔍 展開するとアリーナに参加している {len(MODELS)} 個のモデルの一覧が表示されます。"):
+        st.write(MODELS)
+    model_a, model_b = st.columns([1, 1])
+    with model_a:
+        st.markdown("### モデル A")
+        if not st.session_state.chat_history_a:
+            st.markdown("質問を取得してください。")
+        else:
+            for message in st.session_state.chat_history_a:
+                with st.chat_message(message["role"]):
+                    st.write(message["content"])
+            # 送信後に正解のモデルを表示する
+            if st.session_state.answer_sent:
+                with st.chat_message("assistant"):
+                    st.markdown(f"`{st.session_state.model_a}` が回答しました、")
+    with model_b:
+        st.markdown("### モデル B")
+        if not st.session_state.chat_history_b:
+            st.markdown("質問を取得してください。")
+        else:
+            for message in st.session_state.chat_history_b:
+                with st.chat_message(message["role"]):
+                    st.write(message["content"])
+            # 送信後に正解のモデルを表示する
+            if st.session_state.answer_sent:
+                with st.chat_message("assistant"):
+                    st.markdown(f"`{st.session_state.model_b}` が回答しました。")
+    # 質問を取得する
+    load_question = st.button(
+        label="質問を取得",
+        on_click=handle_init_question,
+        # 回答済みの場合 or 質問を取得済の場合はボタンを無効化
+        disabled=st.session_state.answer_sent or st.session_state.question_loaded,
+        type="primary",
+        use_container_width=True
+        )
+    # 回答を送信する
+    choice_1, choice_2, choice_3, choice_4 = st.columns([1, 1, 1, 1])
+    with choice_1:
+        choice_1 = st.button(
+            label="👈 Aの方が良い",
+            on_click=handle_send_choice,
+            args=("model_a",),
+            disabled=not st.session_state.question_loaded,
+            use_container_width=True
+        )
+    with choice_2:
+        choice_2 = st.button(
+            label="👉 Bの方が良い",
+            on_click=handle_send_choice,
+            args=("model_b",),
+            disabled=not st.session_state.question_loaded,
+            use_container_width=True
+        )
+    with choice_3:
+        choice_3 = st.button(
+            label="🤝 どちらも良い",
+            on_click=handle_send_choice,
+            args=("tie",),
+            disabled=not st.session_state.question_loaded,
+            use_container_width=True
+        )
+    with choice_4:
+        choice_4 = st.button(
+            label="👎 どちらも悪い",
+            on_click=handle_send_choice,
+            args=("tie (bothbad)",),
+            disabled=not st.session_state.question_loaded,
+            use_container_width=True
+        )
+    # リーダーボードを表示する
+    st.markdown("## 🏆 リーダーボード")
+    st.markdown(f"合計で {len(MODELS)} 個のモデルがアリーナに参加しています。30 分毎にリーダーボードが更新されます。")
+    # 回答を送信した場合のみ表示する
+    if st.session_state.answer_sent:
+        # リーダーボードを取得
+        leaderboard = create_leaderboard_df()
+        st.dataframe(
+            data=leaderboard,
+            height=(len(MODELS) + 1) * 35 + 3,
+            use_container_width=True,
+            hide_index=True,
+        )
+    else:
+        st.markdown("""
+        > まずは、「⚔️ チャットボットアリーナ ⚔️」に回答を送信してください。
+        > 回答を送信すると、リーダーボードが表示されます。
+        """)
+    # 引用を表示する
+    st.markdown("## 📚 引用")
+    st.markdown("""
+    ```
+    @misc{elyzatasks100,
+        title={ELYZA-tasks-100: 日本語instructionモデル評価データセット},
+        url={https://huggingface.co/elyza/ELYZA-tasks-100},
+        author={Akira Sasaki and Masato Hirakawa and Shintaro Horie and Tomoaki Nakamura},
+        year={2023},
+    }
+    ```
+    """)
+# Render the page only when this file is executed as the main script.
+if __name__ == "__main__":
+    main()