Spaces:
Runtime error
Runtime error
Upload 7 files
Browse files- .gitattributes +2 -35
- .gitignore +155 -0
- arena.py +752 -0
- group.jpg +0 -0
- requirements.txt +4 -0
- useapi.py +172 -0
- utils.py +101 -0
.gitattributes
CHANGED
@@ -1,35 +1,2 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
*.bin filter=lfs diff=lfs merge=lfs -text
|
4 |
-
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
5 |
-
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
6 |
-
*.ftz filter=lfs diff=lfs merge=lfs -text
|
7 |
-
*.gz filter=lfs diff=lfs merge=lfs -text
|
8 |
-
*.h5 filter=lfs diff=lfs merge=lfs -text
|
9 |
-
*.joblib filter=lfs diff=lfs merge=lfs -text
|
10 |
-
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
11 |
-
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
12 |
-
*.model filter=lfs diff=lfs merge=lfs -text
|
13 |
-
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
14 |
-
*.npy filter=lfs diff=lfs merge=lfs -text
|
15 |
-
*.npz filter=lfs diff=lfs merge=lfs -text
|
16 |
-
*.onnx filter=lfs diff=lfs merge=lfs -text
|
17 |
-
*.ot filter=lfs diff=lfs merge=lfs -text
|
18 |
-
*.parquet filter=lfs diff=lfs merge=lfs -text
|
19 |
-
*.pb filter=lfs diff=lfs merge=lfs -text
|
20 |
-
*.pickle filter=lfs diff=lfs merge=lfs -text
|
21 |
-
*.pkl filter=lfs diff=lfs merge=lfs -text
|
22 |
-
*.pt filter=lfs diff=lfs merge=lfs -text
|
23 |
-
*.pth filter=lfs diff=lfs merge=lfs -text
|
24 |
-
*.rar filter=lfs diff=lfs merge=lfs -text
|
25 |
-
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
26 |
-
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
27 |
-
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
28 |
-
*.tar filter=lfs diff=lfs merge=lfs -text
|
29 |
-
*.tflite filter=lfs diff=lfs merge=lfs -text
|
30 |
-
*.tgz filter=lfs diff=lfs merge=lfs -text
|
31 |
-
*.wasm filter=lfs diff=lfs merge=lfs -text
|
32 |
-
*.xz filter=lfs diff=lfs merge=lfs -text
|
33 |
-
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
-
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
-
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
1 |
+
# Auto detect text files and perform LF normalization
|
2 |
+
* text=auto
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
.gitignore
ADDED
@@ -0,0 +1,155 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Byte-compiled / optimized / DLL files
|
2 |
+
__pycache__/
|
3 |
+
*.py[cod]
|
4 |
+
*$py.class
|
5 |
+
|
6 |
+
# C extensions
|
7 |
+
*.so
|
8 |
+
|
9 |
+
# Distribution / packaging
|
10 |
+
.Python
|
11 |
+
build/
|
12 |
+
develop-eggs/
|
13 |
+
dist/
|
14 |
+
downloads/
|
15 |
+
eggs/
|
16 |
+
.eggs/
|
17 |
+
lib/
|
18 |
+
lib64/
|
19 |
+
parts/
|
20 |
+
sdist/
|
21 |
+
var/
|
22 |
+
wheels/
|
23 |
+
share/python-wheels/
|
24 |
+
*.egg-info/
|
25 |
+
.installed.cfg
|
26 |
+
*.egg
|
27 |
+
MANIFEST
|
28 |
+
|
29 |
+
# PyInstaller
|
30 |
+
# Usually these files are written by a python script from a template
|
31 |
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
32 |
+
*.manifest
|
33 |
+
*.spec
|
34 |
+
|
35 |
+
# Installer logs
|
36 |
+
pip-log.txt
|
37 |
+
pip-delete-this-directory.txt
|
38 |
+
|
39 |
+
# Unit test / coverage reports
|
40 |
+
htmlcov/
|
41 |
+
.tox/
|
42 |
+
.nox/
|
43 |
+
.coverage
|
44 |
+
.coverage.*
|
45 |
+
.cache
|
46 |
+
nosetests.xml
|
47 |
+
coverage.xml
|
48 |
+
*.cover
|
49 |
+
*.py,cover
|
50 |
+
.hypothesis/
|
51 |
+
.pytest_cache/
|
52 |
+
cover/
|
53 |
+
|
54 |
+
# Translations
|
55 |
+
*.mo
|
56 |
+
*.pot
|
57 |
+
|
58 |
+
# Django stuff:
|
59 |
+
*.log
|
60 |
+
local_settings.py
|
61 |
+
db.sqlite3
|
62 |
+
db.sqlite3-journal
|
63 |
+
|
64 |
+
# Flask stuff:
|
65 |
+
instance/
|
66 |
+
.webassets-cache
|
67 |
+
|
68 |
+
# Scrapy stuff:
|
69 |
+
.scrapy
|
70 |
+
|
71 |
+
# Sphinx documentation
|
72 |
+
docs/_build/
|
73 |
+
|
74 |
+
# PyBuilder
|
75 |
+
.pybuilder/
|
76 |
+
target/
|
77 |
+
|
78 |
+
# Jupyter Notebook
|
79 |
+
.ipynb_checkpoints
|
80 |
+
|
81 |
+
# IPython
|
82 |
+
profile_default/
|
83 |
+
ipython_config.py
|
84 |
+
|
85 |
+
# pyenv
|
86 |
+
# For a library or package, you might want to ignore these files since the code is
|
87 |
+
# intended to run in multiple environments; otherwise, check them in:
|
88 |
+
# .python-version
|
89 |
+
|
90 |
+
# pipenv
|
91 |
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
92 |
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
93 |
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
94 |
+
# install all needed dependencies.
|
95 |
+
#Pipfile.lock
|
96 |
+
|
97 |
+
# poetry
|
98 |
+
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
99 |
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
100 |
+
# commonly ignored for libraries.
|
101 |
+
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
102 |
+
#poetry.lock
|
103 |
+
|
104 |
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
|
105 |
+
__pypackages__/
|
106 |
+
|
107 |
+
# Celery stuff
|
108 |
+
celerybeat-schedule
|
109 |
+
celerybeat.pid
|
110 |
+
|
111 |
+
# SageMath parsed files
|
112 |
+
*.sage.py
|
113 |
+
|
114 |
+
# Environments
|
115 |
+
.env
|
116 |
+
.venv
|
117 |
+
env/
|
118 |
+
venv/
|
119 |
+
ENV/
|
120 |
+
env.bak/
|
121 |
+
venv.bak/
|
122 |
+
|
123 |
+
# Spyder project settings
|
124 |
+
.spyderproject
|
125 |
+
.spyproject
|
126 |
+
|
127 |
+
# Rope project settings
|
128 |
+
.ropeproject
|
129 |
+
|
130 |
+
# mkdocs documentation
|
131 |
+
/site
|
132 |
+
|
133 |
+
# mypy
|
134 |
+
.mypy_cache/
|
135 |
+
.dmypy.json
|
136 |
+
dmypy.json
|
137 |
+
|
138 |
+
# Pyre type checker
|
139 |
+
.pyre/
|
140 |
+
|
141 |
+
# pytype static type analyzer
|
142 |
+
.pytype/
|
143 |
+
|
144 |
+
# Cython debug symbols
|
145 |
+
cython_debug/
|
146 |
+
|
147 |
+
# PyCharm
|
148 |
+
# JetBrains specific template is maintainted in a separate JetBrains.gitignore that can
|
149 |
+
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
150 |
+
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
151 |
+
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
152 |
+
#.idea/
|
153 |
+
|
154 |
+
#database
|
155 |
+
*.csv
|
arena.py
ADDED
@@ -0,0 +1,752 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# ruff: noqa
|
2 |
+
import random
|
3 |
+
import requests
|
4 |
+
import io
|
5 |
+
import gradio as gr
|
6 |
+
import pandas as pd
|
7 |
+
from PIL import Image
|
8 |
+
from useapi import *
|
9 |
+
from utils import *
|
10 |
+
|
11 |
+
|
12 |
+
def set_interface_language(lang):
    """Return the UI string table (labels, button captions, markdown copy) for *lang*.

    Supported values: "Chinese", "English", "Japanese", "Korean". Any other
    value now falls back to the English table instead of implicitly returning
    None, which would crash every caller that indexes the result.
    """
    if lang == "Chinese":
        return {
            "title": "# LLM角色扮演竞技场:在角色扮演场景中评估LLMs的表现",
            "intro": """
## 📜 规则
#### · 与两个匿名的模型(例如 Claude, Gemini, Llama )同时进行角色扮演(他们会成为一个相同的角色),投票选出更好的那个
#### · 你可以一直对话直到选出赢家(单轮对话上限是5轮)
#### · [角色来自Rubii.ai, 想要和角色进行更长久的角色扮演?来Rubii.ai吧。](https://rubii.ai/)
#### · 想要开始对话,您需要先在"选择角色"中选择一个角色,之后您需要在"选择时刻"中选择一个时刻,时刻是给角色选一个场景和开场白,这样可以与角色在想要的场景中进行对话。
""",
            "avatar_label": "角色图片",
            "char_choice_label": "选择角色",
            "preset_prompt_label": "选择时刻",
            "refresh_button": "刷新角色列表",
            "bio_label": "输入你的自设",
            "bio_placeholder": "我的名字叫Rubii",
            "chatbox1_label": "Model 1 的答复",
            "chatbox2_label": "Model 2 的答复",
            "user_input_placeholder": "在此输入对话",
            "battle_button": "发送",
            "score_instruction": "在获得答复后可使用以下四个按钮对本轮对话打分",
            "model1win_button": "Model 1 效果更好",
            "model2win_button": "Model 2 效果更好",
            "tie_button": "效果一样好",
            "bothbad_button": "效果都不好",
            "result_placeholder": "结果: ",
            "refresh_chat_button": "刷新对话",
            "ranking_tab": "🏆 模型排行",
            "model_name_header": "模型名称",
            "contest_count_header": "参赛次数",
            "win_rate_header": "模型胜率",
            "random_model": "⚔️ 随机模型",
            "select_language": "选择语言(语言很重要,选择语言决定了角色扮演中AI用的语言)",
            "select_language2": "选择语言",
            "contant": """
## 联系我们
### 我们正在积极寻求更多模型愿意参与我们排行榜,
### 我们也在寻求合作,如果您有意的话,欢迎请联系我们。**Email:** [[email protected]](mailto:[email protected])
### 您可以在我们的[Discord](https://discord.gg/jwcTfTpYj5)反馈遇到的BUG和问题
## 服务条款
### 用户在使用服务之前需同意以下条款:
### 该服务为研究预览版。它仅提供有限的安全措施,可能会生成冒犯性内容。不得将该服务用于任何非法、有害、暴力、种族主义或性相关的目的。请勿上传任何私人信息。该服务会收集用户的对话数据,包括文字和图像,并保留在创作共用署名(CC-BY)或类似许可证下分发这些数据的权利。
""",
        }
    elif lang == "English":
        return {
            "title": "# Roleplay LLM Arena: Benchmarking LLMs in the Roleplay Scenario",
            "intro": """
## 📜 Rules
#### · Engage in roleplay with two anonymous models (e.g., Claude, Gemini, Llama) simultaneously, as they take on the same character. Vote for the one that performs better.
#### · You can continue the conversation until you select a winner (the maximum number of dialogue rounds per session is 5).
#### · [The character comes from Rubii.ai. Want to engage in longer roleplay sessions with the character? Come to Rubii.ai.](https://rubii.ai/)
#### · To start the comparison, you need to first select a character in "Choose Character." Then, you need to select a "Moment" in "Choose Moment." A moment is used to set a scene and opening line for the character, allowing you to converse with the character in the desired scenario.
""",
            "avatar_label": "Character Image",
            "char_choice_label": "Select Character",
            "preset_prompt_label": "Select Moment",
            "refresh_button": "Refresh Character List",
            "bio_label": "Enter your bio",
            "bio_placeholder": "My name is Rubii.",
            "chatbox1_label": "Model 1's Response",
            "chatbox2_label": "Model 2's Response",
            "user_input_placeholder": "Enter conversation here",
            "battle_button": "Send",
            "score_instruction": "Use the following four buttons to score this round after receiving the response",
            "model1win_button": "Model 1 is better",
            "model2win_button": "Model 2 is better",
            "tie_button": "Both are equally good",
            "bothbad_button": "Both are bad",
            "result_placeholder": "Result: ",
            "refresh_chat_button": "Refresh Conversation",
            "ranking_tab": "🏆 Model Ranking",
            "model_name_header": "Model Name",
            "contest_count_header": "Contest Count",
            "win_rate_header": "Win Rate",
            "random_model": "⚔️ Random Model",
            "select_language": "Select a language (Language is important; the choice of language determines the language the AI will use in the roleplay)",
            "select_language2": "Select a language",
            "contant": """
## Contact Us
### We are actively seeking more models willing to participate in our leaderboard.
### We are also looking for collaboration opportunities. If you are interested, please contact us. **Email:** [[email protected]](mailto:[email protected]).
### You can report any bugs and issues on our [Discord](https://discord.gg/jwcTfTpYj5).
## Terms of Service
### Users must agree to the following terms before using the service:
### This service is a research preview. It provides limited safety measures and may generate offensive content. The service should not be used for any illegal, harmful, violent, racist, or sexually related purposes. Please do not upload any personal information. The service collects user conversation data, including text and images, and reserves the right to distribute this data under Creative Commons Attribution (CC-BY) or similar licenses.
""",
        }
    elif lang == "Japanese":
        return {
            "title": "# LLMロールプレイアリーナ:ロールプレイシナリオでのLLMのパフォーマンスを評価",
            # Fixed: the Rubii.ai link below had a stray trailing "]" that
            # broke the rendered markdown.
            "intro": """
## 📜 ルール
#### · 2つの匿名モデル(例: Claude, Gemini, Llama)と同時にロールプレイを行い(彼らは同じキャラクターになります)、より良い方に投票してください。
#### · 勝者が決まるまで会話を続けることができます(1ターンあたりの会話の上限は5ターンです)。
#### · [キャラクターはRubii.aiから来ました。キャラクターともっと長いロールプレイをしたいですか?Rubii.aiに来てください。](https://rubii.ai/)
#### · 会話を始めるには、まず「キャラクターを選択」でキャラクターを選択し、「時刻を選択」でシーンとオープニングを選択してください。これにより、キャラクターと望むシーンで会話ができます。
""",
            "avatar_label": "キャラクター画像",
            "char_choice_label": "キャラクターを選択",
            "preset_prompt_label": "時刻を選択",
            "refresh_button": "キャラクターリストを更新",
            "bio_label": "あなたのプロフィールを入力",
            "bio_placeholder": "私の名前はRubii",
            "chatbox1_label": "Model 1 の応答",
            "chatbox2_label": "Model 2 の応答",
            "user_input_placeholder": "ここにメッセージを入力",
            "battle_button": "送信",
            "score_instruction": "応答を受け取った後、以下の4つのボタンでこのターンの会話を評価できます",
            "model1win_button": "Model 1 がより良い",
            "model2win_button": "Model 2 がより良い",
            "tie_button": "同じくらい良い",
            "bothbad_button": "どちらも良くない",
            "result_placeholder": "結果: ",
            "refresh_chat_button": "会話を更新",
            "ranking_tab": "🏆 モデルランキング",
            "model_name_header": "モデル名",
            "contest_count_header": "参加回数",
            "win_rate_header": "モデル勝率",
            "random_model": "⚔️ ランダムモデル",
            "select_language": "言語を選択してください(言語は非常に重要です。選択した言語はロールプレイでAIが使用する言語を決定します)",
            "select_language2": "言語を選択してください",
            "contant": """
## お問い合わせ
### 私たちは、リーダーボードに参加したいモデルを積極的に探しています。
### 私たちはコラボレーションの機会も探しています。興味がある方は、ぜひご連絡ください。**メール:** [[email protected]](mailto:[email protected])。
### バグや問題が発生した場合は、[Discord](https://discord.gg/jwcTfTpYj5)で報告できます。
## 利用規約
### サービスを利用する前に、ユーザーは以下の規約に同意する必要があります:
### 本サービスは研究プレビュー版です。限られた安全対策を提供しており、攻撃的な内容を生成する可能性があります。本サービスを違法、有害、暴力的、人種差別的、または性的な目的で使用しないでください。個人情報のアップロードは避けてください。本サービスはユーザーの会話データ(テキストおよび画像)を収集し、クリエイティブ・コモンズ・ライセンス(CC-BY)または同様のライセンスの下でこれらのデータを配布する権利を保有します。
""",
        }
    elif lang == "Korean":
        return {
            "title": "# LLM 역할 수행 경기장: 역할 수행 시나리오에서 LLM의 성능 평가",
            "intro": """
## 📜 규칙
#### · 두 개의 익명의 모델(예: Claude, Gemini, Llama)과 동시에 역할 수행을 진행하고, 더 나은 모델을 선택하세요.
#### · 우승자를 선택할 때까지 계속 대화를 진행할 수 있습니다(최대 5라운드).
#### · [캐릭터는 Rubii.ai에서 왔습니다. 캐릭터와 더 긴 롤플레이를 하고 싶으신가요? Rubii.ai로 오세요.](https://rubii.ai/)
#### · 대화를 시작하려면 먼저 "캐릭터 선택"에서 캐릭터를 선택해야 하며, 그 다음 "시나리오 선택"에서 시나리오를 선택해야 합니다. 시나리오는 캐릭터에게 장면과 오프닝을 제공하여 원하는 시나리오에서 대화를 진행할 수 있게 합니다.
""",
            "avatar_label": "캐릭터 이미지",
            "char_choice_label": "캐릭터 선택",
            "preset_prompt_label": "시나리오 선택",
            "refresh_button": "캐릭터 목록 새로고침",
            "bio_label": "자신의 설정 입력",
            "bio_placeholder": "제 이름은 루비입니다.",
            "chatbox1_label": "Model 1의 응답",
            "chatbox2_label": "Model 2의 응답",
            "user_input_placeholder": "여기에 대화 입력",
            "battle_button": "보내기",
            "score_instruction": "응답을 받은 후 아래 네 개의 버튼을 사용하여 이번 라운드를 평가할 수 있습니다.",
            "model1win_button": "Model 1이 더 나음",
            "model2win_button": "Model 2가 더 나음",
            "tie_button": "똑같이 좋음",
            "bothbad_button": "둘 다 별로임",
            "result_placeholder": "결과: ",
            "refresh_chat_button": "대화 새로고침",
            "ranking_tab": "🏆 모델 순위",
            "model_name_header": "모델 이름",
            "contest_count_header": "참가 횟수",
            "win_rate_header": "모델 승률",
            "random_model": "⚔️ 랜덤 모델",
            "select_language": "언어를 선택하세요 (언어는 매우 중요합니다. 선택한 언어는 역할 놀이에서 AI가 사용할 언어를 결정합니다)",
            "select_language2": "언어를 선택하세요",
            "contant": """
## 문의하기
### 우리는 리더보드에 참여할 의향이 있는 모델을 적극적으로 찾고 있습니다.
### 우리는 또한 협력 기회를 모색하고 있습니다. 관심이 있으시면 연락해 주세요. **이메일:** [[email protected]](mailto:[email protected])
### 버그 및 문제는 [Discord](https://discord.gg/jwcTfTpYj5)에서 보고할 수 있습니다.
## 이용 약관
### 사용자는 서비스를 사용하기 전에 다음 약관에 동의해야 합니다:
### 이 서비스는 연구 미리보기 버전입니다. 제한된 안전 조치를 제공하며, 불쾌한 콘텐츠를 생성할 수 있습니다. 이 서비스를 불법적, 해롭거나, 폭력적이거나, 인종차별적이거나, 성적으로 관련된 목적으로 사용하지 마십시오. 개인 정보를 업로드하지 마십시오. 이 서비스는 사용자 대화 데이터(텍스트 및 이미지)를 수집하며, 크리에이티브 커먼즈 저작자 표시(CC-BY) 또는 유사한 라이선스 하에 이 데이터를 배포할 권리를 보유합니다.
""",
        }
    # Fallback: unknown language codes previously fell off the end of the
    # if/elif chain and returned None, crashing callers that index the table.
    return set_interface_language("English")
190 |
+
|
191 |
+
|
192 |
+
async def run_battle(
    user_input,
    chatbox1,
    chatbox2,
    session_id1,
    session_id2,
    chat_count,
    bio,
    preset_prompt,
    selected_models,
):
    """Stream one battle turn: send *user_input* to both anonymous models.

    Async generator used as a Gradio streaming callback. Every yield emits a
    13-tuple in the order the event wiring expects: (chatbox1, chatbox2,
    model1 name, model2 name, four vote-button updates, session_id1,
    session_id2, chat_count, send-button update, input-textbox update).
    """
    # Hard cap of 5 user turns per round: append a fixed notice to both
    # panes, disable the send button, and stop streaming.
    if chat_count >= 5:
        chatbox1 = chatbox1 + [
            (
                "您已经在此体验了多次模型效果了,前往 rubii.ai 继续对话吧",
                "您已经在此体验了多次模型效果了,前往 rubii.ai 继续对话吧",
            )
        ]
        chatbox2 = chatbox2 + [
            (
                "您已经在此体验了多次模型效果了,前往 rubii.ai 继续对话吧",
                "您已经在此体验了多次模型效果了,前往 rubii.ai 继续对话吧",
            )
        ]
        yield (
            chatbox1,
            chatbox2,
            selected_models[0],
            selected_models[1],
            gr.update(interactive=True),
            gr.update(interactive=True),
            gr.update(interactive=True),
            gr.update(interactive=True),
            session_id1,
            session_id2,
            chat_count,
            gr.update(interactive=False),  # lock the send button
            gr.update(value=""),
        )
        return
    chat_count += 1
    # Echo the user's message immediately with empty assistant slots so the
    # UI reflects the turn before the first streamed chunk arrives.
    chatbox1 = chatbox1 + [(user_input, "")]
    chatbox2 = chatbox2 + [(user_input, "")]
    yield (
        chatbox1,
        chatbox2,
        selected_models[0],
        selected_models[1],
        gr.update(interactive=True),
        gr.update(interactive=True),
        gr.update(interactive=True),
        gr.update(interactive=True),
        session_id1,
        session_id2,
        chat_count,
        gr.update(interactive=True),
        gr.update(value=""),
    )
    response1 = ""
    response2 = ""
    # NOTE(review): `language` here resolves to the module-level name, which
    # the UI code later rebinds to a gr.Radio component — confirm that
    # combine_streams expects that object rather than a plain language string.
    async for chunk in combine_streams(
        user_input,
        user_input,
        selected_models[0],
        selected_models[1],
        preset_prompt["_id"],
        preset_prompt["_id"],
        session_id1,
        session_id2,
        bio,
        bio,
        language,
    ):
        # Session ids arrive once in header chunks; capture them so later
        # turns reuse the same server-side conversations.
        if "requestA_header" in chunk:
            session_id1 = chunk["requestA_header"]["x-session-id"]
        if "requestB_header" in chunk:
            session_id2 = chunk["requestB_header"]["x-session-id"]
        if "requestA" in chunk:
            response1 += chunk["requestA"]
        if "requestB" in chunk:
            response2 += chunk["requestB"]
        # Replace the trailing (user, partial) pair with the grown text so
        # the chat panes update incrementally as chunks stream in.
        chatbox1 = chatbox1[:-1] + [(user_input, response1)]
        chatbox2 = chatbox2[:-1] + [(user_input, response2)]
        yield (
            chatbox1,
            chatbox2,
            selected_models[0],
            selected_models[1],
            gr.update(interactive=True),
            gr.update(interactive=True),
            gr.update(interactive=True),
            gr.update(interactive=True),
            session_id1,
            session_id2,
            chat_count,
            gr.update(interactive=True),
            gr.update(value=""),
        )
290 |
+
|
291 |
+
|
292 |
+
def select_winner(model1_name, model2_name, state, turn, anony, Language):
    """Record the user's vote and return the localized thank-you message.

    *state* is one of "Model 1", "Model 2", "tie", "bothbad"; *Language* is
    one of the four supported UI languages. Returns a 6-tuple: the result
    text plus five gr.update(interactive=False) objects that grey out the
    voting and battle controls until the round is refreshed.
    """
    # Guard against an unexpected state/Language combination: without this
    # default, `result` would be unbound and the function would raise
    # UnboundLocalError instead of recording the vote.
    result = ""
    if Language == "Chinese":
        if state == "Model 1":
            result = f"感谢您的投票,你选择了 {state} - {model1_name} 效果更好,{model2_name} 效果更差,刷新以进行下一轮测试"
        elif state == "Model 2":
            result = f"感谢您的投票,你选择了 {state} - {model2_name} 效果更好,{model1_name} 效果更差,刷新以进行下一轮测试"
        elif state == "tie":
            result = f"感谢您的投票,你选择了 {model1_name} 与 {model2_name} 效果都很好,刷新以进行下一轮测试"
        elif state == "bothbad":
            result = f"感谢您的投票,你选择了 {model1_name} 与 {model2_name} 效果都不好,刷新以进行下一轮测试"
    elif Language == "English":
        if state == "Model 1":
            result = f"Thank you for your vote. You chose {state} - {model1_name} performed better, {model2_name} performed worse. Refresh to proceed to the next round of testing."
        elif state == "Model 2":
            result = f"Thank you for your vote. You chose {state} - {model2_name} performed better, {model1_name} performed worse. Refresh to proceed to the next round of testing."
        elif state == "tie":
            result = f"Thank you for your vote. You selected that both {model1_name} and {model2_name} performed well. Refresh to proceed to the next round of testing."
        elif state == "bothbad":
            result = f"Thank you for your vote. You chose that both {model1_name} and {model2_name} performed poorly. Refresh to proceed to the next round of testing."
    elif Language == "Japanese":
        if state == "Model 1":
            result = f"投票ありがとうございます。あなたは {state} - {model1_name} の方が良く、{model2_name} は劣っていると選びました。次のテストを行うにはリフレッシュしてください。"
        elif state == "Model 2":
            result = f"投票ありがとうございます。あなたは {state} - {model2_name} の方が良く、{model1_name} は劣っていると選びました。次のテストを行うにはリフレッシュしてください。"
        elif state == "tie":
            result = f"投票ありがとうございます。あなたは {model1_name} と {model2_name} の両方が良いと選びました。次のテストを行うにはリフレッシュしてください。"
        elif state == "bothbad":
            result = f"投票ありがとうございます。あなたは {model1_name} と {model2_name} の両方が良くないと選びました。次のテストを行うにはリフレッシュしてください。"
    elif Language == "Korean":
        if state == "Model 1":
            result = f"투표해 주셔서 감사합니다. {state} - {model1_name} 이(가) 더 좋다고 선택하셨습니다. {model2_name} 이(가) 더 나쁩니다. 다음 테스트를 위해 새로 고침하세요."
        elif state == "Model 2":
            result = f"투표해 주셔서 감사합니다. {state} - {model2_name} 이(가) 더 좋다고 선택하셨습니다. {model1_name} 이(가) 더 나쁩니다. 다음 테스트를 위해 새로 고침하세요."
        elif state == "tie":
            result = f"투표해 주셔서 감사합니다. {model1_name} 과(와) {model2_name} 둘 다 좋다고 선택하셨습니다. 다음 테스트를 위해 새로 고침하세요."
        elif state == "bothbad":
            result = f"투표해 주셔서 감사합니다. {model1_name} 과(와) {model2_name} 둘 다 나쁘다고 선택하셨습니다. 다음 테스트를 위해 새로 고침하세요."
    # Persist the vote for the leaderboard before updating the UI.
    update_model_stats(model1_name, model2_name, state, turn, anony, Language)
    # Return the result and grey out the four vote buttons plus send button.
    return (
        result,
        gr.update(interactive=False),
        gr.update(interactive=False),
        gr.update(interactive=False),
        gr.update(interactive=False),
        gr.update(interactive=False),
    )
339 |
+
|
340 |
+
|
341 |
+
async def get_preset_prompts(char_id, language):
    """Fetch the recommended "moments" for a character.

    Returns (display_name, moment_payload) pairs suitable as Gradio
    dropdown choices.
    """
    moments = await recommand(char_id, language)
    pairs = []
    for moment in moments:
        pairs.append((moment["name"], moment))
    return pairs
344 |
+
|
345 |
+
|
346 |
+
async def update_preset_prompt(char_id, language):
    """Load the "moment" presets and avatar image for a selected character.

    Returns (dropdown update carrying the new preset choices,
    avatar image resized to 224x224).
    """
    preset_prompts = await get_preset_prompts(char_id, language)
    avatar_image_url = id_to_avatar(char_id)
    # timeout: don't hang the UI forever on an unresponsive image host;
    # raise_for_status: surface a clear HTTP error instead of letting PIL
    # fail cryptically while decoding an error page body.
    response = requests.get(avatar_image_url, timeout=10)
    response.raise_for_status()
    image = Image.open(io.BytesIO(response.content))
    resized_image = image.resize((224, 224))
    return gr.update(choices=preset_prompts), resized_image
353 |
+
|
354 |
+
|
355 |
+
def update_chat_and_avatar(moment):
    """Start a fresh battle round for the selected moment.

    Returns (chatbox1 history, chatbox2 history, moment image URL,
    session_id1, session_id2, [model_a, model_b]).
    """
    # Both chat panes open with the moment's scripted opening line.
    opening = [(None, moment["opening"])]
    # random.sample draws two distinct positions, but `models` may contain
    # duplicate names, so re-draw until the two *values* differ.
    selected_models = random.sample(models, 2)
    while selected_models[0] == selected_models[1]:
        selected_models = random.sample(models, 2)
    # (removed leftover debug print of the chosen model pair)
    return opening, opening, moment["image_url"], "", "", selected_models
362 |
+
|
363 |
+
|
364 |
+
def refresh_data(language):
    """Rebuild the character dropdown choices for the given language."""
    choice_pairs = [
        (char["name"], char["_id"]) for char in recommand_character(language)
    ]
    return gr.update(choices=choice_pairs)
368 |
+
|
369 |
+
|
370 |
+
def refresh_chat(moment):
    """Reset both chat panes for a new round of the same moment.

    Re-opens the conversation and re-draws the model pair, disables the four
    vote buttons, re-enables the send button, and resets the result text and
    the round counter.
    """
    round_state = update_chat_and_avatar(moment)
    vote_buttons_off = [gr.update(interactive=False) for _ in range(4)]
    return (
        *round_state,
        *vote_buttons_off,
        gr.update(interactive=True),
        gr.update(value="结果:"),
        gr.update(value=0),
    )
389 |
+
|
390 |
+
|
391 |
+
def update_language(lang):
    """Re-localize every labeled component after a language switch.

    Returns the updates in the exact positional order the Gradio event
    wiring expects.
    """
    print("update_language", lang)
    ui = set_interface_language(lang)
    char_pairs = [(char["name"], char["_id"]) for char in recommand_character(lang)]
    return (
        ui["title"],
        ui["intro"],
        None,
        gr.update(label=ui["char_choice_label"], choices=char_pairs),
        gr.update(label=ui["preset_prompt_label"]),
        gr.update(value=ui["refresh_button"]),
        gr.update(placeholder=ui["bio_placeholder"], label=ui["bio_label"]),
        gr.update(label=ui["chatbox1_label"]),
        gr.update(label=ui["chatbox2_label"]),
        gr.update(placeholder=ui["user_input_placeholder"]),
        gr.update(value=ui["battle_button"]),
        gr.update(placeholder=ui["result_placeholder"]),
        gr.update(value=ui["refresh_chat_button"]),
        gr.update(value=ui["model1win_button"]),
        gr.update(value=ui["model2win_button"]),
        gr.update(value=ui["tie_button"]),
        gr.update(value=ui["bothbad_button"]),
        gr.update(label=ui["random_model"]),
        gr.update(label=ui["ranking_tab"]),
        gr.update(label=ui["select_language"], value=lang),
        ui["score_instruction"],
        ui["contant"],
        gr.update(value=lang, label=ui["select_language2"]),
    )
421 |
+
|
422 |
+
|
423 |
+
def auto_i18n(request: gr.Request):
    """Map the browser's Accept-Language header to a supported UI language.

    Only the first (highest-priority) locale tag is considered; anything
    outside zh/en/ja/ko defaults to Chinese.
    """
    print(request.headers["Accept-Language"])
    primary_tag = request.headers["Accept-Language"].split(",")[0].lower()
    for prefix, name in (
        ("zh", "Chinese"),
        ("en", "English"),
        ("ja", "Japanese"),
        ("ko", "Korean"),
    ):
        if primary_tag.startswith(prefix):
            return name
    return "Chinese"
436 |
+
|
437 |
+
|
438 |
+
def init_and_update(request: gr.Request):
    """On page load: detect the visitor's language and localize the whole UI.

    Returns [detected_lang] followed by every update from update_language,
    matching the outputs list of the demo.load() wiring.
    """
    detected_lang = auto_i18n(request)
    return [detected_lang, *update_language(detected_lang)]
441 |
+
def passive_language_change(lang):
    """Mirror a language picked on the ranking tab back to the main selector.

    Returning a component update (rather than the bare value) lets the main
    selector's own ``change`` listener fire and re-localize the interface.
    """
    selector_update = gr.update(value=lang)
    return selector_update
|
443 |
+
|
444 |
+
|
445 |
+
with gr.Blocks() as demo:
    # demo.load() (registered at the bottom) overwrites this default with the
    # language detected from the visitor's Accept-Language header.
    default_language = gr.State("Chinese")
    language = "Chinese"
    # Character dropdown choices as (display name, character id) pairs.
    characters = recommand_character(language)
    characters = [(item["name"], item["_id"]) for item in characters]
    # Localized UI strings for the initial render.
    text = set_interface_language(default_language.value)
    # NOTE(review): `models` does not appear to be used below — confirm it is
    # needed (get_models() also performs a network request at import time).
    models = get_models()

    # ---- Battle tab: two anonymous models answer the same prompts ----
    with gr.Tab(text["random_model"]) as random_model_tab:
        with gr.Column():
            title = gr.Markdown(f"{text['title']}")
            with gr.Column(scale=10):
                intro = gr.Markdown(f"{text['intro']}")
            with gr.Column(scale=1):
                language = gr.Radio(
                    ["English", "Chinese", "Japanese", "Korean"],
                    label=text["select_language"],
                    value=default_language.value,
                )
        with gr.Row():
            with gr.Column(scale=1):
                avatar_image = gr.Image(scale=1, label=text["avatar_label"])
            with gr.Column(scale=7):
                with gr.Row():
                    char_choice = gr.Dropdown(
                        choices=characters,
                        label=text["char_choice_label"],
                        scale=3,
                    )
                    preset_prompt = gr.Dropdown(
                        label=text["preset_prompt_label"], scale=3
                    )
                    refresh_button = gr.Button(
                        text["refresh_button"], scale=1, variant="primary"
                    )
                with gr.Row():
                    bio = gr.Textbox(
                        show_label=True,
                        label=text["bio_label"],
                        placeholder=text["bio_placeholder"],
                    )
        # Side-by-side transcripts, one per anonymous model.
        with gr.Row():
            chatbox1 = gr.Chatbot(label=text["chatbox1_label"])
            chatbox2 = gr.Chatbot(label=text["chatbox2_label"])
        with gr.Row():
            user_input = gr.Textbox(
                placeholder=text["user_input_placeholder"], scale=3, show_label=False
            )
            battle_button = gr.Button(text["battle_button"], scale=1, variant="primary")
        # Voting buttons start disabled; run_battle re-enables them after a turn.
        with gr.Column():
            score_instruction = gr.Markdown(f"{text['score_instruction']}")
            with gr.Row():
                model1win_button = gr.Button(
                    text["model1win_button"], variant="primary", interactive=False
                )
                model2win_button = gr.Button(
                    text["model2win_button"], variant="primary", interactive=False
                )
                tie_button = gr.Button(text["tie_button"], interactive=False)
                bothbad_button = gr.Button(text["bothbad_button"], interactive=False)
        with gr.Row():
            result_output = gr.Textbox(
                placeholder=text["result_placeholder"], scale=3, show_label=False
            )
            refresh_chat_button = gr.Button(
                text["refresh_chat_button"], variant="secondary", scale=1
            )

    # ---- Ranking tab: leaderboard re-queried every 10 seconds ----
    with gr.Tab(text["ranking_tab"]) as ranking_tab:
        language2 = gr.Radio(
            ["English", "Chinese", "Japanese", "Korean"],
            label=text["select_language"],
            value=default_language.value,
        )
        gr.DataFrame(
            load_dataframe,
            datatype=["str", "str", "str", "str", "str"],
            every=gr.Timer(10),
        )
    # Inject CSS to hide the page footer (the "Use via API" link).
    gr.HTML("""
    <style>
    footer {display: none !important;}
    </style>
    """)
    with gr.Row():
        with gr.Column(scale=5):
            contant = gr.Markdown(f"{text['contant']}")
        with gr.Column(scale=1):
            gr.Image("group.jpg")

    # Per-session state shared between the event handlers below.
    selected_models = gr.State([])
    model1_state = gr.State("")
    model2_state = gr.State("")
    chat_count = gr.State(0)
    session_id1 = gr.State("")
    session_id2 = gr.State("")

    # ---- Event wiring ----
    refresh_button.click(fn=refresh_data, inputs=language, outputs=char_choice)
    refresh_chat_button.click(
        fn=refresh_chat,
        inputs=[preset_prompt],
        outputs=[
            chatbox1,
            chatbox2,
            avatar_image,
            session_id1,
            session_id2,
            selected_models,
            model1win_button,
            model2win_button,
            tie_button,
            bothbad_button,
            battle_button,
            result_output,
            chat_count,
        ],
    )
    # Re-localize every labeled component when the user switches language.
    language.change(
        fn=update_language,
        inputs=language,
        outputs=[
            title,
            intro,
            avatar_image,
            char_choice,
            preset_prompt,
            refresh_button,
            bio,
            chatbox1,
            chatbox2,
            user_input,
            battle_button,
            result_output,
            refresh_chat_button,
            model1win_button,
            model2win_button,
            tie_button,
            bothbad_button,
            random_model_tab,
            ranking_tab,
            language,
            score_instruction,
            contant,
            language2
        ],
    )
    # The ranking tab's selector just forwards its value to the main one.
    language2.change(
        fn=passive_language_change,
        inputs=language2,
        outputs=language
    )
    char_choice.change(
        fn=update_preset_prompt,
        inputs=[char_choice, language],
        outputs=[preset_prompt, avatar_image],
    )
    preset_prompt.change(
        fn=update_chat_and_avatar,
        inputs=[preset_prompt],
        outputs=[
            chatbox1,
            chatbox2,
            avatar_image,
            session_id1,
            session_id2,
            selected_models,
        ],
    )
    # The four voting buttons differ only in the constant verdict they record.
    model1win_button.click(
        fn=select_winner,
        inputs=[
            model1_state,
            model2_state,
            gr.State("Model 1"),
            chat_count,
            gr.State(True),
            language,
        ],
        outputs=[
            result_output,
            model1win_button,
            model2win_button,
            tie_button,
            bothbad_button,
            battle_button,
        ],
    )
    model2win_button.click(
        fn=select_winner,
        inputs=[
            model1_state,
            model2_state,
            gr.State("Model 2"),
            chat_count,
            gr.State(True),
            language,
        ],
        outputs=[
            result_output,
            model1win_button,
            model2win_button,
            tie_button,
            bothbad_button,
            battle_button,
        ],
    )
    tie_button.click(
        fn=select_winner,
        inputs=[
            model1_state,
            model2_state,
            gr.State("tie"),
            chat_count,
            gr.State(True),
            language,
        ],
        outputs=[
            result_output,
            model1win_button,
            model2win_button,
            tie_button,
            bothbad_button,
            battle_button,
        ],
    )
    bothbad_button.click(
        fn=select_winner,
        inputs=[
            model1_state,
            model2_state,
            gr.State("bothbad"),
            chat_count,
            gr.State(True),
            language,
        ],
        outputs=[
            result_output,
            model1win_button,
            model2win_button,
            tie_button,
            bothbad_button,
            battle_button,
        ],
    )
    battle_button.click(
        run_battle,
        inputs=[
            user_input,
            chatbox1,
            chatbox2,
            session_id1,
            session_id2,
            chat_count,
            bio,
            preset_prompt,
            selected_models,
        ],
        outputs=[
            chatbox1,
            chatbox2,
            model1_state,
            model2_state,
            model1win_button,
            model2win_button,
            tie_button,
            bothbad_button,
            session_id1,
            session_id2,
            chat_count,
            battle_button,
            user_input,
        ],
    )
    # On page load: detect the browser language and localize everything.
    demo.load(
        init_and_update,
        outputs=[
            default_language,
            title,
            intro,
            avatar_image,
            char_choice,
            preset_prompt,
            refresh_button,
            bio,
            chatbox1,
            chatbox2,
            user_input,
            battle_button,
            result_output,
            refresh_chat_button,
            model1win_button,
            model2win_button,
            tie_button,
            bothbad_button,
            random_model_tab,
            ranking_tab,
            language,
            score_instruction,
            contant,
        ],
    )
|
747 |
+
|
748 |
+
if __name__ == "__main__":
    # Enable request queuing (8 concurrent workers) before binding the server.
    app = demo.queue(default_concurrency_limit=8)
    app.launch(server_name="0.0.0.0", server_port=7860)
|
group.jpg
ADDED
requirements.txt
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
gradio==4.40.0
|
2 |
+
jinja2>=3.1.2
|
3 |
+
httpx
|
4 |
+
pymongo
|
useapi.py
ADDED
@@ -0,0 +1,172 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import asyncio
import httpx
import json
import requests
import math
import os

# Shared async HTTP client, reused across all streaming chat requests.
client = httpx.AsyncClient()
# Upstream endpoint URLs are assembled from environment variables so the
# service hosts are not hard-coded in the repository.
# NOTE(review): os.getenv returns None for an unset variable, which would
# make the "https://" concatenation raise TypeError at import time — the
# runtime environment must provide all of these.
recommand_base_url = "https://" + os.getenv("recommand_base_url")
chat_url = "https://" + os.getenv("chat_url")
model_url = "https://" + os.getenv("model_url")
character_url = "https://" + os.getenv("character_url")
avatar_url = "https://" + os.getenv("avatar_url")
image_url = "https://" + os.getenv("image_url")
# Authorization header value forwarded verbatim to the upstream API.
auth = os.getenv("auth")
|
16 |
+
#headers
|
17 |
+
def create_headers(language):
    """Build the common HTTP headers for upstream API calls.

    ``language`` is a UI language name ("Chinese", "English", "Japanese"
    or "Korean"); any other value falls back to the Chinese code "zh".
    """
    # UI language name -> ISO 639-1 code expected by the API.
    codes = {
        'Chinese': 'zh',
        'English': 'en',
        'Japanese': 'ja',
        'Korean': 'ko',
    }
    headers = {
        'X-Refresh-Token': '',
        'X-Language': codes.get(language, 'zh'),
        'accept-language': '',
        'User-Agent': 'Apifox/1.0.0 (https://apifox.com)',
        'Authorization': auth,
        'Accept': '*/*',
        'Connection': 'keep-alive',
    }
    return headers
|
38 |
+
|
39 |
+
def recommand_character(language):
    """Fetch the recommended character list from the upstream API.

    Returns a list of dicts carrying each character's ``name``, ``_id``
    and a derived ``avatar_url``.
    """
    resp = requests.get(character_url, headers=create_headers(language))
    payload = resp.json()
    results = []
    for entry in payload['data']:
        results.append({
            "name": entry["name"],
            "_id": entry["_id"],
            # Avatar images follow a fixed "<id>_avatar.webp" naming scheme.
            "avatar_url": avatar_url + entry['_id'] + "_avatar.webp",
        })
    return results
|
48 |
+
|
49 |
+
def id_to_avatar(char_id):
    """Return the avatar image URL for a character id."""
    return "{}{}_avatar.webp".format(avatar_url, char_id)
|
51 |
+
|
52 |
+
#GET模型列表
|
53 |
+
def get_models():
    """Fetch the available model list and expand it into a weighted pool.

    Each model name is repeated ``weight`` times so that random selection
    from the returned list favors cheaper models:
      * 'gpt-4o', 'gpt-4o-mini' and 'mythomax-13b' are excluded entirely;
      * a few small open models get a fixed low weight of 12;
      * every other model is weighted inversely to its price.

    Returns an empty list when the model endpoint does not answer 200.
    """
    excluded = {'gpt-4o', 'gpt-4o-mini', 'mythomax-13b'}
    fixed_low_weight = {'qwen-2-7b', 'gemma-2-9b', 'llama-3.1-8b', 'glm-4-9b'}

    weighted_models = []
    response = requests.get(model_url)
    if response.status_code == 200:
        data = response.json()
        for model_info in data['data']:
            name = model_info['model']
            if name in excluded:
                continue
            if name in fixed_low_weight:
                weight = 12  # fixed low weight to reduce their frequency
            else:
                # Inverse-price weighting; assumes price > 0 — a zero price
                # would raise ZeroDivisionError (TODO confirm upstream never
                # reports free models).
                weight = int(math.ceil(24 / model_info['price'] + 0.5))
            weighted_models.extend([name] * weight)
    return weighted_models
|
78 |
+
|
79 |
+
#解析推荐json
|
80 |
+
def extract_recommand(data):
    """Flatten the recommendation API payload into a list of moment dicts.

    Avatar and image URLs are derived from the character id and the
    moment id respectively, using the service's fixed naming scheme.
    """
    moments = data["data"]["moments"]
    extracted = []
    for moment in moments:
        extracted.append({
            "character_id": moment["character_id"],
            "avatar_url": avatar_url + moment["character_id"] + "_avatar.webp",
            "_id": moment["_id"],
            "image_url": image_url + moment["_id"] + "_large.webp",
            "description": moment["description"],
            "name": moment["title"],
            "opening": moment["opening"],
        })
    return extracted
|
93 |
+
|
94 |
+
#请求推荐API
|
95 |
+
async def recommand(char_id, language):
    """Query the recommendation endpoint for one character and parse it."""
    url = recommand_base_url + char_id
    response = await client.get(url, headers=create_headers(language))
    payload = response.json()
    return extract_recommand(payload)
|
100 |
+
|
101 |
+
async def fetch_stream(query, model, moment_id, session_id, bio, request_name, queue, language):
    """POST one streaming chat request and relay its output through *queue*.

    Queue items are ``(request_name, kind, payload)`` tuples:
      * ("header", dict) - response headers, sent once before any content;
      * ("content", str) - one decoded chunk of the streamed reply;
      * ("end", None)    - this stream is finished (also sent on error).

    Returns the session id taken from the "x-session-id" response header,
    or None when the upstream answered with a non-200 status.
    """
    payload = {"query": query, "model": model, "bio": bio, "moment_id": moment_id}
    # Only include session_id when continuing an existing conversation.
    if session_id:
        payload["session_id"] = session_id
    async with client.stream(
        "POST", chat_url, json=payload, headers=create_headers(language)
    ) as response:
        # Surface an upstream error as a normal content chunk so the UI
        # shows it, then terminate this stream early.
        if response.status_code != 200:
            await queue.put((request_name, "content", "Error Occur!"))
            await queue.put((request_name, "end", None))
            return
        response_headers = dict(response.headers)
        # The upstream assigns (or echoes) the conversation's session id.
        session_id = response_headers.get("x-session-id")
        await queue.put((request_name, "header", response_headers))

        # Stream the response body as decoded text chunks.
        # NOTE(review): a chunk boundary could split a multi-byte UTF-8
        # sequence, making .decode() raise — confirm upstream chunking.
        async for chunk in response.aiter_bytes():
            await queue.put((request_name, "content", chunk.decode()))

    # Signal end-of-stream to the consumer.
    await queue.put((request_name, "end", None))

    return session_id
|
125 |
+
|
126 |
+
|
127 |
+
async def combine_streams(
    query_a,
    query_b,
    model_a,
    model_b,
    moment_id_a,
    moment_id_b,
    session_id_a,
    session_id_b,
    bio_a,
    bio_b,
    language
):
    """Run two chat streams concurrently and yield their merged progress.

    Starts one ``fetch_stream`` task per model, then consumes the shared
    queue:
      * once both headers have arrived, yields
        ``{"requestA_header": ..., "requestB_header": ...}``;
      * afterwards yields ``{"requestA": chunk, "requestB": chunk}`` dicts
        as content arrives (empty string for the side with nothing new);
      * finishes once both producers have sent their "end" marker.
    """
    queue = asyncio.Queue()
    task_a = asyncio.create_task(
        fetch_stream(
            query_a, model_a, moment_id_a, session_id_a, bio_a, "requestA", queue, language
        )
    )
    task_b = asyncio.create_task(
        fetch_stream(
            query_b, model_b, moment_id_b, session_id_b, bio_b, "requestB", queue, language
        )
    )

    headers = {}
    content = {"requestA": "", "requestB": ""}
    # Number of producers that have not yet sent their "end" marker.
    active_streams = 2

    while active_streams > 0:
        request_name, data_type, data = await queue.get()

        if data_type == "header":
            headers[f"{request_name}_header"] = data
            # Emit the headers only once both sides have responded.
            if len(headers) == 2:
                yield headers
        elif data_type == "content":
            content[request_name] = data.strip()
            if content["requestA"] or content["requestB"]:
                yield content
                # Rebind so each yield carries only the newest chunk(s).
                content = {"requestA": "", "requestB": ""}
        elif data_type == "end":
            active_streams -= 1

    # Drain both tasks so any exception they raised propagates here.
    # NOTE(review): these session ids are assigned to locals and never used
    # afterwards — callers appear to read "x-session-id" from the yielded
    # headers instead; confirm this is intentional.
    session_id_a = await task_a
    session_id_b = await task_b
|
utils.py
ADDED
@@ -0,0 +1,101 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import time
from pymongo import MongoClient
import pandas as pd
import math
import os

# MongoDB connection; the URI comes from the environment so credentials
# stay out of the repository.
client = MongoClient(os.getenv("client_link"))
db = client.get_database('roleplay')
# One document per recorded battle (written by update_model_stats).
collection = db.get_collection('model_stats')
|
11 |
+
|
12 |
+
def update_model_stats(model1_name, model2_name, winner, turn, anony, language):
    """Record one battle outcome as a document in ``model_stats``.

    ``winner`` is one of "Model 1", "Model 2", "tie" or "bothbad";
    ``turn`` is the chat turn count and ``anony`` whether the models were
    anonymous when the vote was cast.
    """
    record = {
        "Model 1": model1_name,
        "Model 2": model2_name,
        "Winner": winner,
        "Turn": turn,
        "Anony": anony,
        "Language": language,
        # Vote time as a Unix timestamp.
        "tstamp": time.time(),
    }
    collection.insert_one(record)
|
26 |
+
|
27 |
+
def calculate_elo(winner_elo, loser_elo, k=30, outcome=1):
    """Return the updated Elo rating for the first player.

    winner_elo: rating of the player being updated, before the game
    loser_elo:  rating of the opponent, before the game
    k:          K-factor controlling rating volatility
    outcome:    actual score for the first player (1 win, 0.5 tie, 0 loss)
    """
    # Standard logistic expected score with a 400-point scale.
    rating_gap = (loser_elo - winner_elo) / 400
    expected_score = 1.0 / (1.0 + 10 ** rating_gap)
    return winner_elo + k * (outcome - expected_score)
|
37 |
+
|
38 |
+
def load_dataframe():
    """Build the leaderboard DataFrame from the MongoDB battle log.

    Computes, per model: number of battles, number of wins (a tie counts
    as a win for both sides), an Elo rating, and a win-rate percentage
    string.  Rows are sorted by win rate, descending.
    """
    # Pull every recorded battle.
    data = pd.DataFrame(list(collection.find({})))

    # Unique model names across both sides of every battle.
    models = pd.unique(data[['Model 1', 'Model 2']].values.ravel('K'))

    results = {'模型名称': [], '参赛次数': [], '胜利次数': [], 'ELO': []}
    elo_dict = {model: 1000 for model in models}  # everyone starts at 1000

    # Outcome scores (model 1, model 2) per recorded verdict.  The tie /
    # "both bad" values are intentionally nonstandard symmetric scores.
    outcome_map = {
        'Model 1': (1, 0),
        'Model 2': (0, 1),
        'tie': (0.8, 0.8),
        'bothbad': (0.1, 0.1),
    }

    for _, row in data.iterrows():
        model1, model2, winner = row['Model 1'], row['Model 2'], row['Winner']
        scores = outcome_map.get(winner)
        if scores is None:
            continue  # ignore rows with an unknown verdict label
        # Snapshot both PRE-game ratings first: the previous code updated
        # sequentially, so model 2's update incorrectly used model 1's
        # already-updated rating.  Standard Elo derives both updates from
        # the ratings as they stood before the game.
        r1, r2 = elo_dict[model1], elo_dict[model2]
        elo_dict[model1] = calculate_elo(r1, r2, outcome=scores[0])
        elo_dict[model2] = calculate_elo(r2, r1, outcome=scores[1])

    # Hoist the per-side appearance counts out of the per-model loop.
    m1_counts = data['Model 1'].value_counts()
    m2_counts = data['Model 2'].value_counts()
    for model in models:
        count = m1_counts.get(model, 0) + m2_counts.get(model, 0)
        win_count = len(data[(data['Winner'] == 'Model 1') & (data['Model 1'] == model)])
        win_count += len(data[(data['Winner'] == 'Model 2') & (data['Model 2'] == model)])
        win_count += len(data[(data['Winner'] == 'tie') & ((data['Model 1'] == model) | (data['Model 2'] == model))])
        results['模型名称'].append(model)
        results['参赛次数'].append(count)
        results['胜利次数'].append(win_count)
        results['ELO'].append(round(elo_dict[model]))

    result_df = pd.DataFrame(results)

    # Win rate as a percentage; sort numerically before formatting to str.
    result_df["模型胜率"] = (result_df['胜利次数'] / result_df['参赛次数']) * 100
    result_df = result_df.sort_values(by="模型胜率", ascending=False)
    result_df["模型胜率"] = result_df["模型胜率"].map("{:.2f}%".format)

    return result_df
|
90 |
+
|
91 |
+
def change_name(old, new):
    """Rename a model in every stored battle record, on both sides."""
    for field in ("Model 1", "Model 2"):
        collection.update_many(
            {field: old},
            {"$set": {field: new}},
        )
|