Commit 00586e5
sherzod-hakimov committed
Parent(s): 295441e

keep only multimodal tab

Files changed:
- app.py +0 -38
- src/assets/text_content.py +3 -3
app.py CHANGED

@@ -67,45 +67,7 @@ with hf_app:
     gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
 
     with gr.Tabs(elem_classes="tab-buttons") as tabs:
-        """
-        ####################### FIRST TAB - TEXT-LEADERBOARD #######################
-        """
-        with gr.TabItem(TEXT_NAME, elem_id="llm-benchmark-tab-table", id=0):
-            with gr.Row():
-                search_bar = gr.Textbox(
-                    placeholder=" 🔍 Search for models - separate multiple queries with `;` and press ENTER...",
-                    show_label=False,
-                    elem_id="search-bar",
-                )
-
-            leaderboard_table = gr.Dataframe(
-                value=text_leaderboard,
-                elem_id="text-leaderboard-table",
-                interactive=False,
-                visible=True,
-                height=dataframe_height
-            )
-
-            # Show information about the clemscore and last updated date below the table
-            gr.HTML(CLEMSCORE_TEXT)
-            gr.HTML(f"Last updated - {github_data['date']}")
 
-            # Add a dummy leaderboard to handle search queries in leaderboard_table
-            # This will show a temporary leaderboard based on the searched value
-            dummy_leaderboard_table = gr.Dataframe(
-                value=text_leaderboard,
-                elem_id="text-leaderboard-table-dummy",
-                interactive=False,
-                visible=False
-            )
-
-            # Action after submitting a query to the search bar
-            search_bar.submit(
-                query_search,
-                [dummy_leaderboard_table, search_bar],
-                leaderboard_table,
-                queue=True
-            )
 
         """
         ####################### SECOND TAB - MULTIMODAL LEADERBOARD #######################
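For context on what was removed: the text tab used a common Gradio search pattern in which a hidden gr.Dataframe (dummy_leaderboard_table) holds the full, unfiltered leaderboard, and search_bar.submit(...) feeds it, together with the query string, to query_search; the callback's return value then replaces the visible leaderboard_table. The actual query_search is defined elsewhere in the Space; a minimal hypothetical sketch, assuming a "Model" column and the `;`-separated queries mentioned in the placeholder text, could look like this:

import pandas as pd

def query_search(df: pd.DataFrame, query: str) -> pd.DataFrame:
    # Hypothetical reconstruction; the real helper is defined elsewhere in the repo.
    # Split on ';' as the search-bar placeholder suggests, dropping empty fragments.
    queries = [q.strip().lower() for q in query.split(";") if q.strip()]
    if not queries:
        return df  # empty search box: show the full leaderboard
    # Keep rows whose model name (assumed column "Model") contains any query substring.
    mask = df["Model"].astype(str).str.lower().apply(
        lambda name: any(q in name for q in queries)
    )
    return df[mask]

Wiring the hidden, never-filtered copy as the input keeps the search idempotent: each submit filters the original data rather than the already-filtered visible view.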
src/assets/text_content.py CHANGED

@@ -1,4 +1,4 @@
-TITLE = """<h1 align="center" id="space-title"> 🏆 CLEM Leaderboard</h1>"""
+TITLE = """<h1 align="center" id="space-title"> 🏆 Multimodal CLEM Leaderboard</h1>"""
 
 REPO = "https://raw.githubusercontent.com/clembench/clembench-runs/main/"
 HF_REPO = "colab-potsdam/clem-leaderboard"

@@ -11,10 +11,10 @@ INTRODUCTION_TEXT = """
 
 The CLEM Leaderboard aims to track, rank and evaluate current cLLMs (chat-optimized Large Language Models) with the suggested pronounciation “clems”.
 
-The benchmarking approach is described in [Clembench: Using Game Play to Evaluate Chat-Optimized Language Models as Conversational Agents](https://aclanthology.org/2023.emnlp-main.689.pdf).
-
 The multimodal benchmark is described in [Two Giraffes in a Dirt Field: Using Game Play to Investigate Situation Modelling in Large Multimodal Models](https://arxiv.org/abs/2406.14035)
 
+The original benchmarking approach for text-only models is described in [Clembench: Using Game Play to Evaluate Chat-Optimized Language Models as Conversational Agents](https://aclanthology.org/2023.emnlp-main.689.pdf).
+
 Source code for benchmarking "clems" is available here: [Clembench](https://github.com/clembench/clembench)
 
 All generated files and results from the benchmark runs are available here: [clembench-runs](https://github.com/clembench/clembench-runs) </h6>