Spaces:

THUIR
/

AEOLLM

Running

App Files Files Community

陈俊杰 committed on Jun 3

Commit

7f6ca6e

•

1 Parent(s): 550da67

build

Browse files

Files changed (4) hide show

.DS_Store +0 -0
README.md +3 -3
app.py +91 -0
requirements.txt +2 -0

.DS_Store ADDED Viewed

Binary file (6.15 kB). View file

README.md CHANGED Viewed

@@ -1,10 +1,10 @@
 ---
 title: AEOLLM
-emoji: 🏢
-colorFrom: red
 colorTo: green
 sdk: streamlit
-sdk_version: 1.35.0
 app_file: app.py
 pinned: false
 ---

 ---
 title: AEOLLM
+emoji: 😻
+colorFrom: green
 colorTo: green
 sdk: streamlit
+sdk_version: 1.34.0
 app_file: app.py
 pinned: false
 ---

app.py ADDED Viewed

	@@ -0,0 +1,91 @@

+import streamlit as st
+import pandas as pd
+# Page-wide CSS overrides. They are injected as a raw <style> element, which
+# is why this call needs unsafe_allow_html=True.
+st.markdown("""
+<style>
+h1 {
+    font-size: 2.5em;  /* 标题字体大小 */
+}
+.stDataFrame {
+    font-family: Helvetica;
+}
+.dataframe th, .dataframe td {
+    width: auto;
+    min-width: 500px;
+}
+</style>
+""", unsafe_allow_html=True)
+# Page title
+st.title('🏆AEOLLM Leaderboard')
+# Introductory description. The blank line after the task list terminates the
+# list, so the "Details" sentence renders as its own paragraph instead of
+# being absorbed into the last bullet.
+st.markdown("""
+This leaderboard is used to show the performance of the **automatic evaluation methods of LLMs** submitted by the **AEOLLM team** on four tasks:
+- Summary Generation (SG)
+- Non-Factoid QA (NFQA)
+- Dialogue Generation (DG)
+- Text Expansion (TE).
+
+Details of AEOLLM can be found at the link: [https://cjj826.github.io/AEOLLM/](https://cjj826.github.io/AEOLLM/)
+""", unsafe_allow_html=True)
+# Sample data: one table per task. Each dict maps a column name to that
+# column's values; the four tables currently hold identical example rows.
+SG = {
+    "methods": ["Model A", "Model B", "Model C"],
+    "team": ["U1", "U2", "U3"],
+    "acc": [0.75, 0.64, 0.83],
+    "tau": [0.05, 0.28, 0.16],
+    "s": [0.12, 0.27, 0.18],
+}
+df1 = pd.DataFrame(SG)
+NFQA = {
+    "methods": ["Model A", "Model B", "Model C"],
+    "team": ["U1", "U2", "U3"],
+    "acc": [0.75, 0.64, 0.83],
+    "tau": [0.05, 0.28, 0.16],
+    "s": [0.12, 0.27, 0.18],
+}
+df2 = pd.DataFrame(NFQA)
+DG = {
+    "methods": ["Model A", "Model B", "Model C"],
+    "team": ["U1", "U2", "U3"],
+    "acc": [0.75, 0.64, 0.83],
+    "tau": [0.05, 0.28, 0.16],
+    "s": [0.12, 0.27, 0.18],
+}
+df3 = pd.DataFrame(DG)
+TE = {
+    "methods": ["Model A", "Model B", "Model C"],
+    "team": ["U1", "U2", "U3"],
+    "acc": [0.75, 0.64, 0.83],
+    "tau": [0.05, 0.28, 0.16],
+    "s": [0.12, 0.27, 0.18],
+}
+df4 = pd.DataFrame(TE)
+# One tab per task: (tab label, section header, results table).
+task_views = [
+    ("SG", "Summary Generation", df1),
+    ("NFQA", "Non-Factoid QA", df2),
+    ("DG", "Dialogue Generation", df3),
+    ("TE", "Text Expansion", df4),
+]
+# Create the tabs in the order listed above, then fill each one with its
+# header and its table stretched to the container width.
+tabs = st.tabs([label for label, _, _ in task_views])
+for tab, (_, heading, frame) in zip(tabs, task_views):
+    with tab:
+        st.header(heading)
+        st.dataframe(frame, use_container_width=True)

requirements.txt ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ pandas==2.2.2
2	+ streamlit==1.34.0