eduagarcia committed
Commit 67cd6fc • 1 Parent(s): a69553b

Better about's

Files changed (3)
  1. app.py +2 -2
  2. src/display/about.py +26 -5
  3. tasks_config/pt_config.yaml +32 -1
app.py CHANGED
@@ -453,7 +453,7 @@ with demo:
                 ],
                 submission_result,
             )
-
+    """ #TODO: FIX CITATIONS
     with gr.Row():
         with gr.Accordion("📙 Citation", open=False):
             citation_button = gr.Textbox(
@@ -463,7 +463,7 @@
                 elem_id="citation-button",
                 show_copy_button=True,
             )
-
+    """
 scheduler = BackgroundScheduler()
 scheduler.add_job(restart_space, "interval", seconds=10800) # restarted every 3h
 scheduler.add_job(update_dynamic_files, "cron", minute=30) # launched every hour at minute 30
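This change disables the citation accordion by wrapping it in a module-level string literal, flagged with `#TODO: FIX CITATIONS`; the scheduler lines are unchanged context. For reference, a minimal runnable sketch of that APScheduler pattern, with placeholder callbacks standing in for the Space's own `restart_space` and `update_dynamic_files`:

# Minimal sketch of the scheduling pattern in the context lines above,
# assuming APScheduler is installed; both callbacks are placeholders.
from apscheduler.schedulers.background import BackgroundScheduler

def restart_space():
    print("restart requested")  # stands in for the HF Hub restart call

def update_dynamic_files():
    print("dynamic files refreshed")  # stands in for the real refresh job

scheduler = BackgroundScheduler()
scheduler.add_job(restart_space, "interval", seconds=10800)  # every 3 hours
scheduler.add_job(update_dynamic_files, "cron", minute=30)   # hourly, at minute 30
scheduler.start()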
src/display/about.py CHANGED
@@ -1,18 +1,30 @@
 from src.display.utils import ModelType
 from src.display.utils import Tasks
-from src.envs import REPO_ID, QUEUE_REPO, RESULTS_REPO, PATH_TO_COLLECTION, LEADERBOARD_NAME, TRUST_REMOTE_CODE
+from src.envs import REPO_ID, QUEUE_REPO, RESULTS_REPO, PATH_TO_COLLECTION, LEADERBOARD_NAME, TRUST_REMOTE_CODE, TASK_CONFIG
 
 LM_EVAL_URL = "https://github.com/eduagarcia/lm-evaluation-harness-pt"
 
 TITLE = F"""<h1 align="center" id="space-title">🚀 {LEADERBOARD_NAME}</h1>"""
 
+GENERAL_DESCRIPTION = f"📝 The 🚀 {LEADERBOARD_NAME} aims to track, rank and evaluate open LLMs and chatbots. "
+
+SUPPORT_DESCRIPTION = ""
+
+if 'readme' in TASK_CONFIG:
+    if 'general_description' in TASK_CONFIG['readme']:
+        GENERAL_DESCRIPTION = TASK_CONFIG['readme']['general_description']
+    if 'support_description' in TASK_CONFIG['readme']:
+        SUPPORT_DESCRIPTION = TASK_CONFIG['readme']['support_description']
+
 INTRODUCTION_TEXT = f"""
-📝 The 🚀 {LEADERBOARD_NAME} aims to track, rank and evaluate open LLMs and chatbots.
+{GENERAL_DESCRIPTION}
 
 This is a fork of the <a href="https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard" target="_blank">🤗 Open LLM Leaderboard</a> with different benchmarks.
 
 Submit a model for automated evaluation on our GPU cluster on the "Submit" page!
-The leaderboard's backend runs the great [Eleuther AI Language Model Evaluation Harness](https://github.com/EleutherAI/lm-evaluation-harness) - read more details in the "About" page!
+The leaderboard's backend runs the great [Eleuther AI Language Model Evaluation Harness](https://github.com/EleutherAI/lm-evaluation-harness) - read more details in the "About" page!
+
+{SUPPORT_DESCRIPTION}
 """
 task_count = 0
 TASKS_LIST= ""
@@ -26,11 +38,20 @@ for task in Tasks:
     task = task.value
     TASKS_PARAMETERS += f"- {task.col_name}: {task.few_shot}-shot, *{','.join(task.task_list)}* (`{task.metric}`)\n"
 
-LLM_BENCHMARKS_TEXT = f"""
-# Context
+ABOUT_DESCRIPTION_CONTEXT = f"""
 This is a fork of the <a href="https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard" target="_blank">🤗 Open LLM Leaderboard</a> with different benchmarks.
 
 With the plethora of large language models (LLMs) and chatbots being released week upon week, often with grandiose claims of their performance, it can be hard to filter out the genuine progress that is being made by the open-source community and which model is the current state of the art.
+"""
+
+if 'readme' in TASK_CONFIG:
+    if 'about_description' in TASK_CONFIG['readme']:
+        ABOUT_DESCRIPTION_CONTEXT = TASK_CONFIG['readme']['about_description']
+
+LLM_BENCHMARKS_TEXT = f"""
+# Context
+
+{ABOUT_DESCRIPTION_CONTEXT}
 
 ## How it works
 
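The pattern introduced here is a config-driven override: the hard-coded English strings remain as defaults, and a `readme` section in the task config replaces them when present. A hypothetical sketch of the loading side, assuming `src/envs.py` parses the YAML with PyYAML (the loader itself is not part of this commit), with the same defaults-then-override logic written via `dict.get`:

# Hypothetical loader for TASK_CONFIG; the real src/envs.py is not shown
# in this commit, so the file path and structure are assumptions.
import yaml

with open("tasks_config/pt_config.yaml") as f:
    TASK_CONFIG = yaml.safe_load(f)

# Equivalent of the defaults-then-override blocks in about.py:
readme = TASK_CONFIG.get("readme") or {}
GENERAL_DESCRIPTION = readme.get(
    "general_description",
    "📝 The leaderboard aims to track, rank and evaluate open LLMs and chatbots. ",  # placeholder default
)
SUPPORT_DESCRIPTION = readme.get("support_description", "")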
tasks_config/pt_config.yaml CHANGED
@@ -10,6 +10,37 @@ config:
   LEADERBOARD_NAME: "Open PT-LLM Leaderboard"
   GET_ORIGINAL_HF_LEADERBOARD_EVAL_RESULTS: true
   TRUST_REMOTE_CODE: true
+readme:
+  general_description: |
+    📝 The 🚀 Open PT-LLM Leaderboard aims to provide a benchmark for the evaluation of
+    Large Language Models (LLMs) in the Portuguese language across a variety of tasks
+    and datasets.
+    The leaderboard is open to submissions of models from the community
+    and is designed to be a resource for researchers, practitioners, and enthusiasts
+    interested in the development and evaluation of LLMs for the Portuguese language.
+    If you have any questions, suggestions, or would like to contribute to the leaderboard,
+    please feel free to reach out at [@eduagarcia](https://linktr.ee/eduagarcia).
+  support_description: |
+    This leaderboard is made possible by the support of the
+    [Center of Excellence in AI (CEIA)](https://ceia.ufg.br/) at the
+    [Federal University of Goiás (UFG)](https://international.ufg.br/).
+  about_description: |
+    The 🚀 Open PT-LLM Leaderboard is a benchmark for the evaluation of
+    Large Language Models (LLMs) in the Portuguese language.
+
+    The leaderboard is open to submissions of models from the community and
+    is designed to be a resource for researchers, practitioners, and enthusiasts interested
+    in the development and evaluation of LLMs for the Portuguese language.
+
+    Supported by the [Center of Excellence in AI (CEIA)](https://ceia.ufg.br/) at the
+    [Federal University of Goiás (UFG)](https://international.ufg.br/), this leaderboard
+    operates on a backend of Nvidia A100-80G GPUs. Evaluations are subject to
+    resource availability, which is not exclusive. Therefore, please be patient if
+    your model is in the queue. If you'd like to support the leaderboard, feel free to
+    reach out.
+
+    This is a fork of the <a href="https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard" target="_blank">🤗 Open LLM Leaderboard</a> with
+    Portuguese benchmarks.
 tasks:
   enem_challenge:
     benchmark: enem_challenge
@@ -138,7 +169,7 @@ tasks:
       - sparrow_sentiment-2016-mozetic-por
       - sparrow_sentiment-2018-brum-por
     metric: f1_macro
-    few_shot: 15
+    few_shot: 25
    limit: 500
    baseline: 29.5 #random baseline [3.3, 48.8, 33.1, 33.0]
    human_baseline: null
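The `readme` keys use YAML literal block scalars (`|`), so the Markdown text keeps its line breaks when loaded. A quick, hypothetical sanity check that the new section parses into the keys `about.py` looks up (assuming the `readme` mapping sits at the top level of the file, next to `tasks`):

# Hypothetical check that pt_config.yaml exposes the readme keys used by
# about.py; the top-level placement of `readme` is an assumption.
import yaml

with open("tasks_config/pt_config.yaml") as f:
    cfg = yaml.safe_load(f)

for key in ("general_description", "support_description", "about_description"):
    assert key in cfg.get("readme", {}), f"missing readme key: {key}"
    print(key, "->", repr(cfg["readme"][key][:48]))  # block scalars keep newlines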