giux78 committed
Commit a063e46 • 1 Parent(s): 9ff2359

first release

Files changed (3):
  1. app.py +2 -1
  2. src/about.py +7 -4
  3. src/display/utils.py +1 -1
app.py CHANGED
@@ -101,6 +101,7 @@ with demo:
         with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
             gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
 
+        '''
         with gr.TabItem("🚀 Submit here! ", elem_id="llm-benchmark-tab-table", id=3):
             with gr.Column():
                 with gr.Row():
@@ -187,7 +188,7 @@ with demo:
                     ],
                     submission_result,
                 )
-
+        '''
     with gr.Row():
         with gr.Accordion("📙 Citation", open=False):
             citation_button = gr.Textbox(
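The only functional change in app.py is that the "Submit here!" tab is wrapped in a bare triple-quoted string, which Python evaluates as an unused string expression, so the tab is disabled without deleting its code. A minimal sketch of the pattern, assuming a Gradio Blocks app like the one above (labels, ids, and placeholder content here are illustrative, not the actual leaderboard code):

```python
import gradio as gr

demo = gr.Blocks()
with demo:
    with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
        gr.Markdown("About text goes here", elem_classes="markdown-text")

    # Everything inside the bare triple-quoted string below is just a string
    # expression: it is never executed, so the Submit tab is not rendered.
    '''
    with gr.TabItem("🚀 Submit here! ", elem_id="llm-benchmark-tab-table", id=3):
        with gr.Column():
            gr.Markdown("Submission form goes here")
    '''

demo.launch()
```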
src/about.py CHANGED
@@ -32,8 +32,8 @@ TITLE = """<h1 align="center" id="space-title">🇮🇹 Pinocchio ITA leaderboar
 # What does your leaderboard evaluate?
 INTRODUCTION_TEXT = """
 Pinocchio ITA leaderboard is an effort from <a href="https://mii-llm.ai">mii-llm lab</a> of creating specialized evaluations and models on Italian subjects.
-We also released the <a href="https://huggingface.co/datasets/mii-llm/pinocchio">Pinocchio dataset</a> a multimodal evaluation dataset for Italian.
-A PR on lm-evaliation-harness is open at the following link if you want to evaluate on your own hardware.
+We also released the <a href="https://huggingface.co/datasets/mii-llm/pinocchio">Pinocchio dataset</a>, a multimodal evaluation dataset for Italian tasks. If you want to
+reproduce the results, we maintain a <a href="https://github.com/giux78/lm-evaluation-harness">fork</a> of lm-evaluation-harness; you can see an example in the About section.
 The open source models are evaluated on the following subjects on Pinocchio tasks:
 <ul>
 <li>Generale</li>
@@ -48,10 +48,13 @@ The open source models are evaluated on the following subjects on Pinocchio task
 # Which evaluations are you running? how can people reproduce what you have?
 LLM_BENCHMARKS_TEXT = f"""
 ## How it works
+We released the <a href="https://huggingface.co/datasets/mii-llm/pinocchio">Pinocchio dataset</a>, a multimodal evaluation dataset for Italian tasks based on original Italian text. If you want to
+reproduce the results, we maintain a <a href="https://github.com/giux78/lm-evaluation-harness">fork</a> of lm-evaluation-harness; you can run the following command:
 
-## Reproducibility
-To reproduce our results, here is the commands you can run:
+```bash
+lm_eval --model hf --model_args pretrained=anakin87/Phi-3.5-mini-ITA --tasks pinocchio_it_logica,pinocchio_it_generale,pinocchio_it_diritto,pinocchio_it_cultura,pinocchio_it_lingua_straniera,pinocchio_it_matematica_e_scienze --device cuda:0 --batch_size 1
 
+```
 """
 
 EVALUATION_QUEUE_TEXT = """
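The added `lm_eval` command above is the documented way to reproduce the scores. As a rough Python equivalent, the sketch below assumes the giux78/lm-evaluation-harness fork is installed and keeps the upstream `lm_eval.simple_evaluate` API, with the `pinocchio_it_*` tasks registered by the fork; treat the exact call as an assumption, not part of this commit:

```python
import lm_eval

# Assumed Python equivalent of the CLI command in LLM_BENCHMARKS_TEXT above.
results = lm_eval.simple_evaluate(
    model="hf",
    model_args="pretrained=anakin87/Phi-3.5-mini-ITA",
    tasks=[
        "pinocchio_it_logica",
        "pinocchio_it_generale",
        "pinocchio_it_diritto",
        "pinocchio_it_cultura",
        "pinocchio_it_lingua_straniera",
        "pinocchio_it_matematica_e_scienze",
    ],
    device="cuda:0",
    batch_size=1,
)

# Per-task metrics are returned under the "results" key.
for task_name, metrics in results["results"].items():
    print(task_name, metrics)
```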
src/display/utils.py CHANGED
@@ -67,7 +67,7 @@ class ModelType(Enum):
     #IFT = ModelDetails(name="instruction-tuned", symbol="⭕")
     IFT = ModelDetails(name="merged", symbol="⭕")
     RL = ModelDetails(name="RL-tuned", symbol="🟦")
-    Unknown = ModelDetails(name="", symbol="?")
+    Unknown = ModelDetails(name="Unknown", symbol="?")
 
     def to_str(self, separator=" "):
        return f"{self.value.symbol}{separator}{self.value.name}"