3v324v23 committed
Commit 4736a54
1 Parent(s): 7ccbd33

simple fix of columns

Files changed (3)
  1. app.py +2 -2
  2. src/display/utils.py +15 -8
  3. src/populate.py +4 -4
app.py CHANGED
@@ -156,7 +156,7 @@ with demo:
     gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
 
     with gr.Tabs(elem_classes="tab-buttons") as tabs:
-        with gr.TabItem("MOE-LLM-GPU-Poor-Leaderboard Benchmark", elem_id="llm-benchmark-tab-table", id=0):
+        with gr.TabItem("open-moe-llm-leaderboard", elem_id="llm-benchmark-tab-table", id=0):
             with gr.Row():
                 with gr.Column():
                     with gr.Row():
@@ -324,7 +324,7 @@ with demo:
                         value=None,
                         interactive=True,
                     )
-
+
                 with gr.Row():
                     with gr.Column():
                         model_name_textbox = gr.Textbox(label="Model name")
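
Note: the app.py change only renames the visible tab label (plus a whitespace-only line); the element id and the layout are untouched. For readers unfamiliar with the Gradio pattern involved, here is a minimal self-contained sketch of the same Tabs/TabItem structure; the intro text and inner layout are simplified stand-ins, not the app's real content:

import gradio as gr

# Minimal sketch: a tab bar with one tab, mirroring the structure in app.py.
with gr.Blocks() as demo:
    gr.Markdown("Intro text", elem_classes="markdown-text")
    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        # The commit changes only this visible label; elem_id and id stay the same.
        with gr.TabItem("open-moe-llm-leaderboard", elem_id="llm-benchmark-tab-table", id=0):
            with gr.Row():
                with gr.Column():
                    model_name_textbox = gr.Textbox(label="Model name")

demo.launch()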
src/display/utils.py CHANGED
@@ -7,6 +7,11 @@ import pandas as pd
 def fields(raw_class):
     return [v for k, v in raw_class.__dict__.items() if k[:2] != "__" and k[-2:] != "__"]
 
+E2Es = "E2E(s)"  # "End-to-end time (s)"
+PREs = "PRE(s)"  # "Prefilling time (s)"
+TS = "T/s"  # Decoding throughput (tok/s)
+InFrame = "Method"  # "Inference framework"
+MULTIPLE_CHOICEs = ["mmlu"]
 
 @dataclass
 class Task:
@@ -46,7 +51,7 @@ class Tasks(Enum):
 
     # # XXX include me back at some point
     selfcheck = Task("selfcheckgpt", "max-selfcheckgpt", "SelfCheckGPT")
-    mmlu = Task("mmlu", "acc", "MMLU/Acc (5-shot)")
+    mmlu = Task("mmlu", "acc", "MMLU")  # MMLU/Acc (5-shot)
 
 
 # These classes are for user facing column names,
@@ -71,20 +76,22 @@ auto_eval_column_dict.append(["model", ColumnContent, ColumnContent("Model", "ma
 # # auto_eval_column_dict.append(["average", ColumnContent, ColumnContent("Avg", "number", True)])
 
 # Inference framework
-auto_eval_column_dict.append(["inference_framework", ColumnContent, ColumnContent("Inference framework", "str", True)])
+auto_eval_column_dict.append(["inference_framework", ColumnContent, ColumnContent(f"{InFrame}", "str", True)])
 
 for task in Tasks:
     auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "number", True)])
     # System performance metrics
-    auto_eval_column_dict.append([f"{task.name}_end_to_end_time", ColumnContent, ColumnContent(f"{task.value.col_name} End-to-end time (s)", "number", True)])
-    auto_eval_column_dict.append([f"{task.name}_prefilling_time", ColumnContent, ColumnContent(f"{task.value.col_name} Prefilling time (s)", "number", True)])
-    auto_eval_column_dict.append([f"{task.name}_decoding_throughput", ColumnContent, ColumnContent(f"{task.value.col_name} Decoding throughput (tok/s)", "number", True)])
+    auto_eval_column_dict.append([f"{task.name}_end_to_end_time", ColumnContent, ColumnContent(f"{task.value.col_name}-{E2Es}", "number", True)])
+    if task.value.benchmark in MULTIPLE_CHOICEs:
+        continue
+    auto_eval_column_dict.append([f"{task.name}_prefilling_time", ColumnContent, ColumnContent(f"{task.value.col_name}-{PREs}", "number", True)])
+    auto_eval_column_dict.append([f"{task.name}_decoding_throughput", ColumnContent, ColumnContent(f"{task.value.col_name}-{TS}", "number", True)])
 
 # Model information
 auto_eval_column_dict.append(["model_type", ColumnContent, ColumnContent("Type", "str", False)])
 auto_eval_column_dict.append(["architecture", ColumnContent, ColumnContent("Architecture", "str", False)])
 auto_eval_column_dict.append(["weight_type", ColumnContent, ColumnContent("Weight type", "str", False, True)])
-auto_eval_column_dict.append(["precision", ColumnContent, ColumnContent("Precision", "str", False)])
+auto_eval_column_dict.append(["precision", ColumnContent, ColumnContent("Precision", "str", True)])
 auto_eval_column_dict.append(["license", ColumnContent, ColumnContent("Hub License", "str", False)])
 auto_eval_column_dict.append(["params", ColumnContent, ColumnContent("#Params (B)", "number", False)])
 auto_eval_column_dict.append(["likes", ColumnContent, ColumnContent("Hub ❤️", "number", False)])
@@ -144,7 +151,7 @@ class InferenceFramework(Enum):
 
     def to_str(self):
         return self.value.name
-
+
     @staticmethod
     def from_str(inference_framework: str):
         if inference_framework in ["moe-infinity"]:
@@ -152,7 +159,7 @@ class InferenceFramework(Enum):
         if inference_framework in ["hf-chat"]:
             return InferenceFramework.HF_Chat
         return InferenceFramework.Unknown
-
+
 
 class WeightType(Enum):
     Adapter = ModelDetails("Adapter")
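
Net effect of the utils.py changes: column headers are abbreviated (E2E(s), PRE(s), T/s, Method), the Precision column's third ColumnContent flag flips to True (which, in this leaderboard template, appears to make it shown by default), and multiple-choice benchmarks (currently just mmlu) skip the prefilling-time and decoding-throughput columns, keeping only accuracy and end-to-end time. A minimal self-contained sketch of the new column loop; the Task/Tasks stubs below are trimmed stand-ins, and the real ColumnContent wrapper is omitted for brevity:

from dataclasses import dataclass
from enum import Enum

# Stand-ins for the real definitions in src/display/utils.py.
@dataclass
class Task:
    benchmark: str
    metric: str
    col_name: str

class Tasks(Enum):
    selfcheck = Task("selfcheckgpt", "max-selfcheckgpt", "SelfCheckGPT")
    mmlu = Task("mmlu", "acc", "MMLU")

E2Es, PREs, TS = "E2E(s)", "PRE(s)", "T/s"
MULTIPLE_CHOICEs = ["mmlu"]

columns = []
for task in Tasks:
    columns.append(task.value.col_name)              # accuracy column
    columns.append(f"{task.value.col_name}-{E2Es}")  # end-to-end time
    if task.value.benchmark in MULTIPLE_CHOICEs:
        continue  # multiple-choice tasks get no prefill/decode columns
    columns.append(f"{task.value.col_name}-{PREs}")
    columns.append(f"{task.value.col_name}-{TS}")

print(columns)
# ['SelfCheckGPT', 'SelfCheckGPT-E2E(s)', 'SelfCheckGPT-PRE(s)',
#  'SelfCheckGPT-T/s', 'MMLU', 'MMLU-E2E(s)']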
src/populate.py CHANGED
@@ -12,7 +12,7 @@ from src.leaderboard.read_evals import get_raw_eval_results, EvalResult, update_
 
 from src.backend.envs import Tasks as BackendTasks
 from src.display.utils import Tasks
-
+from src.display.utils import E2Es, PREs, TS
 
 def get_leaderboard_df(
     results_path: str,
@@ -47,9 +47,9 @@
 
     # bm_to_name_map = {bm: name for name, bm in name_to_bm_map.items()}
     system_metrics_to_name_map = {
-        "end_to_end_time": "End-to-end time (s)",
-        "prefilling_time": "Prefilling time (s)",
-        "decoding_throughput": "Decoding throughput (tok/s)",
+        "end_to_end_time": f"{E2Es}",
+        "prefilling_time": f"{PREs}",
+        "decoding_throughput": f"{TS}",
     }
 
     all_data_json = []
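
The populate.py side simply reuses the abbreviated names from src/display/utils.py, so the renamed metrics land in the matching leaderboard columns. A hedged sketch of how such a mapping can be applied; the raw-metrics layout and the rename_system_metrics helper below are illustrative assumptions, not code from this repo:

E2Es, PREs, TS = "E2E(s)", "PRE(s)", "T/s"

system_metrics_to_name_map = {
    "end_to_end_time": f"{E2Es}",
    "prefilling_time": f"{PREs}",
    "decoding_throughput": f"{TS}",
}

def rename_system_metrics(task_col: str, metrics: dict) -> dict:
    """Hypothetical helper: map raw metric keys onto display column names."""
    return {
        f"{task_col}-{system_metrics_to_name_map[key]}": value
        for key, value in metrics.items()
        if key in system_metrics_to_name_map
    }

print(rename_system_metrics("SelfCheckGPT",
                            {"end_to_end_time": 12.3, "decoding_throughput": 41.0}))
# {'SelfCheckGPT-E2E(s)': 12.3, 'SelfCheckGPT-T/s': 41.0}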