b1sheng committed
Commit a80d977 • 1 Parent(s): 6ce58aa

Update app.py

Files changed (1)
  1. app.py +20 -125
app.py CHANGED
@@ -72,7 +72,7 @@ def has_nan_values(df, columns):
     return df[columns].isna().any(axis=1)
 
 
-def get_leaderboard_df():
+def get_leaderboard_df_1():
     if eval_results:
         print("Pulling evaluation results for the leaderboard.")
         eval_results.git_pull()
@@ -99,6 +99,22 @@ def get_leaderboard_df():
     print(type(df))
     return df
 
+def get_leaderboard_df():
+
+    data = {
+        'Datasets': ['SOTA(FT)', 'SOTA(ZS)', 'FLAN-T5', 'GPT-3', 'GPT-3.5v2', 'GPT-3.5v3', 'ChatGPT', 'GPT-4'],
+        'KQApro': [93.85, 94.20, 37.27, 38.28, 38.01, 40.35, 47.93, 57.20],
+        'LC-quad2': [33.10, '-', 30.14, 33.04, 33.77, 39.04, 42.76, 54.95],
+        'WQSP': [73.10, 62.98, 59.87, 67.68, 72.34, 79.60, 83.70, 90.45],
+        'CWQ': [72.20, '-', 46.69, 51.77, 53.96, 57.54, 64.02, 71.00],
+        'GrailQA': [76.31, '-', 29.02, 27.58, 30.50, 35.43, 46.77, 51.40],
+        'GraphQ': [41.30, '-', 32.27, 38.32, 40.85, 47.95, 53.10, 63.20],
+        'QALD-9': [67.82, '-', 30.17, 38.54, 44.96, 46.19, 45.71, 57.20],
+        'MKQA': [46.00, '-', 20.17, 26.97, 30.14, 39.05, 44.30, 59.20]
+    }
+
+    df = pd.DataFrame(data)
+    return df
 
 def get_evaluation_queue_df():
     if eval_queue:
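The new `get_leaderboard_df()` drops the git pull and simply returns a hard-coded pandas DataFrame of published results. One side effect worth noting (a minimal sketch, not part of the commit): columns that mix numeric scores with the `'-'` placeholder end up with object dtype, so they sort lexicographically rather than numerically in the rendered table.

```python
import pandas as pd

# Illustrative only; column names and values are taken from the diff above.
df = pd.DataFrame({
    "WQSP": [73.10, 62.98, 90.45],  # all floats            -> float64
    "CWQ":  [72.20, "-", 71.00],    # floats mixed with "-" -> object (sorts as strings)
})
print(df.dtypes)
```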
@@ -299,29 +315,8 @@ with demo:
         )
 
     with gr.Tabs(elem_classes="tab-buttons") as tabs:
-        with gr.TabItem("🏅 LLM Benchmark (lite)", elem_id="llm-benchmark-tab-table", id=0):
-            leaderboard_table_lite = gr.components.Dataframe(
-                value=leaderboard_df[COLS_LITE],
-                headers=COLS_LITE,
-                datatype=TYPES_LITE,
-                max_rows=None,
-                elem_id="leaderboard-table-lite",
-            )
-            # Dummy leaderboard for handling the case when the user uses backspace key
-            hidden_leaderboard_table_for_search_lite = gr.components.Dataframe(
-                value=original_df[COLS_LITE],
-                headers=COLS_LITE,
-                datatype=TYPES_LITE,
-                max_rows=None,
-                visible=False,
-            )
-            search_bar.submit(
-                search_table,
-                [hidden_leaderboard_table_for_search_lite, search_bar],
-                leaderboard_table_lite,
-            )
-
-        with gr.TabItem("📊 Extended view", elem_id="llm-benchmark-tab-table", id=1):
+
+        with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=1):
             leaderboard_table = gr.components.Dataframe(
                 value=leaderboard_df,
                 headers=COLS,
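With the lite/extended split removed, the Tabs block now holds a single benchmark tab backed by one `gr.components.Dataframe`. A stripped-down sketch of that layout, assuming Gradio Blocks and stand-in values for the app's `leaderboard_df`, `COLS`, and `TYPES` globals:

```python
import gradio as gr
import pandas as pd

# Stand-in for the app's hard-coded leaderboard_df.
leaderboard_df = pd.DataFrame({"Datasets": ["ChatGPT", "GPT-4"], "WQSP": [83.70, 90.45]})

with gr.Blocks() as demo:
    with gr.Tabs(elem_classes="tab-buttons"):
        with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=1):
            gr.Dataframe(value=leaderboard_df, interactive=False)

if __name__ == "__main__":
    demo.launch()
```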
@@ -346,107 +341,7 @@ with demo:
         with gr.TabItem("About", elem_id="llm-benchmark-tab-table", id=2):
             gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
 
-        with gr.TabItem("✉️✨ Submit here! ", elem_id="llm-benchmark-tab-table", id=3):
-            with gr.Column():
-                with gr.Row():
-                    gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
-
-                with gr.Column():
-                    with gr.Accordion(f"✅ Finished Evaluations: {len(finished_eval_queue_df)}", open=False):
-                        with gr.Row():
-                            finished_eval_table = gr.components.Dataframe(
-                                value=finished_eval_queue_df,
-                                headers=EVAL_COLS,
-                                datatype=EVAL_TYPES,
-                                max_rows=5,
-                            )
-                    with gr.Accordion(f"🔄 Running Evaluation Queue: {len(running_eval_queue_df)}", open=False):
-                        with gr.Row():
-                            running_eval_table = gr.components.Dataframe(
-                                value=running_eval_queue_df,
-                                headers=EVAL_COLS,
-                                datatype=EVAL_TYPES,
-                                max_rows=5,
-                            )
-
-                    with gr.Accordion(f"⏳ Pending Evaluation Queue: {len(pending_eval_queue_df)}", open=False):
-                        with gr.Row():
-                            pending_eval_table = gr.components.Dataframe(
-                                value=pending_eval_queue_df,
-                                headers=EVAL_COLS,
-                                datatype=EVAL_TYPES,
-                                max_rows=5,
-                            )
-            with gr.Row():
-                gr.Markdown("# ✉️✨ Submit your model here!", elem_classes="markdown-text")
-
-            with gr.Row():
-                with gr.Column():
-                    model_name_textbox = gr.Textbox(label="Model name")
-                    revision_name_textbox = gr.Textbox(
-                        label="revision", placeholder="main"
-                    )
-                    private = gr.Checkbox(
-                        False, label="Private", visible=not IS_PUBLIC
-                    )
-                    model_type = gr.Dropdown(
-                        choices=["pretrained", "fine-tuned", "with RL"],
-                        label="Model type",
-                        multiselect=False,
-                        value="pretrained",
-                        max_choices=1,
-                        interactive=True,
-                    )
-
-                with gr.Column():
-                    precision = gr.Dropdown(
-                        choices=["float16", "bfloat16", "8bit (LLM.int8)", "4bit (QLoRA / FP4)"],
-                        label="Precision",
-                        multiselect=False,
-                        value="float16",
-                        max_choices=1,
-                        interactive=True,
-                    )
-                    weight_type = gr.Dropdown(
-                        choices=["Original", "Delta", "Adapter"],
-                        label="Weights type",
-                        multiselect=False,
-                        value="Original",
-                        max_choices=1,
-                        interactive=True,
-                    )
-                    base_model_name_textbox = gr.Textbox(
-                        label="Base model (for delta or adapter weights)"
-                    )
-
-            submit_button = gr.Button("Submit Eval")
-            submission_result = gr.Markdown()
-            submit_button.click(
-                add_new_eval,
-                [
-                    model_name_textbox,
-                    base_model_name_textbox,
-                    revision_name_textbox,
-                    precision,
-                    private,
-                    weight_type,
-                    model_type
-                ],
-                submission_result,
-            )
-
-    with gr.Row():
-        refresh_button = gr.Button("Refresh")
-        refresh_button.click(
-            refresh,
-            inputs=[],
-            outputs=[
-                leaderboard_table,
-                finished_eval_table,
-                running_eval_table,
-                pending_eval_table,
-            ],
-        )
+
 
     with gr.Row():
        with gr.Accordion("📙 Citation", open=False):
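Because the leaderboard is now a fixed table, the submission tab and the Refresh wiring it fed are removed outright. For reference, a hypothetical sketch (not part of this commit, with stand-in names) of how a refresh handler could be re-attached if the table ever becomes dynamic again:

```python
import gradio as gr
import pandas as pd

def get_leaderboard_df() -> pd.DataFrame:
    # Stand-in for the app's table; a dynamic version would rebuild it here.
    return pd.DataFrame({"Datasets": ["GPT-4"], "WQSP": [90.45]})

with gr.Blocks() as demo:
    leaderboard_table = gr.Dataframe(value=get_leaderboard_df())
    refresh_button = gr.Button("Refresh")
    # Re-run the loader and push the result back into the table component.
    refresh_button.click(get_leaderboard_df, inputs=[], outputs=[leaderboard_table])

if __name__ == "__main__":
    demo.launch()
```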
 