margsli committed on
Commit
0cb1fb9
1 Parent(s): d89263e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +2 -42
app.py CHANGED
@@ -18,7 +18,7 @@ def make_default_md(arena_df, elo_results):
18
 
19
  leaderboard_md = f"""
20
  # NeurIPS LLM Merging Competition Leaderboard
21
- [Website]() | [GitHub]() | [Discord]() |
22
 
23
  """
24
  return leaderboard_md
@@ -223,7 +223,6 @@ def recompute_final_ranking(arena_df):
223
 
224
  def get_arena_table(arena_df, model_table_df, arena_subset_df=None):
225
  arena_df = arena_df.sort_values(by=["final_ranking", "rating"], ascending=[True, False])
226
- arena_df = arena_df[arena_df["num_battles"] > 2000]
227
  arena_df["final_ranking"] = recompute_final_ranking(arena_df)
228
  arena_df = arena_df.sort_values(by=["final_ranking"], ascending=True)
229
 
@@ -234,7 +233,6 @@ def get_arena_table(arena_df, model_table_df, arena_subset_df=None):
234
  arena_subset_df = arena_subset_df[arena_subset_df.index.isin(arena_df.index)]
235
  arena_subset_df = arena_subset_df.sort_values(by=["rating"], ascending=False)
236
  # arena_subset_df = arena_subset_df.sort_values(by=["final_ranking"], ascending=True)
237
- # arena_subset_df = arena_subset_df[arena_subset_df["num_battles"] > 500]
238
  arena_subset_df["final_ranking"] = recompute_final_ranking(arena_subset_df)
239
  # keep only the models in the subset in arena_df and recompute final_ranking
240
  arena_df = arena_df[arena_df.index.isin(arena_subset_df.index)]
@@ -248,10 +246,6 @@ def get_arena_table(arena_df, model_table_df, arena_subset_df=None):
248
  arena_df = arena_subset_df.join(arena_df["final_ranking"], rsuffix="_global", how="inner")
249
  arena_df["ranking_difference"] = arena_df["final_ranking_global"] - arena_df["final_ranking"]
250
 
251
- # no tie version
252
- # arena_df = arena_subset_df.join(arena_df["final_ranking_no_tie"], rsuffix="_global", how="inner")
253
- # arena_df["ranking_difference"] = arena_df["final_ranking_no_tie_global"] - arena_df["final_ranking_no_tie"]
254
-
255
  arena_df = arena_df.sort_values(by=["final_ranking", "rating"], ascending=[True, False])
256
  arena_df["final_ranking"] = arena_df.apply(lambda x: create_ranking_str(x["final_ranking"], x["ranking_difference"]), axis=1)
257
 
@@ -272,15 +266,6 @@ def get_arena_table(arena_df, model_table_df, arena_subset_df=None):
272
  row.append(model_name)
273
  # elo rating
274
  row.append(round(arena_df.iloc[i]["rating"]))
275
- upper_diff = round(
276
- arena_df.iloc[i]["rating_q975"] - arena_df.iloc[i]["rating"]
277
- )
278
- lower_diff = round(
279
- arena_df.iloc[i]["rating"] - arena_df.iloc[i]["rating_q025"]
280
- )
281
- row.append(f"+{upper_diff}/-{lower_diff}")
282
- # num battles
283
- row.append(round(arena_df.iloc[i]["num_battles"]))
284
  # Organization
285
  row.append(
286
  model_table_df[model_table_df["key"] == model_key]["Organization"].values[0]
@@ -289,11 +274,6 @@ def get_arena_table(arena_df, model_table_df, arena_subset_df=None):
289
  row.append(
290
  model_table_df[model_table_df["key"] == model_key]["License"].values[0]
291
  )
292
- cutoff_date = model_table_df[model_table_df["key"] == model_key]["Knowledge cutoff date"].values[0]
293
- if cutoff_date == "-":
294
- row.append("Unknown")
295
- else:
296
- row.append(cutoff_date)
297
  values.append(row)
298
  except Exception as e:
299
  print(f"{model_key} - {e}")
@@ -301,23 +281,9 @@ def get_arena_table(arena_df, model_table_df, arena_subset_df=None):
301
 
302
  key_to_category_name = {
303
  "full": "Overall",
304
- "coding": "Coding",
305
- "long_user": "Longer Query",
306
- "english": "English",
307
- "chinese": "Chinese",
308
- "french": "French",
309
- "no_tie": "Exclude Ties",
310
- "no_short": "Exclude Short",
311
  }
312
  cat_name_to_explanation = {
313
  "Overall": "Overall Questions",
314
- "Coding": "Coding: whether conversation contains code snippets",
315
- "Longer Query": "Longer Query (>= 500 tokens)",
316
- "English": "English Prompts",
317
- "Chinese": "Chinese Prompts",
318
- "French": "French Prompts",
319
- "Exclude Ties": "Exclude Ties and Bothbad",
320
- "Exclude Short": "User Query >= 5 tokens",
321
  }
322
 
323
  def build_leaderboard_tab(elo_results_file, leaderboard_table_file, show_plot=False):
@@ -364,7 +330,6 @@ def build_leaderboard_tab(elo_results_file, leaderboard_table_file, show_plot=Fa
364
  "⭐ Arena Elo",
365
  "Organization",
366
  "License",
367
- "Knowledge Cutoff",
368
  ],
369
  datatype=[
370
  "number",
@@ -372,7 +337,6 @@ def build_leaderboard_tab(elo_results_file, leaderboard_table_file, show_plot=Fa
372
  "number",
373
  "str",
374
  "str",
375
- "str",
376
  ],
377
  value=arena_table_vals,
378
  elem_id="arena_leaderboard_dataframe",
@@ -419,7 +383,7 @@ def build_leaderboard_tab(elo_results_file, leaderboard_table_file, show_plot=Fa
419
  pass
420
 
421
  def update_leaderboard_df(arena_table_vals):
422
- elo_datarame = pd.DataFrame(arena_table_vals, columns=[ "Rank", "🤖 Model", "⭐ Arena Elo", "Organization", "License", "Knowledge Cutoff"])
423
 
424
  # goal: color the rows based on the rank with styler
425
  def highlight_max(s):
@@ -446,7 +410,6 @@ def build_leaderboard_tab(elo_results_file, leaderboard_table_file, show_plot=Fa
446
  "⭐ Arena Elo",
447
  "Organization",
448
  "License",
449
- "Knowledge Cutoff",
450
  ],
451
  datatype=[
452
  "number",
@@ -454,7 +417,6 @@ def build_leaderboard_tab(elo_results_file, leaderboard_table_file, show_plot=Fa
454
  "number",
455
  "str",
456
  "str",
457
- "str",
458
  ],
459
  value=arena_values,
460
  elem_id="arena_leaderboard_dataframe",
@@ -470,7 +432,6 @@ def build_leaderboard_tab(elo_results_file, leaderboard_table_file, show_plot=Fa
470
  "⭐ Arena Elo",
471
  "Organization",
472
  "License",
473
- "Knowledge Cutoff",
474
  ],
475
  datatype=[
476
  "number",
@@ -478,7 +439,6 @@ def build_leaderboard_tab(elo_results_file, leaderboard_table_file, show_plot=Fa
478
  "number",
479
  "str",
480
  "str",
481
- "str",
482
  ],
483
  value=arena_values,
484
  elem_id="arena_leaderboard_dataframe",
 
18
 
19
  leaderboard_md = f"""
20
  # NeurIPS LLM Merging Competition Leaderboard
21
+ [Website](https://llm-merging.github.io/index) | [Starter Kit (Github)]() | [Discord](https://discord.com/invite/dPBHEVnV) |
22
 
23
  """
24
  return leaderboard_md
 
223
 
224
  def get_arena_table(arena_df, model_table_df, arena_subset_df=None):
225
  arena_df = arena_df.sort_values(by=["final_ranking", "rating"], ascending=[True, False])
 
226
  arena_df["final_ranking"] = recompute_final_ranking(arena_df)
227
  arena_df = arena_df.sort_values(by=["final_ranking"], ascending=True)
228
 
 
233
  arena_subset_df = arena_subset_df[arena_subset_df.index.isin(arena_df.index)]
234
  arena_subset_df = arena_subset_df.sort_values(by=["rating"], ascending=False)
235
  # arena_subset_df = arena_subset_df.sort_values(by=["final_ranking"], ascending=True)
 
236
  arena_subset_df["final_ranking"] = recompute_final_ranking(arena_subset_df)
237
  # keep only the models in the subset in arena_df and recompute final_ranking
238
  arena_df = arena_df[arena_df.index.isin(arena_subset_df.index)]
 
246
  arena_df = arena_subset_df.join(arena_df["final_ranking"], rsuffix="_global", how="inner")
247
  arena_df["ranking_difference"] = arena_df["final_ranking_global"] - arena_df["final_ranking"]
248
 
 
 
 
 
249
  arena_df = arena_df.sort_values(by=["final_ranking", "rating"], ascending=[True, False])
250
  arena_df["final_ranking"] = arena_df.apply(lambda x: create_ranking_str(x["final_ranking"], x["ranking_difference"]), axis=1)
251
 
 
266
  row.append(model_name)
267
  # elo rating
268
  row.append(round(arena_df.iloc[i]["rating"]))
 
 
 
 
 
 
 
 
 
269
  # Organization
270
  row.append(
271
  model_table_df[model_table_df["key"] == model_key]["Organization"].values[0]
 
274
  row.append(
275
  model_table_df[model_table_df["key"] == model_key]["License"].values[0]
276
  )
 
 
 
 
 
277
  values.append(row)
278
  except Exception as e:
279
  print(f"{model_key} - {e}")
 
281
 
282
  key_to_category_name = {
283
  "full": "Overall",
 
 
 
 
 
 
 
284
  }
285
  cat_name_to_explanation = {
286
  "Overall": "Overall Questions",
 
 
 
 
 
 
 
287
  }
288
 
289
  def build_leaderboard_tab(elo_results_file, leaderboard_table_file, show_plot=False):
 
330
  "⭐ Arena Elo",
331
  "Organization",
332
  "License",
 
333
  ],
334
  datatype=[
335
  "number",
 
337
  "number",
338
  "str",
339
  "str",
 
340
  ],
341
  value=arena_table_vals,
342
  elem_id="arena_leaderboard_dataframe",
 
383
  pass
384
 
385
  def update_leaderboard_df(arena_table_vals):
386
+ elo_datarame = pd.DataFrame(arena_table_vals, columns=[ "Rank", "🤖 Model", "⭐ Arena Elo", "Organization", "License"])
387
 
388
  # goal: color the rows based on the rank with styler
389
  def highlight_max(s):
 
410
  "⭐ Arena Elo",
411
  "Organization",
412
  "License",
 
413
  ],
414
  datatype=[
415
  "number",
 
417
  "number",
418
  "str",
419
  "str",
 
420
  ],
421
  value=arena_values,
422
  elem_id="arena_leaderboard_dataframe",
 
432
  "⭐ Arena Elo",
433
  "Organization",
434
  "License",
 
435
  ],
436
  datatype=[
437
  "number",
 
439
  "number",
440
  "str",
441
  "str",
 
442
  ],
443
  value=arena_values,
444
  elem_id="arena_leaderboard_dataframe",