Spaces:

open-llm-leaderboard
/

comparator

Running

App Files Files Community

albertvillanova HF staff committed on 15 days ago

Commit

c660995

•

1 Parent(s): 25557b5

Use latest result per model

Browse files

Files changed (1) hide show

app.py +28 -12

app.py CHANGED Viewed

@@ -11,32 +11,48 @@ RESULTS_DATASET_ID = "datasets/open-llm-leaderboard/results"
 fs = HfFileSystem()
-def fetch_results():
     # Pre-commit version (all lines are marked as removed in this diff).
     # Lists every file matching "**/**/*.json" under RESULTS_DATASET_ID via
     # `fs.glob` and returns the matches with the first
     # len(RESULTS_DATASET_ID) + 1 characters sliced off, i.e. without the
     # dataset prefix and the separator that follows it.
-    files = fs.glob(f"{RESULTS_DATASET_ID}/**/**/*.json")
-    results = [file[len(RESULTS_DATASET_ID) +1:] for file in files]
-    return results
 def load_result(result_path) -> pd.DataFrame:
     # Pre-commit version: `result_path` is relative to RESULTS_DATASET_ID, so
     # the dataset prefix is prepended before the file is opened through `fs`.
     # Returns a one-column DataFrame for a single result file.
-    with fs.open(f"{RESULTS_DATASET_ID}/{result_path}", "r") as f:
         data = json.load(f)
     # Column label for this result; falls back to "Model" when the JSON has
     # no "model_name" key.
     model_name = data.get("model_name", "Model")
     # json_normalize on a one-element list yields a one-row frame; that row is
     # then taken as a Series, its index axis is named "Parameters", the Series
     # is named after the model, and it is converted to a one-column frame.
     df = pd.json_normalize([data])
     return df.iloc[0].rename_axis("Parameters").rename(model_name).to_frame()  # .reset_index()
-def render_result_1(result_path, results):
     # Pre-commit signature: receives the selected result path directly.
     # Loads that result and concatenates it (axis=1) to the LEFT of columns
     # 0 and 2 of `results`, so the column at position 1 of `results` is
     # dropped and the newly loaded result takes the first data slot.
     # `set_index("Parameters")` requires one of the two selected columns to be
     # named "Parameters" -- presumably column 0; confirm against the
     # initial value of the `compared_results` component.
     # The final reset_index() turns the index back into a regular column.
     result = load_result(result_path)
     return pd.concat([result, results.iloc[:, [0, 2]].set_index("Parameters")], axis=1).reset_index()
-def render_result_2(result_path, results):
     # Pre-commit signature: receives the selected result path directly.
     # Loads that result and concatenates it (axis=1) to the RIGHT of columns
     # 0 and 1 of `results`, so any column at position 2 of `results` is
     # dropped and the newly loaded result takes the second data slot.
     # `set_index("Parameters")` requires one of the two selected columns to be
     # named "Parameters" -- presumably column 0; confirm against the
     # initial value of the `compared_results` component.
     # The final reset_index() turns the index back into a regular column.
     result = load_result(result_path)
     return pd.concat([results.iloc[:, [0, 1]].set_index("Parameters"), result], axis=1).reset_index()
 if __name__ == "__main__":
-    results = fetch_results()
     with gr.Blocks(fill_height=True) as demo:
         gr.HTML("<h1 style='text-align: center;'>Compare Results of the 🤗 Open LLM Leaderboard</h1>")
@@ -44,10 +60,10 @@ if __name__ == "__main__":
         with gr.Row():
             with gr.Column():
-                result_path_1 = gr.Dropdown(choices=results, label="Results")
                 load_btn_1 = gr.Button("Load")
             with gr.Column():
-                result_path_2 = gr.Dropdown(choices=results, label="Results")
                 load_btn_2 = gr.Button("Load")
         with gr.Row():
@@ -61,12 +77,12 @@ if __name__ == "__main__":
         load_btn_1.click(
             fn=render_result_1,
-            inputs=[result_path_1, compared_results],
             outputs=compared_results,
         )
         load_btn_2.click(
             fn=render_result_2,
-            inputs=[result_path_2, compared_results],
             outputs=compared_results,
         )

 fs = HfFileSystem()
+def fetch_result_paths():
     # Lists every file matching "**/**/*.json" under RESULTS_DATASET_ID via
     # `fs.glob` and returns the matches unchanged, i.e. still carrying the
     # dataset prefix (the prefix-stripping step is left commented out below).
+    paths = fs.glob(f"{RESULTS_DATASET_ID}/**/**/*.json")
+    # results = [file[len(RESULTS_DATASET_ID) +1:] for file in files]
+    return paths
+def filter_latest_result_path_per_model(paths):
     # Groups `paths` by model id and returns a dict {model_id: path} holding
     # one path per model.
     #
     # The model id is the path with the first len(RESULTS_DATASET_ID) + 1
     # characters removed and the last "/"-separated component (the file name)
     # split off. If the remainder contains no "/", the two-name unpacking of
     # rsplit(...) raises ValueError.
     #
     # "Latest" is decided by max() over the path strings, i.e. the
     # lexicographically greatest path in each group.
     # NOTE(review): this is only chronological if file names embed a sortable
     # timestamp -- confirm against the dataset's naming scheme.
+    from collections import defaultdict
+    d = defaultdict(list)
+    for path in paths:
+        model_id, _ = path[len(RESULTS_DATASET_ID) +1:].rsplit("/", 1)
+        d[model_id].append(path)
     # The comprehension variable `paths` below is the per-model list from `d`;
     # it shadows the function parameter of the same name inside the
     # comprehension only.
+    return {model_id: max(paths) for model_id, paths in d.items()}
+def get_result_path_from_model(model_id, result_path_per_model):
     # Plain subscript lookup of `model_id` in `result_path_per_model`.
     # There is no fallback: an unknown key (or None, if nothing was selected
     # by the caller) propagates the mapping's own error, KeyError for a dict.
+    return result_path_per_model[model_id]
 def load_result(result_path) -> pd.DataFrame:
     # Post-commit version: `result_path` is passed to `fs.open` as-is, so it
     # must already include the dataset prefix.
     # Returns a one-column DataFrame for a single result file.
+    with fs.open(result_path, "r") as f:
         data = json.load(f)
     # Column label for this result; falls back to "Model" when the JSON has
     # no "model_name" key.
     model_name = data.get("model_name", "Model")
     # json_normalize on a one-element list yields a one-row frame; that row is
     # then taken as a Series, its index axis is named "Parameters", the Series
     # is named after the model, and it is converted to a one-column frame.
     df = pd.json_normalize([data])
     return df.iloc[0].rename_axis("Parameters").rename(model_name).to_frame()  # .reset_index()
+def render_result_1(model_id, results):
     # Resolves `model_id` to a result path, loads it, and concatenates it
     # (axis=1) to the LEFT of columns 0 and 2 of `results`, so the column at
     # position 1 of `results` is dropped and the newly loaded result takes
     # the first data slot. The final reset_index() turns the index back into
     # a regular column.
     #
     # Reads the module-level name `latest_result_path_per_model`, which in the
     # visible code is assigned only inside the `if __name__ == "__main__":`
     # block; calling this function from an importing module would raise
     # NameError.
     # `set_index("Parameters")` requires one of the two selected columns to be
     # named "Parameters" -- presumably column 0; confirm against the
     # initial value of the `compared_results` component.
+    result_path = get_result_path_from_model(model_id, latest_result_path_per_model)
     result = load_result(result_path)
     return pd.concat([result, results.iloc[:, [0, 2]].set_index("Parameters")], axis=1).reset_index()
+def render_result_2(model_id, results):
     # Resolves `model_id` to a result path, loads it, and concatenates it
     # (axis=1) to the RIGHT of columns 0 and 1 of `results`, so any column at
     # position 2 of `results` is dropped and the newly loaded result takes
     # the second data slot. The final reset_index() turns the index back into
     # a regular column.
     #
     # Reads the module-level name `latest_result_path_per_model`, which in the
     # visible code is assigned only inside the `if __name__ == "__main__":`
     # block; calling this function from an importing module would raise
     # NameError.
     # `set_index("Parameters")` requires one of the two selected columns to be
     # named "Parameters" -- presumably column 0; confirm against the
     # initial value of the `compared_results` component.
+    result_path = get_result_path_from_model(model_id, latest_result_path_per_model)
     result = load_result(result_path)
     return pd.concat([results.iloc[:, [0, 1]].set_index("Parameters"), result], axis=1).reset_index()
 if __name__ == "__main__":
+    latest_result_path_per_model = filter_latest_result_path_per_model(fetch_result_paths())
     with gr.Blocks(fill_height=True) as demo:
         gr.HTML("<h1 style='text-align: center;'>Compare Results of the 🤗 Open LLM Leaderboard</h1>")
         with gr.Row():
             with gr.Column():
+                model_id_1 = gr.Dropdown(choices=list(latest_result_path_per_model.keys()), label="Results")
                 load_btn_1 = gr.Button("Load")
             with gr.Column():
+                model_id_2 = gr.Dropdown(choices=list(latest_result_path_per_model.keys()), label="Results")
                 load_btn_2 = gr.Button("Load")
         with gr.Row():
         load_btn_1.click(
             fn=render_result_1,
+            inputs=[model_id_1, compared_results],
             outputs=compared_results,
         )
         load_btn_2.click(
             fn=render_result_2,
+            inputs=[model_id_2, compared_results],
             outputs=compared_results,
         )