Spaces:

alexandrainst
/

radial-plot-generator

Running

App Files Community

saattrupdan committed on Jan 25

Commit

76e4363

•

1 Parent(s): 3e57038

feat: Change layout, fix task order, fix colours for models, fix range

Browse files

Files changed (2) hide show

.gitignore +1 -0
app.py +23 -17

.gitignore CHANGED Viewed

	@@ -1 +1,2 @@
1	.venv


1	.venv
2	+ __pycache__

app.py CHANGED Viewed

@@ -159,14 +159,15 @@ def main() -> None:
             "of different language models on different tasks. It is based on the "
             "generative results from the [ScandEval benchmark](https://scandeval.com)."
         )
-        with gr.Row():
-            with gr.Column():
                 language_names_dropdown = gr.Dropdown(
                     choices=all_languages,
                     multiselect=True,
                     label="Languages",
                     value=["Danish"],
                     interactive=True,
                 )
                 model_ids_dropdown = gr.Dropdown(
                     choices=danish_models,
@@ -174,17 +175,15 @@ def main() -> None:
                     label="Models",
                     value=["gpt-4-0613", "mistralai/Mistral-7B-v0.1"],
                     interactive=True,
                 )
                 use_win_ratio_checkbox = gr.Checkbox(
                     label="Compare models with win ratios (as opposed to raw scores)",
                     value=True,
                     interactive=True,
                 )
-                gr.Markdown(
-                    "<center>Made with ❤️ by the <a href=\"https://alexandra.dk\">"
-                    "Alexandra Institute</a>.</center>"
-                )
-            with gr.Column():
                 plot = gr.Plot(
                     value=produce_radial_plot(
                         model_ids_dropdown.value,
@@ -193,6 +192,11 @@ def main() -> None:
                         results_dfs=results_dfs,
                     ),
                 )
         language_names_dropdown.change(
             fn=partial(update_model_ids_dropdown, results_dfs=results_dfs),
@@ -371,7 +375,7 @@ def produce_radial_plot(
                 if model_id not in results_dfs_filtered[language].index:
                     continue
                 score = results_dfs_filtered[language].loc[model_id][task]
-                win_ratio = np.mean([
                     score >= other_score
                     for other_score in results_dfs_filtered[language][task].dropna()
                 ])
@@ -383,22 +387,21 @@ def produce_radial_plot(
                 result_list.append(np.mean(scores))
         results.append(result_list)
-    # Sort the results to avoid misleading radial plots
-    model_idx_with_highest_variance = np.argmax(
-        [np.std(result_list) for result_list in results]
-    )
-    sorted_idxs = np.argsort(results[model_idx_with_highest_variance])
-    results = [np.asarray(result_list)[sorted_idxs] for result_list in results]
-    tasks = np.asarray(tasks)[sorted_idxs]
     # Add the results to a plotly figure
     fig = go.Figure()
     for model_id, result_list in zip(model_ids, results):
         fig.add_trace(go.Scatterpolar(
             r=result_list,
             theta=[task.name for task in tasks],
             fill='toself',
             name=model_id,
         ))
     languages_str = ""
@@ -414,7 +417,10 @@ def produce_radial_plot(
     # Builds the radial plot from the results
     fig.update_layout(
-        polar=dict(radialaxis=dict(visible=True)), showlegend=True, title=title
     )
     logger.info("Successfully produced radial plot.")

             "of different language models on different tasks. It is based on the "
             "generative results from the [ScandEval benchmark](https://scandeval.com)."
         )
+        with gr.Column():
+            with gr.Row():
                 language_names_dropdown = gr.Dropdown(
                     choices=all_languages,
                     multiselect=True,
                     label="Languages",
                     value=["Danish"],
                     interactive=True,
+                    scale=2,
                 )
                 model_ids_dropdown = gr.Dropdown(
                     choices=danish_models,
                     label="Models",
                     value=["gpt-4-0613", "mistralai/Mistral-7B-v0.1"],
                     interactive=True,
+                    scale=2,
                 )
                 use_win_ratio_checkbox = gr.Checkbox(
                     label="Compare models with win ratios (as opposed to raw scores)",
                     value=True,
                     interactive=True,
+                    scale=1,
                 )
+            with gr.Row():
                 plot = gr.Plot(
                     value=produce_radial_plot(
                         model_ids_dropdown.value,
                         results_dfs=results_dfs,
                     ),
                 )
+            with gr.Row():
+                gr.Markdown(
+                    "<center>Made with ❤️ by the <a href=\"https://alexandra.dk\">"
+                    "Alexandra Institute</a>.</center>"
+                )
         language_names_dropdown.change(
             fn=partial(update_model_ids_dropdown, results_dfs=results_dfs),
                 if model_id not in results_dfs_filtered[language].index:
                     continue
                 score = results_dfs_filtered[language].loc[model_id][task]
+                win_ratio = 100 * np.mean([
                     score >= other_score
                     for other_score in results_dfs_filtered[language][task].dropna()
                 ])
                 result_list.append(np.mean(scores))
         results.append(result_list)
     # Add the results to a plotly figure
     fig = go.Figure()
     for model_id, result_list in zip(model_ids, results):
+        # Generate colour for model, as an RGB triplet. The same model will always
+        # have the same colour
+        random.seed(model_id)
+        r, g, b = tuple(random.randint(0, 255) for _ in range(3))
         fig.add_trace(go.Scatterpolar(
             r=result_list,
             theta=[task.name for task in tasks],
             fill='toself',
             name=model_id,
+            line=dict(color=f'rgb({r}, {g}, {b})'),
         ))
     languages_str = ""
     # Builds the radial plot from the results
     fig.update_layout(
+        polar=dict(radialaxis=dict(visible=True, range=[0, 100])),
+        showlegend=True,
+        title=title,
+        width=800,
     )
     logger.info("Successfully produced radial plot.")