Evan Frick committed on
Commit
ebb58d8
1 Parent(s): 28a71da
Files changed (1) hide show
  1. app.py +17 -4
app.py CHANGED
@@ -24,6 +24,10 @@ def load_data(file_path):
24
  def contains_list(column):
25
  return column.apply(lambda x: isinstance(x, list)).any()
26
 
 
 
 
 
27
  def main():
28
  # Load the JSON data
29
  data = load_data('results.json')
@@ -57,10 +61,19 @@ def main():
57
  if isinstance(submetrics, dict):
58
  for metric_name, value in submetrics.items():
59
  # Create a compound key
60
- key = f"{subkey} - {metric_name}"
61
- flattened_metrics[key] = value
 
 
 
 
 
 
 
 
62
  else:
63
  flattened_metrics[subkey] = submetrics
 
64
  records.append({
65
  "Model": model,
66
  "Type": model_type,
@@ -81,7 +94,7 @@ def main():
81
  df = df.loc[:, ~df.apply(contains_list)]
82
 
83
  if "human" not in selected_benchmark:
84
- df = df[sorted(df.columns, key=lambda s: s.lower() if s != "Type" else "A")]
85
 
86
  # Set 'Model' as the index
87
  df.set_index(["Model"], inplace=True)
@@ -122,7 +135,7 @@ def main():
122
 
123
 
124
  # Display the DataFrame
125
- st.dataframe(df_display.sort_values(df_display.columns[1], ascending=False).style.background_gradient(cmap='summer_r', axis=0)
126
  if len(df_display) else df_display, use_container_width=True, height=500)
127
 
128
  # Optional: Allow user to download the data as CSV
 
24
  def contains_list(column):
25
  return column.apply(lambda x: isinstance(x, list)).any()
26
 
27
+ INVERT = {'brier', 'loss'}
28
+
29
+ SCALE = {'accuracy', 'row-wise pearson', 'confidence_agreement', 'spearman', 'kendalltau', 'arena_under_curve', 'mean_max_score', 'mean_end_score'}
30
+
31
  def main():
32
  # Load the JSON data
33
  data = load_data('results.json')
 
61
  if isinstance(submetrics, dict):
62
  for metric_name, value in submetrics.items():
63
  # Create a compound key
64
+ if metric_name in SCALE:
65
+
66
+ value = 100 * value
67
+
68
+ if metric_name in INVERT:
69
+ key = f"{subkey} - (1 - {metric_name})"
70
+ flattened_metrics[key] = 1 - value
71
+ else:
72
+ key = f"{subkey} - {metric_name}"
73
+ flattened_metrics[key] = value
74
  else:
75
  flattened_metrics[subkey] = submetrics
76
+
77
  records.append({
78
  "Model": model,
79
  "Type": model_type,
 
94
  df = df.loc[:, ~df.apply(contains_list)]
95
 
96
  if "human" not in selected_benchmark:
97
+ df = df[sorted(df.columns, key=lambda s: s.replace("(1", "l").lower() if s != "Type" else "A")]
98
 
99
  # Set 'Model' as the index
100
  df.set_index(["Model"], inplace=True)
 
135
 
136
 
137
  # Display the DataFrame
138
+ st.dataframe(df_display.sort_values(df_display.columns[1], ascending=False).style.background_gradient(cmap='summer_r', axis=0).format(precision=4)
139
  if len(df_display) else df_display, use_container_width=True, height=500)
140
 
141
  # Optional: Allow user to download the data as CSV