onekq committed on
Commit
126a4c3
1 Parent(s): 56906eb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -1
app.py CHANGED
@@ -4,8 +4,9 @@ import numpy as np
4
  from collections import defaultdict
5
  from gradio_leaderboard import Leaderboard, SelectColumns
6
 
7
- # Load the DataFrame from the CSV file for detailed pass@k metrics
8
  df = pd.read_csv('results.csv')
 
9
 
10
  # Ensure 'Model' and 'Scenario' columns are strings
11
  df['Model'] = df['Model'].astype(str)
@@ -68,13 +69,19 @@ with demo:
68
  "[Blog](https://huggingface.co/blog/onekq/all-llms-write-great-code) "
69
  "[Github](https://github.com/onekq/WebApp1k) " +
70
  "[AI Models](https://www.aimodels.fyi/papers/arxiv/webapp1k-practical-code-generation-benchmark-web-app)")
 
71
  # Initialize leaderboard with the complete DataFrame
 
 
 
 
72
  complete_pass_at_k = df.groupby('Model')[['Runs', 'Successes']].apply(lambda x: pd.Series({
73
  'pass@1': estimate_pass_at_k(x['Runs'].values, x['Successes'].values, 1).mean(),
74
  'pass@5': estimate_pass_at_k(x['Runs'].values, x['Successes'].values, 5).mean(),
75
  'pass@10': estimate_pass_at_k(x['Runs'].values, x['Successes'].values, 10).mean()
76
  }, index=['pass@1', 'pass@5', 'pass@10'])).reset_index()
77
 
 
78
  leaderboard = init_leaderboard(complete_pass_at_k, height=800)
79
 
80
  # Launch the Gradio interface
 
4
  from collections import defaultdict
5
  from gradio_leaderboard import Leaderboard, SelectColumns
6
 
7
+ # Load the DataFrame from the CSV files for detailed pass@k metrics
8
  df = pd.read_csv('results.csv')
9
+ duo_df = pd.read_csv('results_duo.csv')
10
 
11
  # Ensure 'Model' and 'Scenario' columns are strings
12
  df['Model'] = df['Model'].astype(str)
 
69
  "[Blog](https://huggingface.co/blog/onekq/all-llms-write-great-code) "
70
  "[Github](https://github.com/onekq/WebApp1k) " +
71
  "[AI Models](https://www.aimodels.fyi/papers/arxiv/webapp1k-practical-code-generation-benchmark-web-app)")
72
+
73
  # Initialize leaderboard with the complete DataFrame
74
+ duo_complete_pass_at_k = duo_df.groupby('Model')[['Runs', 'Successes']].apply(lambda x: pd.Series({
75
+ 'pass@1': estimate_pass_at_k(x['Runs'].values, x['Successes'].values, 1).mean()
76
+ }, index=['pass@1'])).reset_index()
77
+
78
  complete_pass_at_k = df.groupby('Model')[['Runs', 'Successes']].apply(lambda x: pd.Series({
79
  'pass@1': estimate_pass_at_k(x['Runs'].values, x['Successes'].values, 1).mean(),
80
  'pass@5': estimate_pass_at_k(x['Runs'].values, x['Successes'].values, 5).mean(),
81
  'pass@10': estimate_pass_at_k(x['Runs'].values, x['Successes'].values, 10).mean()
82
  }, index=['pass@1', 'pass@5', 'pass@10'])).reset_index()
83
 
84
+ duo_leaderboard = init_leaderboard(duo_complete_pass_at_k, height=800)
85
  leaderboard = init_leaderboard(complete_pass_at_k, height=800)
86
 
87
  # Launch the Gradio interface