woters committed on
Commit
1acb2f0
1 Parent(s): 85a2ba5
Files changed (2) hide show
  1. app.py +115 -25
  2. requirements.txt +2 -1
app.py CHANGED
@@ -4,6 +4,8 @@ import random
4
  import firebase_admin
5
  from firebase_admin import credentials
6
  from firebase_admin import firestore
 
 
7
 
8
  CSV_FILE_PATH = "qa_pairs.csv"
9
 
@@ -29,8 +31,8 @@ def fetch_questions():
29
  return questions_list
30
 
31
 
32
- def display_answers(question, model1, model2):
33
- df = pd.read_csv(CSV_FILE_PATH)
34
  answers = {
35
  model1: "No answer available for Model 1",
36
  model2: "No answer available for Model 2",
@@ -38,7 +40,7 @@ def display_answers(question, model1, model2):
38
  for model in [model1, model2]:
39
  filtered_df = df[(df['question'] == question) & (df['model'] == model)]
40
  if not filtered_df.empty:
41
- answers[model] = f"**{model} Answer:**\n{filtered_df['answer'].iloc[0]}"
42
  return answers[model1], answers[model2]
43
 
44
 
@@ -65,11 +67,23 @@ def update_symbols1(q,m1,a1,m2,a2):
65
  )
66
  votes_ref = db.collection('votes')
67
  vote_doc = votes_ref.document(m1).get()
 
 
 
 
 
 
 
 
 
 
 
 
68
  if vote_doc.exists:
69
- votes_ref.document(m1).update({'count': firestore.Increment(1)})
70
  else:
71
- votes_ref.document(m1).set({'count': 1})
72
- update_total_votes()
73
  return update_symbols(q, m1, a1, m2, a2)
74
 
75
 
@@ -83,7 +97,7 @@ def update_symbols2(q, m1, a1, m2, a2):
83
  output2=a2,
84
  outcome='tie'
85
  )
86
- update_total_votes()
87
  return update_symbols(q, m1, a1, m2, a2)
88
 
89
  def update_symbols3(q, m1, a1, m2, a2):
@@ -98,17 +112,29 @@ def update_symbols3(q, m1, a1, m2, a2):
98
  )
99
  votes_ref = db.collection('votes')
100
  vote_doc = votes_ref.document(m2).get()
 
 
 
 
 
 
 
 
 
 
 
 
101
  if vote_doc.exists:
102
- votes_ref.document(m2).update({'count': firestore.Increment(1)})
103
  else:
104
- votes_ref.document(m2).set({'count': 1})
105
- update_total_votes()
106
  return update_symbols(q, m1, a1, m2, a2)
107
 
108
  def update_symbols(q,m1,a1,m2,a2):
109
  random_question = random.choice(questions)
110
  random_model1, random_model2 = random.sample(models, 2)
111
- answer1, answer2 = display_answers(random_question, random_model1, random_model2)
112
  m1 = gr.Markdown(f"{random_model1}", visible=False)
113
  a1 = gr.Markdown(answer1)
114
  q = gr.Markdown(f"{random_question}")
@@ -145,29 +171,92 @@ def log_vote(model1, model2, question, output1, output2, outcome):
145
 
146
  def fetch_and_format_leaderboard():
147
  vote_counts_ref = db.collection('votes')
148
- # Ensure you're using FieldPath.document_id() correctly
149
  docs = vote_counts_ref.stream()
150
 
151
  leaderboard = []
152
  for doc in docs:
 
153
  model_name = doc.id
154
- vote_count = doc.to_dict().get('count', 0)
155
- leaderboard.append(f"{model_name}: {vote_count} votes")
156
-
157
- # Optional: Sort the leaderboard by vote count in descending order
158
- leaderboard.sort(key=lambda x: int(x.split(': ')[1].split(' ')[0]), reverse=True)
159
-
160
- return "\n".join(leaderboard)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
161
 
162
  #questions = list_questions()
163
- models = list_models()
164
- random_question = 'Click any button to start!'
165
- random_model1, random_model2 = '1', '2'
166
- answer1, answer2 = display_answers(random_question, random_model1, random_model2)
167
 
168
 
169
  db = firestore.client()
170
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
171
  questions = []
172
  questions_ = fetch_questions()
173
  for question in questions_:
@@ -175,6 +264,7 @@ for question in questions_:
175
 
176
  votes_ref = db.collection('votes')
177
 
 
178
  def create_app():
179
 
180
  print('-----------------------')
@@ -204,8 +294,8 @@ def create_app():
204
  # b5 = gr.Button("Show Leaderboard")
205
 
206
  initial_leaderboard_data = fetch_and_format_leaderboard()
207
- leaderboard_display = gr.Textbox(value=initial_leaderboard_data,label="Leaderboard", placeholder="Leaderboard will be displayed here.",
208
- lines=30, visible=True)
209
  #b5.click(fn=fetch_and_format_leaderboard, inputs=[], outputs=leaderboard_display)
210
 
211
  b4.click(update_b, inputs=[q,m1,a1,m2,a2], outputs=[q,m1,a1,m2,a2,b1,b2,b3, b4])
 
4
  import firebase_admin
5
  from firebase_admin import credentials
6
  from firebase_admin import firestore
7
+ from trueskill import Rating
8
+ import trueskill
9
 
10
  CSV_FILE_PATH = "qa_pairs.csv"
11
 
 
31
  return questions_list
32
 
33
 
34
+ def display_answers(question, model1, model2, df):
35
+ #df = pd.read_csv(CSV_FILE_PATH)
36
  answers = {
37
  model1: "No answer available for Model 1",
38
  model2: "No answer available for Model 2",
 
40
  for model in [model1, model2]:
41
  filtered_df = df[(df['question'] == question) & (df['model'] == model)]
42
  if not filtered_df.empty:
43
+ answers[model] = f"**Answer:**\n{filtered_df['answer'].iloc[0]}"
44
  return answers[model1], answers[model2]
45
 
46
 
 
67
  )
68
  votes_ref = db.collection('votes')
69
  vote_doc = votes_ref.document(m1).get()
70
+ elo_count_1 = vote_doc.get('elo_rating')
71
+ elo1 = Rating(elo_count_1)
72
+ if vote_doc.exists:
73
+ votes_ref.document(m1).update({'win_count': firestore.Increment(1)})
74
+ else:
75
+ votes_ref.document(m1).set({'win_count': 1})
76
+ vote_doc = votes_ref.document(m2).get()
77
+ elo_count_2 = vote_doc.get('elo_rating')
78
+ elo2 = Rating(elo_count_2)
79
+ elo1, elo2 = trueskill.rate_1vs1(elo1, elo2)
80
+ votes_ref.document(m2).update({'elo_rating': elo2.mu})
81
+ votes_ref.document(m1).update({'elo_rating': elo1.mu})
82
  if vote_doc.exists:
83
+ votes_ref.document(m2).update({'loss_count': firestore.Increment(1)})
84
  else:
85
+ votes_ref.document(m2).set({'loss_count': 1})
86
+
87
  return update_symbols(q, m1, a1, m2, a2)
88
 
89
 
 
97
  output2=a2,
98
  outcome='tie'
99
  )
100
+ #update_total_votes()
101
  return update_symbols(q, m1, a1, m2, a2)
102
 
103
  def update_symbols3(q, m1, a1, m2, a2):
 
112
  )
113
  votes_ref = db.collection('votes')
114
  vote_doc = votes_ref.document(m2).get()
115
+ elo_count_2 = vote_doc.get('elo_rating')
116
+ elo2 = Rating(elo_count_2)
117
+ if vote_doc.exists:
118
+ votes_ref.document(m2).update({'win_count': firestore.Increment(1)})
119
+ else:
120
+ votes_ref.document(m2).set({'win_count': 1})
121
+ vote_doc = votes_ref.document(m1).get()
122
+ elo_count_1 = vote_doc.get('elo_rating')
123
+ elo1 = Rating(elo_count_1)
124
+ elo1, elo2 = trueskill.rate_1vs1(elo2, elo1)
125
+ votes_ref.document(m2).update({'elo_rating': elo2.mu})
126
+ votes_ref.document(m1).update({'elo_rating': elo1.mu})
127
  if vote_doc.exists:
128
+ votes_ref.document(m1).update({'loss_count': firestore.Increment(1)})
129
  else:
130
+ votes_ref.document(m1).set({'loss_count': 1})
131
+ #update_total_votes()
132
  return update_symbols(q, m1, a1, m2, a2)
133
 
134
  def update_symbols(q,m1,a1,m2,a2):
135
  random_question = random.choice(questions)
136
  random_model1, random_model2 = random.sample(models, 2)
137
+ answer1, answer2 = display_answers(random_question, random_model1, random_model2, combined_df)
138
  m1 = gr.Markdown(f"{random_model1}", visible=False)
139
  a1 = gr.Markdown(answer1)
140
  q = gr.Markdown(f"{random_question}")
 
171
 
172
  def fetch_and_format_leaderboard():
173
  vote_counts_ref = db.collection('votes')
 
174
  docs = vote_counts_ref.stream()
175
 
176
  leaderboard = []
177
  for doc in docs:
178
+ model_data = doc.to_dict()
179
  model_name = doc.id
180
+ win_count = model_data.get('win_count', 0)
181
+ loss_count = model_data.get('loss_count', 0)
182
+ total_matches = win_count + loss_count
183
+ win_rate = (win_count / total_matches) * 100 if total_matches > 0 else 0
184
+ elo_rating = model_data.get('elo_rating', 0)
185
+
186
+ leaderboard.append({
187
+ "model": model_name,
188
+ "win_rate": win_rate,
189
+ "elo_rating": elo_rating
190
+ })
191
+
192
+ # Sort the leaderboard by elo_rating in descending order
193
+ leaderboard.sort(key=lambda x: x['win_rate'], reverse=True)
194
+ leaderboard_df = pd.DataFrame(leaderboard)
195
+ leaderboard_df['Rank'] = leaderboard_df['win_rate'].rank(method='max', ascending=False).astype(int)
196
+
197
+ # Reorder columns to match your requirement
198
+ leaderboard_df = leaderboard_df[['Rank', 'model', 'win_rate', 'elo_rating'
199
+ ]]
200
+
201
+ # Format the DataFrame as a string for display; you might adjust this part based on how Gradio expects the data
202
+ # For Gradio, you might directly return the DataFrame instead of converting it to a string
203
+ return leaderboard_df
204
 
205
  #questions = list_questions()
206
+
 
 
 
207
 
208
 
209
  db = firestore.client()
210
 
211
def fetch_questions_c(collection):
    """Return every document of the Firestore collection *collection* as a dict.

    Args:
        collection: name of the collection to read through the module-level
            ``db`` client.

    Returns:
        list: one ``to_dict()`` result per streamed document, in stream order.
    """
    return [snapshot.to_dict() for snapshot in db.collection(collection).stream()]
219
+
220
# Firestore collection names, one per model; the same strings are used as the
# value of the 'model' column for that model's rows.
models = ['codekobzar','gpt-4','llama-2-70b-chat','sherlock-no-rag','sherlock-rag','ukrainenow']

# Raw documents of every collection, fetched in the order of `models`.
(codekobzar, gpt, llama,
 sherlocknorag, sherlockrag, ukrainenow) = [fetch_questions_c(name) for name in models]

# One frame per model, each tagged with the model it came from.
df1, df2, df3, df4, df5, df6 = [
    pd.DataFrame(rows).assign(model=name)
    for rows, name in zip(
        (codekobzar, gpt, llama, sherlocknorag, sherlockrag, ukrainenow),
        models,
    )
]

# Single lookup table: drop the 'input' column and rename the remaining
# fields to the 'question' / 'answer' names used for filtering elsewhere.
combined_df = (
    pd.concat([df1, df2, df3, df4, df5, df6], ignore_index=True)
    .drop(columns='input')
    .rename(columns={'instruction': 'question', 'output': 'answer'})
)
246
+
247
# Seed one document per model in the 'votes' collection with zeroed counters
# and a starting rating of 25.
#
# All fields are written in ONE set() call: set() without merge replaces the
# whole document, so writing the three fields with three separate set() calls
# leaves only the last one ('elo_rating') stored.
#
# NOTE(review): this runs at import time and overwrites existing documents,
# so previously recorded results are reset on every start - confirm that this
# is intended.
votes_ref = db.collection('votes')
for model in models:
    votes_ref.document(model).set({
        'win_count': 0,
        'loss_count': 0,
        'elo_rating': 25,
    })
253
+
254
+
255
+
256
# Initial placeholder values: a prompt string instead of a real question and
# two dummy model names, plus whatever display_answers returns for them.
random_model1 = '1'
random_model2 = '2'
random_question = 'Click any button to start!'
answer1, answer2 = display_answers(
    random_question,
    random_model1,
    random_model2,
    combined_df,
)
259
+
260
  questions = []
261
  questions_ = fetch_questions()
262
  for question in questions_:
 
264
 
265
  votes_ref = db.collection('votes')
266
 
267
+
268
  def create_app():
269
 
270
  print('-----------------------')
 
294
  # b5 = gr.Button("Show Leaderboard")
295
 
296
  initial_leaderboard_data = fetch_and_format_leaderboard()
297
+ #leaderboard_display = gr.Textbox(value=initial_leaderboard_data,label="Leaderboard", placeholder="Leaderboard will be displayed here.",lines=30, visible=True)
298
+ leaderboard_display = gr.Dataframe(value=initial_leaderboard_data, label="Leaderboard")
299
  #b5.click(fn=fetch_and_format_leaderboard, inputs=[], outputs=leaderboard_display)
300
 
301
  b4.click(update_b, inputs=[q,m1,a1,m2,a2], outputs=[q,m1,a1,m2,a2,b1,b2,b3, b4])
requirements.txt CHANGED
@@ -1,2 +1,3 @@
1
  firebase-admin
2
- pandas
 
 
1
  firebase-admin
2
+ pandas
3
+ trueskill