Gregor Betz committed
Commit e4b2a4a • 1 Parent(s): a4c8576
Files changed (2):
  1. app.py +88 -1
  2. src/envs.py +4 -4
app.py CHANGED
@@ -243,6 +243,93 @@ with demo:
         with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
             gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
 
+        with gr.TabItem("🚀 Submit here! ", elem_id="llm-benchmark-tab-table", id=3):
+            with gr.Column():
+                with gr.Row():
+                    gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
+
+                with gr.Column():
+                    with gr.Accordion(
+                        f"✅ Finished Evaluations ({len(finished_eval_queue_df)})",
+                        open=False,
+                    ):
+                        with gr.Row():
+                            finished_eval_table = gr.components.Dataframe(
+                                value=finished_eval_queue_df,
+                                headers=EVAL_COLS,
+                                datatype=EVAL_TYPES,
+                                row_count=5,
+                            )
+                    with gr.Accordion(
+                        f"🔄 Running Evaluation Queue ({len(running_eval_queue_df)})",
+                        open=False,
+                    ):
+                        with gr.Row():
+                            running_eval_table = gr.components.Dataframe(
+                                value=running_eval_queue_df,
+                                headers=EVAL_COLS,
+                                datatype=EVAL_TYPES,
+                                row_count=5,
+                            )
+
+                    with gr.Accordion(
+                        f"⏳ Pending Evaluation Queue ({len(pending_eval_queue_df)})",
+                        open=False,
+                    ):
+                        with gr.Row():
+                            pending_eval_table = gr.components.Dataframe(
+                                value=pending_eval_queue_df,
+                                headers=EVAL_COLS,
+                                datatype=EVAL_TYPES,
+                                row_count=5,
+                            )
+            with gr.Row():
+                gr.Markdown("# ✉️✨ Submit your model here!", elem_classes="markdown-text")
+
+            with gr.Row():
+                with gr.Column():
+                    model_name_textbox = gr.Textbox(label="Model name")
+                    revision_name_textbox = gr.Textbox(label="Revision commit", placeholder="main")
+                    model_type = gr.Dropdown(
+                        choices=[t.to_str(" : ") for t in ModelType if t != ModelType.Unknown],
+                        label="Model type",
+                        multiselect=False,
+                        value=None,
+                        interactive=True,
+                    )
+
+                with gr.Column():
+                    precision = gr.Dropdown(
+                        choices=[i.value.name for i in Precision if i != Precision.Unknown],
+                        label="Precision",
+                        multiselect=False,
+                        value="float16",
+                        interactive=True,
+                    )
+                    weight_type = gr.Dropdown(
+                        choices=[i.value.name for i in WeightType],
+                        label="Weights type",
+                        multiselect=False,
+                        value="Original",
+                        interactive=True,
+                    )
+                    base_model_name_textbox = gr.Textbox(label="Base model (for delta or adapter weights)")
+
+            submit_button = gr.Button("Submit Eval")
+            submission_result = gr.Markdown()
+            submit_button.click(
+                add_new_eval,
+                [
+                    model_name_textbox,
+                    base_model_name_textbox,
+                    revision_name_textbox,
+                    precision,
+                    weight_type,
+                    model_type,
+                ],
+                submission_result,
+            )
+
     with gr.Row():
         with gr.Accordion("📙 Citation", open=False):
             citation_button = gr.Textbox(
@@ -256,4 +343,4 @@ with demo:
 scheduler = BackgroundScheduler()
 scheduler.add_job(restart_space, "interval", seconds=1800)
 scheduler.start()
-demo.queue(default_concurrency_limit=40).launch()
+demo.queue(default_concurrency_limit=40).launch()
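The new Submit tab passes six components, in order, to `add_new_eval`, which is defined elsewhere in the Space. For orientation, here is a minimal sketch of a callback with a matching signature; the body is an assumption (the real handler presumably validates the model on the Hub and uploads the request to `QUEUE_REPO`), and the local file path is hypothetical:

```python
# Hypothetical sketch only: a callback whose signature matches the
# six inputs wired up by submit_button.click() above.
import json
import os
from datetime import datetime, timezone


def add_new_eval(
    model: str,        # model_name_textbox
    base_model: str,   # base_model_name_textbox
    revision: str,     # revision_name_textbox
    precision: str,    # precision dropdown
    weight_type: str,  # weight_type dropdown
    model_type: str,   # model_type dropdown
) -> str:
    """Record an evaluation request and return Markdown for submission_result."""
    if not model:
        return "⚠️ Please fill in the model name."
    entry = {
        "model": model,
        "base_model": base_model,
        "revision": revision or "main",  # mirrors the textbox placeholder
        "precision": precision,
        "weight_type": weight_type,
        "model_type": model_type,
        "status": "PENDING",
        "submitted_time": datetime.now(timezone.utc).isoformat(),
    }
    # The real implementation would upload this JSON to QUEUE_REPO via
    # huggingface_hub; writing locally keeps the sketch self-contained.
    os.makedirs("eval-queue", exist_ok=True)
    with open(os.path.join("eval-queue", model.replace("/", "_") + ".json"), "w") as f:
        json.dump(entry, f, indent=2)
    return f"✅ Added {model} to the pending evaluation queue."
```

Because the output component is `submission_result` (a `gr.Markdown`), whatever string the callback returns is rendered as Markdown in the tab.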
src/envs.py CHANGED
@@ -5,10 +5,10 @@ from huggingface_hub import HfApi
 # clone / pull the lmeh eval data
 TOKEN = os.environ.get("TOKEN", None)
 
-OWNER = "demo-leaderboard"
-REPO_ID = f"{OWNER}/leaderboard"
-QUEUE_REPO = f"{OWNER}/requests"
-RESULTS_REPO = f"{OWNER}/results"
+OWNER = "logikon"
+REPO_ID = f"{OWNER}/open_cot_leaderboard"
+QUEUE_REPO = f"{OWNER}/cot-leaderboard-requests"
+RESULTS_REPO = f"{OWNER}/cot-leaderboard-results"
 
 CACHE_PATH=os.getenv("HF_HOME", ".")
 
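With these four constants repointed, the Space reads its queue and results from the logikon repos instead of the demo-leaderboard template repos. A typical consumer of these constants looks like the sketch below; the local directory names are assumptions, and `repo_type="dataset"` assumes both repos are dataset repos:

```python
# Sketch: syncing the request and result repos named in src/envs.py.
# snapshot_download is the standard huggingface_hub API for this;
# EVAL_REQUESTS_PATH / EVAL_RESULTS_PATH are assumed local paths.
import os

from huggingface_hub import snapshot_download

from src.envs import CACHE_PATH, QUEUE_REPO, RESULTS_REPO, TOKEN

EVAL_REQUESTS_PATH = os.path.join(CACHE_PATH, "eval-queue")
EVAL_RESULTS_PATH = os.path.join(CACHE_PATH, "eval-results")

for repo_id, local_dir in [
    (QUEUE_REPO, EVAL_REQUESTS_PATH),
    (RESULTS_REPO, EVAL_RESULTS_PATH),
]:
    snapshot_download(
        repo_id=repo_id,
        local_dir=local_dir,
        repo_type="dataset",
        token=TOKEN,
    )
```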