RESULTS_DATASET_ID = "datasets/open-llm-leaderboard/results"
# EXCLUDED_KEYS = {
#     "pretty_env_info",
#     "chat_template",
#     "group_subtasks",
# }
# EXCLUDED_RESULTS_KEYS = {
#     "leaderboard",
# }
# EXCLUDED_RESULTS_LEADERBOARDS_KEYS = {
#     "alias",
# }
DETAILS_DATASET_ID = "datasets/open-llm-leaderboard/{model_name_sanitized}-details"
DETAILS_FILENAME = "samples_{subtask}_*.json"
TASKS = {
    "leaderboard_arc_challenge": ("ARC", "leaderboard_arc_challenge"),
    "leaderboard_bbh": ("BBH", "leaderboard_bbh"),
    "leaderboard_gpqa": ("GPQA", "leaderboard_gpqa"),
    "leaderboard_ifeval": ("IFEval", "leaderboard_ifeval"),
    "leaderboard_math_hard": ("MATH", "leaderboard_math"),
    "leaderboard_mmlu_pro": ("MMLU-Pro", "leaderboard_mmlu_pro"),
    "leaderboard_musr": ("MuSR", "leaderboard_musr"),
}
SUBTASKS = {
    "leaderboard_arc_challenge": ["leaderboard_arc_challenge"],
    "leaderboard_bbh": [
        "leaderboard_bbh_boolean_expressions",
        "leaderboard_bbh_causal_judgement",
        "leaderboard_bbh_date_understanding",
        "leaderboard_bbh_disambiguation_qa",
        "leaderboard_bbh_formal_fallacies",
        "leaderboard_bbh_geometric_shapes",
        "leaderboard_bbh_hyperbaton",
        "leaderboard_bbh_logical_deduction_five_objects",
        "leaderboard_bbh_logical_deduction_seven_objects",
        "leaderboard_bbh_logical_deduction_three_objects",
        "leaderboard_bbh_movie_recommendation",
        "leaderboard_bbh_navigate",
        "leaderboard_bbh_object_counting",
        "leaderboard_bbh_penguins_in_a_table",
        "leaderboard_bbh_reasoning_about_colored_objects",
        "leaderboard_bbh_ruin_names",
        "leaderboard_bbh_salient_translation_error_detection",
        "leaderboard_bbh_snarks",
        "leaderboard_bbh_sports_understanding",
        "leaderboard_bbh_temporal_sequences",
        "leaderboard_bbh_tracking_shuffled_objects_five_objects",
        "leaderboard_bbh_tracking_shuffled_objects_seven_objects",
        "leaderboard_bbh_tracking_shuffled_objects_three_objects",
        "leaderboard_bbh_web_of_lies",
    ],
    "leaderboard_gpqa": [
        "leaderboard_gpqa_extended",
        "leaderboard_gpqa_diamond",
        "leaderboard_gpqa_main",
    ],
    "leaderboard_ifeval": ["leaderboard_ifeval"],
    # "leaderboard_math_hard": [
    "leaderboard_math": [
        "leaderboard_math_algebra_hard",
        "leaderboard_math_counting_and_prob_hard",
        "leaderboard_math_geometry_hard",
        "leaderboard_math_intermediate_algebra_hard",
        "leaderboard_math_num_theory_hard",
        "leaderboard_math_prealgebra_hard",
        "leaderboard_math_precalculus_hard",
    ],
    "leaderboard_mmlu_pro": ["leaderboard_mmlu_pro"],
    "leaderboard_musr": [
        "leaderboard_musr_murder_mysteries",
        "leaderboard_musr_object_placements",
        "leaderboard_musr_team_allocation",
    ],
}
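

# --- Usage sketch (illustrative, not part of the original constants) ---------
# A minimal example of how these templates and lookup tables might be combined
# to locate one model's per-sample details files on the Hugging Face Hub.
# The helper name `list_details_files`, the use of `HfFileSystem`, and the
# exact file layout inside the details repositories are assumptions, not
# confirmed behaviour of this project.
from huggingface_hub import HfFileSystem


def list_details_files(model_name_sanitized: str, task: str) -> list[str]:
    """Glob one model's details repo for the sample files of one task."""
    fs = HfFileSystem()
    # Fill the repo-path template, e.g. "datasets/open-llm-leaderboard/<model>-details".
    repo_path = DETAILS_DATASET_ID.format(model_name_sanitized=model_name_sanitized)
    # The second tuple element in TASKS is the key into SUBTASKS,
    # e.g. "leaderboard_math_hard" -> "leaderboard_math".
    _display_name, subtasks_key = TASKS[task]
    paths: list[str] = []
    for subtask in SUBTASKS[subtasks_key]:
        # DETAILS_FILENAME already contains a "*" wildcard for the timestamp part.
        pattern = f"{repo_path}/**/{DETAILS_FILENAME.format(subtask=subtask)}"
        paths.extend(fs.glob(pattern))
    return paths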