VAPO_data_demo / data_dir /elo_ranks.length_ablation.jsonl
DaYin's picture
Upload 69 files
e90fa51 verified
{"model name ": "gpt-4-0125-preview", "elo overall": 1228, "# battles": 5430}
{"model name ": "Mistral-7B-Instruct-v0.2", "elo overall": 1112, "# battles": 2371}
{"model name ": "Llama-2-70b-chat-hf.nosp", "elo overall": 1112, "# battles": 1942}
{"model name ": "Llama-2-7b-chat-hf.nosp", "elo overall": 1107, "# battles": 1898}
{"model name ": "Llama-2-13b-chat-hf.nosp", "elo overall": 1096, "# battles": 1888}
{"model name ": "zephyr-7b-beta", "elo overall": 1079, "# battles": 3367}
{"model name ": "Yi-34B-Chat", "elo overall": 1047, "# battles": 2428}
{"model name ": "tulu-2-dpo-70b", "elo overall": 1037, "# battles": 3435}
{"model name ": "claude-3-sonnet-20240229", "elo overall": 1014, "# battles": 2608}
{"model name ": "Mixtral-8x7B-Instruct-v0.1", "elo overall": 1011, "# battles": 3418}
{"model name ": "claude-3-opus-20240229", "elo overall": 1008, "# battles": 2367}
{"model name ": "command", "elo overall": 1000, "# battles": 1733}
{"model name ": "mistral-large-2402", "elo overall": 986, "# battles": 1881}
{"model name ": "Llama-2-70b-chat-hf", "elo overall": 955, "# battles": 2391}
{"model name ": "gemini-1.0-pro", "elo overall": 950, "# battles": 1816}
{"model name ": "Llama-2-13b-chat-hf", "elo overall": 934, "# battles": 2350}
{"model name ": "Llama-2-7b-chat-hf", "elo overall": 924, "# battles": 2297}
{"model name ": "Mistral-7B-Instruct-v0.1", "elo overall": 917, "# battles": 2480}
{"model name ": "gemma-7b-it", "elo overall": 915, "# battles": 2519}
{"model name ": "gemma-2b-it", "elo overall": 884, "# battles": 2484}
{"model name ": "vicuna-13b-v1.5", "elo overall": 846, "# battles": 2478}
{"model name ": "gpt-3.5-turbo-0125", "elo overall": 822, "# battles": 14003}