# Display-text constants for the CLEM Leaderboard page.
#
# Each assignment sits on its own line; previously the closing triple quote of
# one string was fused with the next assignment, which is a syntax error.

# Page heading. The leading/trailing blank lines are part of the string.
TITLE = """

🏆 CLEM Leaderboard

"""

# Introductory blurb (Markdown links) describing the leaderboard and pointing
# to the paper, the benchmark source code and the published run results.
INTRODUCTION_TEXT = """
The CLEM Leaderboard aims to track, rank and evaluate current cLLMs (chat-optimized Large Language Models) with the suggested pronunciation “clems”. The benchmarking approach is described in [Clembench: Using Game Play to Evaluate Chat-Optimized Language Models as Conversational Agents](https://arxiv.org/abs/2305.13455). Source code for benchmarking "clems" is available here: [Clembench](https://github.com/clembench/clembench) All generated files and results from the benchmark runs are available here: [clembench-runs](https://github.com/clembench/clembench-runs)
"""

# Maps a long name fragment to its abbreviated form.
# NOTE(review): presumably applied as substring replacements on model/run names
# (the "t0.0" -> "" entry looks like it strips a temperature suffix) -- confirm
# against the caller. If so, insertion order may matter, so it is preserved.
# NOTE(review): "koala-13b" and "koala-13B-HF" both shorten to "k-13b"; verify
# that this collision is intended.
SHORT_NAMES = {
    "t0.0": "",
    "claude-v1.3": "cl-1.3",
    "claude-2": "cl-2",
    "claude-2.1": "cl-2.1",
    "claude-instant-1.2": "cl-ins-1.2",
    "gpt-3.5-turbo-0613": "3.5-0613",
    "gpt-3.5-turbo-1106": "3.5-1106",
    "gpt-4-0613": "4-0613",
    "gpt-4-1106-preview": "4-1106",
    "gpt-4-0314": "4-0314",
    "gpt-4": "4",
    "text-davinci-003": "3",
    "luminous-supreme": "lm",
    "koala-13b": "k-13b",
    "falcon-40b": "fal-40b",
    "falcon-7b-instruct": "fal-7b",
    "falcon-40b-instruct": "flc-i-40b",
    "oasst-12b": "oas-12b",
    "oasst-sft-4-pythia-12b-epoch-3.5": "ost-12b",
    "vicuna-13b": "vic-13b",
    "vicuna-33b-v1.3": "vic-33b-v1.3",
    "sheep-duck-llama-2-70b-v1.1": "sd-l2-70b-v1.1",
    "sheep-duck-llama-2-13b": "sd-l2-13b",
    "WizardLM-70b-v1.0": "w-70b-v1.0",
    "CodeLlama-34b-Instruct-hf": "cl-34b",
    "command": "com",
    "Mistral-7B-Instruct-v0.1": "m-i-7b-v0.1",
    "Wizard-Vicuna-13B-Uncensored-HF": "vcn-13b",
    "llama-2-13b-chat-hf": "l2-13b",
    "llama-2-70b-chat-hf": "l2-70b",
    "llama-2-7b-chat-hf": "l2-7b",
    "koala-13B-HF": "k-13b",
    "WizardLM-13b-v1.2": "w-13b-v1.2",
    "vicuna-7b-v1.5": "vic-7b-v1.5",
    "vicuna-13b-v1.5": "vic-13b-v1.5",
    "gpt4all-13b-snoozy": "g4a-13b-s",
    "zephyr-7b-alpha": "z-7b-a",
    "zephyr-7b-beta": "z-7b-b",
}