from opencompass.models import LLaMA2AccessoryModel

# Please follow the LLaMA2-Accessory installation document
# https://llama2-accessory.readthedocs.io/en/latest/install.html
# to install LLaMA2-Accessory

models = [
    dict(
        abbr="Accessory_mixtral_8x7b",
        type=LLaMA2AccessoryModel,
        # additional_stop_symbols=["###"],  # for models tuned with chat template  # noqa
        additional_stop_symbols=[],
        # kwargs for accessory.MetaModel.from_pretrained
        # download from https://huggingface.co/Alpha-VLLM/MoE-Mixtral-7B-8Expert/tree/main/converted_sparse  # noqa
        # see https://llama2-accessory.readthedocs.io/en/latest/projects/mixtral-8x7b.html for more details  # noqa
        pretrained_path="path/to/MoE-Mixtral-7B-8Expert/converted_sparse",
        llama_type=None,  # None for automatic probe from pretrained_path
        llama_config=None,  # None for automatic probe from pretrained_path
        tokenizer_path=None,  # None for automatic probe from pretrained_path
        with_visual=False,
        max_seq_len=4096,
        quant=False,
        batch_size=2,
        # LLaMA2-Accessory needs num_gpus==num_procs
        run_cfg=dict(num_gpus=2, num_procs=2),
    ),
]
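
# The commented block below is a minimal sketch (not part of the upstream config)
# of how this model list is typically combined with datasets and launched in
# OpenCompass. The dataset module path is an assumption; replace it with the
# dataset configs you actually want to evaluate on.
#
#     from mmengine.config import read_base
#
#     with read_base():
#         # hypothetical dataset import; any OpenCompass dataset config works here
#         from ..datasets.gsm8k.gsm8k_gen import gsm8k_datasets
#
#     datasets = [*gsm8k_datasets]
#
# With `models` and `datasets` both defined in one config file, the evaluation
# is usually started with OpenCompass's entry script:
#
#     python run.py path/to/this_config.py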