# Spaces: Running on CPU Upgrade  (page-banner text captured with this file; not part of the module)
import html

from transformers import AutoConfig
# Maps a Hugging Face config ``model_type`` string to the display label used
# for that architecture. Looked up with ``.get(..., "Unknown")`` by
# ``process_architectures``, so any type not listed here is shown as "Unknown".
LLM_MODEL_ARCHS = {
    "stablelm_epoch": "🔴 StableLM-Epoch",
    "stablelm_alpha": "🔴 StableLM-Alpha",
    "mixformer-sequential": "🧑‍💻 Phi φ",
    "RefinedWebModel": "🦅 Falcon",
    "gpt_bigcode": "⭐ StarCoder",
    "RefinedWeb": "🦅 Falcon",
    "baichuan": "🌊 Baichuan 百川",  # river
    "internlm": "🧑‍🎓 InternLM 书生",  # scholar
    "mistral": "Ⓜ️ Mistral",
    "mixtral": "Ⓜ️ Mixtral",
    "codegen": "♾️ CodeGen",
    "chatglm": "💬 ChatGLM",
    "falcon": "🦅 Falcon",
    "bloom": "🌸 Bloom",
    "llama": "🦙 LLaMA",
    "rwkv": "🐦‍⬛ RWKV",
    "deci": "🔵 deci",
    "Yi": "🫂 Yi 人",  # people
    "mpt": "🧱 MPT",
    # The entries below have no emoji yet -- suggestions welcome.
    "gpt_neox": "GPT-NeoX",
    "gpt_neo": "GPT-Neo",
    "gpt2": "GPT-2",
    "gptj": "GPT-J",
    "bart": "BART",
}
def model_hyperlink(link: str, model_name: str) -> str:
    """Return an HTML anchor that opens *link* in a new tab, labelled *model_name*.

    Both arguments are HTML-escaped before interpolation so that a quote in
    the URL cannot break out of the ``href`` attribute and markup in the label
    is rendered as text instead of being interpreted. For inputs without HTML
    special characters the output is unchanged.

    Args:
        link: Target URL placed in the ``href`` attribute.
        model_name: Visible link text.

    Returns:
        The ``<a>`` element as a string, with a dotted-underline inline style.
    """
    safe_link = html.escape(link, quote=True)
    # Text content only needs &, < and > escaped; leave quotes untouched.
    safe_name = html.escape(model_name, quote=False)
    return f'<a target="_blank" href="{safe_link}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{safe_name}</a>'
def process_architectures(model) -> str:
    """Return the display label for the architecture of *model*.

    Loads the model's config with ``AutoConfig.from_pretrained`` and maps its
    ``model_type`` through ``LLM_MODEL_ARCHS``.

    Args:
        model: Identifier passed straight to ``AutoConfig.from_pretrained``.

    Returns:
        The matching label from ``LLM_MODEL_ARCHS``, or ``"Unknown"`` when the
        model type is not listed or the config cannot be loaded.
    """
    try:
        # SECURITY NOTE(review): trust_remote_code=True allows code shipped in
        # the model repository to run while loading the config. Left unchanged
        # to preserve behavior -- confirm that every `model` value passed here
        # comes from a trusted source.
        config = AutoConfig.from_pretrained(model, trust_remote_code=True)
        return LLM_MODEL_ARCHS.get(config.model_type, "Unknown")
    except Exception:
        # Deliberate best-effort: any failure while loading or reading the
        # config falls back to the same label as an unlisted architecture.
        return "Unknown"
def process_score(score, quantization) -> str:
    """Format *score* with two decimals and a one-character quantization marker.

    Args:
        score: Numeric score; formatted with the ``.2f`` format spec.
        quantization: Quantization label for the run.

    Returns:
        The formatted score followed by ``"*"`` when *quantization* is anything
        other than ``"Unquantized"``, otherwise followed by a single space
        (so both forms have the same length).
    """
    marker = "*" if quantization != "Unquantized" else " "
    return f"{score:.2f}{marker}"
def process_quantizations(x) -> str:
    """Return the quantization label for one benchmark record.

    Args:
        x: Mapping-like record indexed by flattened config keys. Only the keys
            relevant to the record's quantization scheme are read, so records
            for other schemes do not need them.
            NOTE(review): presumably a DataFrame row or dict -- confirm
            against the caller.

    Returns:
        One of ``"BnB.4bit"``, ``"BnB.8bit"``, ``"GPTQ.4bit"``, ``"AWQ.4bit"``
        or ``"Unquantized"`` when no rule matches.
    """
    scheme = x["config.backend.quantization_scheme"]

    if scheme == "bnb":
        # `is True` is intentional: a plain truthiness test would also accept
        # truthy placeholders such as NaN.
        if x["config.backend.quantization_config.load_in_4bit"] is True:
            return "BnB.4bit"
        if x["config.backend.quantization_config.load_in_8bit"] is True:
            return "BnB.8bit"
    elif scheme == "gptq":
        if x["config.backend.quantization_config.bits"] == 4:
            return "GPTQ.4bit"
    elif scheme == "awq":
        if x["config.backend.quantization_config.bits"] == 4:
            return "AWQ.4bit"

    return "Unquantized"
def process_kernels(x) -> str:
    """Return the kernel label for one benchmark record.

    Args:
        x: Mapping-like record indexed by flattened config keys. The
            ``version`` key is only read when the scheme is ``"gptq"`` or
            ``"awq"``.
            NOTE(review): presumably a DataFrame row or dict -- confirm
            against the caller.

    Returns:
        ``"GPTQ.ExllamaV1"`` / ``"GPTQ.ExllamaV2"`` for GPTQ versions 1 / 2,
        ``"AWQ.GEMM"`` / ``"AWQ.GEMV"`` for AWQ versions ``"gemm"`` /
        ``"gemv"``, and ``"No Kernel"`` otherwise.
    """
    scheme = x["config.backend.quantization_scheme"]

    if scheme == "gptq":
        version = x["config.backend.quantization_config.version"]
        if version == 1:
            return "GPTQ.ExllamaV1"
        if version == 2:
            return "GPTQ.ExllamaV2"
    elif scheme == "awq":
        version = x["config.backend.quantization_config.version"]
        if version == "gemm":
            return "AWQ.GEMM"
        if version == "gemv":
            return "AWQ.GEMV"

    return "No Kernel"
def test():
    """Manual debugging aid: load one model config and drop into the debugger.

    Not an automated test -- it fetches the config for a hard-coded model and
    then stops at a breakpoint so ``config`` can be inspected interactively.
    """
    model = "Qwen/Qwen1.5-32B"
    # `config` is unused on purpose: it exists to be inspected at the prompt.
    config = AutoConfig.from_pretrained(model, trust_remote_code=True)  # noqa: F841
    # Builtin hook; enters pdb by default and honors PYTHONBREAKPOINT.
    breakpoint()
# Running this file directly starts the interactive debugging helper.
if __name__ == "__main__":
    test()