# Hub repository id; the same id is used for both the tokenizer and the weights.
model_name = "circulus/Llama-2-13b-llava-v1"

# use_fast=False asks for the non-"fast" tokenizer implementation.
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    use_fast=False,
)

# Quantization settings: 4-bit loading with double quantization enabled,
# and bfloat16 as the dtype used for computation.
config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

# Load the causal-LM weights with the quantization settings above;
# device_map="auto" leaves device placement to the library.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=config,
    device_map="auto",
)
- Downloads last month: 15
This model does not have enough activity to be deployed to Inference API (serverless) yet. Increase its social
visibility and check back later, or deploy to Inference Endpoints (dedicated)
instead.