# NOTE(review): the three lines that were here ("Spaces:", "Runtime error",
# "Runtime error") were non-code residue from a web/notebook extraction,
# not part of the program; converted to this comment so the file parses.
# Load the base LLaMA model and merge the Guanaco LoRA adapter into it.
# Note: it can take a while to download LLaMA and the adapter modules.
# You can also use the 13B model by loading it in 4 bits (see load_in_4bit below).
import torch
from peft import PeftModel
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    LlamaTokenizer,
    StoppingCriteria,
    StoppingCriteriaList,
    TextIteratorStreamer,
)

model_name = "baffo32/decapoda-research-llama-7b-hf"
adapters_name = 'timdettmers/guanaco-7b'

print(f"Starting to load the model {model_name} into memory")

# Load the base weights in bfloat16, placing the whole model on GPU 0.
# Uncomment load_in_4bit to quantize to 4 bits instead (needed for larger models).
m = AutoModelForCausalLM.from_pretrained(
    model_name,
    # load_in_4bit=True,
    torch_dtype=torch.bfloat16,
    device_map={"": 0},
)

# Attach the LoRA adapter, then fold its weights into the base model so
# inference no longer goes through the PEFT wrapper.
m = PeftModel.from_pretrained(m, adapters_name)
m = m.merge_and_unload()

tok = LlamaTokenizer.from_pretrained(model_name)
# Force BOS to token id 1 — presumably working around a wrong id in this
# checkpoint's tokenizer config; TODO(review): confirm against the checkpoint.
tok.bos_token_id = 1

# Token ids that should stop generation (0 here); presumably consumed by a
# StoppingCriteria defined later in the file — verify against the caller.
stop_token_ids = [0]

print(f"Successfully loaded the model {model_name} into memory")