---
license: apache-2.0
---

How to use:

```python
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import PeftModel

# Load the base model with 4-bit (NF4) quantization
base_model_id = "mistralai/Mistral-7B-v0.1"
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)
model = AutoModelForCausalLM.from_pretrained(base_model_id, quantization_config=bnb_config)

eval_tokenizer = AutoTokenizer.from_pretrained(
    base_model_id,
    add_bos_token=True,
    trust_remote_code=True,
)
eval_tokenizer.pad_token = eval_tokenizer.eos_token

# Load the PEFT adapter weights on top of the base model
ft_model = PeftModel.from_pretrained(model, "mistral-samsum-finetune/checkpoint-150")

# Format the sample input with the prompt template used during fine-tuning
def formatting_func(example):
    text = f"### Summarize this dialog:\n{example['dialogue']}\n### Summary:\n{example['summary']}"
    return text

max_length = 256
eval_prompt = {
    "dialogue": "Amanda: I baked cookies. Do you want some? Jerry: Sure! Amanda: I'll bring you tomorrow :-)",
    "summary": "",
}
eval_prompt = formatting_func(eval_prompt)

# Generate a summary for the sample input
model_input = eval_tokenizer(
    eval_prompt,
    truncation=True,
    max_length=max_length,
    padding="max_length",
    return_tensors="pt",
).to("cuda")

ft_model.eval()
with torch.no_grad():
    print(eval_tokenizer.decode(
        ft_model.generate(**model_input, max_new_tokens=256, repetition_penalty=1.15)[0],
        skip_special_tokens=True,
    ))

# Here is the output:
"""
### Summarize this dialog:
Amanda: I baked cookies. Do you want some? Jerry: Sure! Amanda: I'll bring you tomorrow :-)
### Summary:
Jerry will get some cookies from Amanda tomorrow.
"""
```
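
To summarize new dialogues, it can be convenient to wrap the prompt formatting and generation in a small helper. The sketch below is illustrative only: it assumes the `ft_model`, `eval_tokenizer`, and `formatting_func` objects defined in the block above, and the helper name `summarize_dialogue` is not part of this repository.

```python
def summarize_dialogue(dialogue: str, max_new_tokens: int = 100) -> str:
    # Illustrative helper (not part of this repo): reuse the fine-tuning prompt
    # template with an empty summary slot, then decode only the generated tokens.
    prompt = formatting_func({"dialogue": dialogue, "summary": ""})
    inputs = eval_tokenizer(prompt, return_tensors="pt").to("cuda")
    with torch.no_grad():
        output_ids = ft_model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            repetition_penalty=1.15,
        )
    # Keep only the tokens generated after the prompt
    new_tokens = output_ids[0][inputs["input_ids"].shape[1]:]
    return eval_tokenizer.decode(new_tokens, skip_special_tokens=True).strip()

print(summarize_dialogue("Amanda: I baked cookies. Do you want some? Jerry: Sure!"))
```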