# KUET CHATBOT

Contributed by Jesiara Khatun and Sadia Islam (CSE 2K19, KUET)

```
!pip install peft -q
!pip install transformers[sentencepiece] -q
!pip install sentencepiece
!pip install accelerate
!pip install bitsandbytes
```

```
import textwrap

import torch
from peft import PeftModel
from transformers import LlamaTokenizer, LlamaForCausalLM, GenerationConfig
from transformers.generation.utils import GreedySearchDecoderOnlyOutput

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
DEVICE

# Tokenizer from the official Llama-2 repo; base weights from a re-sharded
# mirror, loaded in 4-bit so the 7B model fits on a single GPU.
tokenizer = LlamaTokenizer.from_pretrained("meta-llama/Llama-2-7b-hf")

model = LlamaForCausalLM.from_pretrained(
    "abhishek/llama-2-7b-hf-small-shards",
    load_in_4bit=True,
    device_map="auto",
)

# Attach the KUET-specific LoRA adapter on top of the base model.
model = PeftModel.from_pretrained(
    model, "shahidul034/kuet_chatbot", torch_dtype=torch.float16
)

model.config.pad_token_id = tokenizer.pad_token_id = 0  # unk
model.config.bos_token_id = 1
model.config.eos_token_id = 2

model = model.eval()
model = torch.compile(model)

# Alpaca-style instruction prompt; [INSTRUCTION] is replaced per query.
PROMPT_TEMPLATE = """
Below is an instruction that describes a task. Write a response that appropriately completes the request.

### Instruction:
[INSTRUCTION]

### Response:
"""


def create_prompt(instruction: str) -> str:
    return PROMPT_TEMPLATE.replace("[INSTRUCTION]", instruction)


# print(create_prompt("What is (are) Glaucoma ?"))


def generate_response(prompt: str, model: PeftModel) -> GreedySearchDecoderOnlyOutput:
    encoding = tokenizer(prompt, return_tensors="pt")
    input_ids = encoding["input_ids"].to(DEVICE)

    generation_config = GenerationConfig(
        temperature=0.1,
        top_p=0.75,
        repetition_penalty=1.1,
    )
    with torch.inference_mode():
        return model.generate(
            input_ids=input_ids,
            generation_config=generation_config,
            return_dict_in_generate=True,
            output_scores=True,
            max_new_tokens=256,
        )


def format_response(response: GreedySearchDecoderOnlyOutput) -> str:
    # Keep only the text after the "### Response:" marker and line-wrap it.
    decoded_output = tokenizer.decode(response.sequences[0])
    response = decoded_output.split("### Response:")[1].strip()
    return "\n".join(textwrap.wrap(response))


def ask_alpaca(prompt: str, model: PeftModel = model) -> None:
    prompt = create_prompt(prompt)
    response = generate_response(prompt, model)
    print(format_response(response))


ask_alpaca("Where is KUET located?")
```
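In newer `transformers` releases, the bare `load_in_4bit=True` argument is deprecated in favor of an explicit `BitsAndBytesConfig`. A minimal sketch of the equivalent loading call, assuming a recent `transformers` with `bitsandbytes` installed; the NF4 quantization type and fp16 compute dtype shown here are common choices, not settings from the original notebook:

```
import torch
from transformers import BitsAndBytesConfig, LlamaForCausalLM

# Equivalent 4-bit setup via an explicit quantization config.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",              # assumption: NF4, a common default
    bnb_4bit_compute_dtype=torch.float16,   # assumption: fp16 compute
)

model = LlamaForCausalLM.from_pretrained(
    "abhishek/llama-2-7b-hf-small-shards",
    quantization_config=bnb_config,
    device_map="auto",
)
```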
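One caveat in `generate_response`: with the default `do_sample=False`, `model.generate` performs greedy decoding and ignores `temperature` and `top_p` (emitting a warning). If sampled output is actually wanted, enabling the flag is the only change needed; a sketch:

```
# Variant of the generation config with sampling enabled, so temperature
# and top_p actually influence decoding. The do_sample flag is an addition,
# not part of the original notebook.
generation_config = GenerationConfig(
    do_sample=True,
    temperature=0.1,
    top_p=0.75,
    repetition_penalty=1.1,
)
```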
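For quick manual testing, the helpers above can be wrapped in a small loop. This REPL is a hypothetical convenience, not part of the original notebook:

```
# Ask the chatbot questions interactively; type "quit" or "exit" to stop.
while True:
    question = input("You: ")
    if question.strip().lower() in {"quit", "exit"}:
        break
    ask_alpaca(question)
```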