shahidul034
/

KUET_information_LLM

Generated from Trainer

Model card Files Files and versions Community

KUET_information_LLM / README.md

shahidul034's picture

Update README.md

533070b 12 months ago

|

2.39 kB

	# KUET CHATBOT
	Contributed by Jesiara Khatun and Sadia Islam (CSE 2K19, KUET)
	```
	!pip install peft -q
	!pip install transformers[sentencepiece] -q
	!pip install sentencepiece
	!pip install accelerate
	!pip install bitsandbytes
	```

	```python
	import torch
	from peft import PeftModel
	import transformers
	import textwrap
	from transformers import LlamaTokenizer, LlamaForCausalLM, GenerationConfig
	from transformers.generation.utils import GreedySearchDecoderOnlyOutput

	# Device that input tensors are moved to: "cuda" when PyTorch reports a usable GPU, else "cpu".
	DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
	# Bare expression; presumably kept so a notebook cell echoes the selected device.
	DEVICE
	# The tokenizer is loaded from the meta-llama/Llama-2-7b-hf repository.
	tokenizer = LlamaTokenizer.from_pretrained("meta-llama/Llama-2-7b-hf")

	# The base model is loaded from a different repository than the tokenizer,
	# with load_in_4bit=True and device_map="auto" forwarded to from_pretrained.
	model = LlamaForCausalLM.from_pretrained(
	"abhishek/llama-2-7b-hf-small-shards",
	load_in_4bit=True,
	device_map="auto",
	)

	# Wrap the base model with the PEFT weights published at shahidul034/kuet_chatbot.
	# NOTE(review): torch_dtype is passed to PeftModel.from_pretrained rather than to the
	# base-model load above -- confirm it has the intended effect.
	model = PeftModel.from_pretrained(model, "shahidul034/kuet_chatbot", torch_dtype=torch.float16)
	# Pin the special-token ids: the pad id is set on both the model config and the
	# tokenizer; the bos/eos ids are set on the model config only.
	model.config.pad_token_id = tokenizer.pad_token_id = 0 # unk
	model.config.bos_token_id = 1
	model.config.eos_token_id = 2

	# Switch to evaluation mode, then wrap the model with torch.compile.
	# NOTE(review): confirm torch.compile is supported and worthwhile for this
	# 4-bit PEFT model in the target environment.
	model = model.eval()
	model = torch.compile(model)
	# Instruction/response prompt skeleton; "[INSTRUCTION]" is the slot that
	# create_prompt fills in. The text is spelled out with explicit "\n" escapes
	# (instead of a triple-quoted block) so the prompt sent to the model does not
	# pick up whatever leading whitespace this snippet is pasted with.
	PROMPT_TEMPLATE = (
	    "\n"
	    "Below is an instruction that describes a task. "
	    "Write a response that appropriately completes the request.\n"
	    "\n"
	    "### Instruction:\n"
	    "[INSTRUCTION]\n"
	    "\n"
	    "### Response:\n"
	)


	def create_prompt(instruction: str) -> str:
	    """Return the full model prompt for ``instruction``.

	    Args:
	        instruction: The user's question or task, inserted verbatim.

	    Returns:
	        ``PROMPT_TEMPLATE`` with the ``[INSTRUCTION]`` placeholder replaced
	        by ``instruction``. The result ends with the ``### Response:`` header
	        and a newline.
	    """
	    return PROMPT_TEMPLATE.replace("[INSTRUCTION]", instruction)

	# Example: print(create_prompt("where is kuet located?"))

	def generate_response(
	    prompt: str, model: PeftModel, max_new_tokens: int = 256
	) -> GreedySearchDecoderOnlyOutput:
	    """Tokenize ``prompt`` and run ``model.generate`` on it.

	    Args:
	        prompt: Complete prompt text, e.g. the output of ``create_prompt``.
	        model: Model whose ``generate`` method is called.
	        max_new_tokens: Upper bound on the number of newly generated tokens.
	            Defaults to 256, the value that was previously hard-coded.

	    Returns:
	        The object returned by ``model.generate`` when called with
	        ``return_dict_in_generate=True`` and ``output_scores=True``.
	        ``format_response`` reads ``sequences[0]`` from it.
	    """
	    encoding = tokenizer(prompt, return_tensors="pt")
	    input_ids = encoding["input_ids"].to(DEVICE)
	    # Hand the tokenizer's attention mask to generate() explicitly instead of
	    # leaving generate() to derive one on its own.
	    attention_mask = encoding["attention_mask"].to(DEVICE)

	    # NOTE(review): temperature and top_p are sampling parameters, but
	    # do_sample is never set here -- confirm whether sampling was intended.
	    generation_config = GenerationConfig(
	        temperature=0.1,
	        top_p=0.75,
	        repetition_penalty=1.1,
	    )
	    # inference_mode disables autograd tracking for the duration of the call.
	    with torch.inference_mode():
	        return model.generate(
	            input_ids=input_ids,
	            attention_mask=attention_mask,
	            generation_config=generation_config,
	            return_dict_in_generate=True,
	            output_scores=True,
	            max_new_tokens=max_new_tokens,
	        )

	def format_response(response: GreedySearchDecoderOnlyOutput) -> str:
	    """Extract the answer text from a generation result and wrap it.

	    Args:
	        response: Result of ``generate_response``; only ``sequences[0]`` is read.

	    Returns:
	        The text that follows the first ``### Response:`` header (up to the
	        next such header, if any), stripped and re-wrapped with
	        ``textwrap.wrap`` and joined with newlines. If the header is absent
	        from the decoded text, the whole decoded text is used instead of
	        raising ``IndexError``.
	    """
	    marker = "### Response:"
	    # skip_special_tokens keeps markers such as the end-of-sequence token out
	    # of the text shown to the user.
	    decoded_output = tokenizer.decode(response.sequences[0], skip_special_tokens=True)
	    pieces = decoded_output.split(marker)
	    answer = pieces[1] if len(pieces) > 1 else decoded_output
	    return "\n".join(textwrap.wrap(answer.strip()))

	def ask_alpaca(prompt: str, model: PeftModel = model) -> None:
	    """Ask the chatbot a question and print its wrapped answer.

	    Args:
	        prompt: The user's question; it is inserted into the prompt template
	            by ``create_prompt``.
	        model: Model used for generation. The default is the module-level
	            ``model`` as it was bound when this function was defined.

	    Returns:
	        None. The formatted answer is written to stdout with ``print``.
	    """
	    full_prompt = create_prompt(prompt)
	    response = generate_response(full_prompt, model)
	    print(format_response(response))

	# Demo query: prints the chatbot's wrapped answer to stdout.
	ask_alpaca("where is kuet located?")

	```