import gradio as gr
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

base_model_id = "mistralai/Mistral-7B-v0.1"
ft_model_id = "asusevski/mistraloo-sft"

tokenizer = AutoTokenizer.from_pretrained(
    base_model_id,
    add_bos_token=True
)

# Load the base model in 4-bit NF4 (with double quantization) so it fits in
# consumer GPU memory; matrix computation happens in bfloat16.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_id,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True,
    token=True
)

# Attach the fine-tuned LoRA adapter. device_map="auto" already placed the
# quantized base model, so no .to(device) call is needed here (and .to() is
# not supported on 4-bit models in recent transformers releases).
model = PeftModel.from_pretrained(base_model, ft_model_id)
model.eval()


def uwaterloo_output(post_title, post_text):
    # Alpaca-style instruction prompt matching the fine-tuning format.
    prompt = f"""Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
Respond to the reddit post in the style of a University of Waterloo student.

### Input:
{post_title}
{post_text}

### Response:
"""
    model_input = tokenizer(prompt, return_tensors="pt").to(device)
    with torch.no_grad():
        model_output = model.generate(
            **model_input,
            max_new_tokens=256,
            repetition_penalty=1.15
        )[0]
    output = tokenizer.decode(model_output, skip_special_tokens=True)
    # The decoded text includes the prompt; return only the generated response.
    return output.split("### Response:\n")[-1]


iface = gr.Interface(
    fn=uwaterloo_output,
    inputs=[
        gr.Textbox("", label="Post Title"),
        gr.Textbox("", label="Post Text"),
    ],
    outputs=gr.Textbox("", label="Mistraloo-SFT")
)
iface.launch()

# During development, the adapter can also be loaded from a local training
# checkpoint instead of the Hub, reusing the same base model and bnb_config:
# model = PeftModel.from_pretrained(base_model, "mistral-mistraloo/checkpoint-500")
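
# --- Optional smoke test: an illustrative sketch, not part of the original
# script. Calling uwaterloo_output() once checks that the quantized base
# model, the LoRA adapter, and the prompt template work end to end. The post
# title and body below are made-up placeholders, and in practice this check
# would run *before* iface.launch(), since launch() blocks until the server
# is stopped.
sample_response = uwaterloo_output(
    "Midterm season is rough",                   # hypothetical post title
    "How do you all stay sane during midterms?"  # hypothetical post body
)
print(sample_response)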