import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, pipeline

import gradio as gr
# Load the base model in bfloat16 and attach the QLoRA fine-tuned adapter
# weights saved under ./finetunedPEFTModel.
model = AutoModelForCausalLM.from_pretrained(
    "microsoft/Phi-3.5-mini-instruct",
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
)
model.load_adapter('./finetunedPEFTModel')
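
# Optional: since the adapter was trained with QLoRA, the base model could
# instead be loaded in 4-bit via the BitsAndBytesConfig imported above, which
# cuts memory use. A minimal sketch, assuming a CUDA-capable GPU and the
# bitsandbytes package (the app description below notes this demo runs on
# CPU, where the bf16 load above is the safer path):
#
# bnb_config = BitsAndBytesConfig(
#     load_in_4bit=True,
#     bnb_4bit_quant_type="nf4",
#     bnb_4bit_compute_dtype=torch.bfloat16,
# )
# model = AutoModelForCausalLM.from_pretrained(
#     "microsoft/Phi-3.5-mini-instruct",
#     quantization_config=bnb_config,
#     trust_remote_code=True,
# )
# model.load_adapter('./finetunedPEFTModel')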

tokenizer = AutoTokenizer.from_pretrained('./finetunedPEFTModel', trust_remote_code=True)

# Build the text-generation pipeline once at startup rather than on every
# request, so each call only pays for generation itself.
pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer)


def generateText(inputText="What is QLoRA finetuning?", num_tokens=200):
    """Generate up to num_tokens tokens of output for the given question."""
    result = pipe(inputText, max_length=num_tokens)
    return result[0]['generated_text']
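

# Phi-3.5-mini-instruct is a chat-tuned model, so wrapping the question in
# its chat template may produce better-formatted answers than the raw prompt
# used above. A minimal sketch, assuming the tokenizer ships a chat template
# (generateChatText is illustrative, not part of the original app):
#
# def generateChatText(inputText, num_tokens=200):
#     messages = [{"role": "user", "content": inputText}]
#     prompt = tokenizer.apply_chat_template(
#         messages, tokenize=False, add_generation_prompt=True
#     )
#     return pipe(prompt, max_length=num_tokens)[0]['generated_text']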

title = "Phi-3.5-mini-instruct fine-tuned on the OpenAssistant dataset with QLoRA"
description = (
    "Phi-3.5-mini-instruct fine-tuned on the OpenAssistant dataset with QLoRA. "
    "This demo runs on CPU and is therefore slow: a response can take 15 to 20 "
    "minutes, so please be patient after submitting a request."
)

examples = [
    ["How can I optimize my web page for search engines so that it ranks at the top?", 200],
    ["Can you give me an example of a Python script for the Fibonacci series?", 200],
    ["Can you explain what contrastive loss is in deep learning?", 200],
    ["How are Sentence Transformers different from Hugging Face Transformers?", 200],
]

demo = gr.Interface(
    generateText,
    inputs=[
        gr.Textbox(label="Question that you want to ask"),
        gr.Slider(100, 500, value=200, step=100,
                  label="Number of tokens that you want in your output"),
    ],
    outputs=[
        gr.Text(),
    ],
    title=title,
    description=description,
    examples=examples,
    cache_examples=False,
)
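
# Given the long CPU response times noted in the description, enabling
# Gradio's request queue before launching can help long-running calls survive
# HTTP timeouts (demo.queue() is standard Gradio API; its defaults vary by
# Gradio version):
#
# demo.queue().launch()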

demo.launch()