Chintan-Shah's picture
Update app.py
595a507 verified
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, pipeline
import time
import gradio as gr
# Optional 4-bit NF4 quantization settings, kept for reference but disabled:
# the model below is loaded in plain bfloat16 instead.
# bnb_config = BitsAndBytesConfig(
#     load_in_4bit=True,
#     bnb_4bit_quant_type="nf4",
#     bnb_4bit_compute_dtype=torch.bfloat16,
# )

# Hub id of the base checkpoint and local directory of the fine-tuned adapter.
_BASE_MODEL_ID = "microsoft/Phi-3.5-mini-instruct"
_ADAPTER_DIR = './finetunedPEFTModel'

# Load the base causal LM in bfloat16; quantization_config=bnb_config is
# intentionally not passed while the config above stays commented out.
# NOTE(review): trust_remote_code=True allows code shipped with the checkpoint
# to run locally -- confirm it is still required for this model.
model = AutoModelForCausalLM.from_pretrained(
    _BASE_MODEL_ID,
    trust_remote_code=True,
    torch_dtype=torch.bfloat16,
)

# Attach the fine-tuned adapter stored alongside the app.
model.load_adapter(_ADAPTER_DIR)

# The tokenizer is read from the adapter directory rather than from the base
# checkpoint. Disabled alternatives kept for reference:
# tokenizer.pad_token = tokenizer.unk_token
# tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3.5-mini-instruct", trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained(_ADAPTER_DIR, trust_remote_code=True)
def generateText(inputText="What is QLora finetuning?", num_tokens=200):
    """Generate text for ``inputText`` with the module-level model and tokenizer.

    Args:
        inputText: Prompt handed to the text-generation pipeline as-is
            (no instruction/chat wrapper is added).
        num_tokens: Upper bound on the number of newly generated tokens.
            Cast to ``int`` because the value comes from a UI slider and may
            arrive as a float.

    Returns:
        The ``generated_text`` entry of the first pipeline result.
    """
    # max_new_tokens bounds only the completion. The previous max_length
    # also counted the prompt tokens, so a long question left little or no
    # room for the answer even though the UI asks for the number of output
    # tokens.
    pipe = pipeline(
        task="text-generation",
        model=model,
        tokenizer=tokenizer,
        max_new_tokens=int(num_tokens),
    )
    # str() mirrors the original f-string formatting of the prompt.
    result = pipe(str(inputText))
    return result[0]['generated_text']
# Static text shown on the Gradio page.
title = "Fine tuned Phi3.5 instruct model on OpenAssist dataset using QLora"
description = "Fine tuned Phi3.5 instruct model on OpenAssist dataset using QLora. Running on CPU and thus a bit slow. So please be patient on submitting a request as it might take 15 to 20 minutes for a response."

# Example rows, each [question, token count], in the same order as the inputs.
examples = [
    ["How can I optimize my web page for online search so that it is on top?", 200],
    ["Can you give me an example of python script for Fibonacci series?", 200],
    ["Can you explain what is Contrastive Loss in Deep Learning?", 200],
    ["How are Sentence Transformers different from Huggingface Transformers?", 200],
]

# Input widgets: free-text question plus a 100-500 token slider (step 100).
_question_box = gr.Textbox(label="Question that you want to ask")
_token_slider = gr.Slider(
    minimum=100,
    maximum=500,
    value=200,
    step=100,
    label="Number of tokens that you want in your output",
)

# Wire the generator function to the widgets; example caching is disabled.
demo = gr.Interface(
    fn=generateText,
    inputs=[_question_box, _token_slider],
    outputs=[gr.Text()],
    title=title,
    description=description,
    examples=examples,
    cache_examples=False,
)

# Start the web app as soon as the script is executed.
demo.launch()