import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, pipeline

import gradio as gr
# Load the base model in bfloat16 and attach the QLoRA fine-tuned adapter
# weights saved under ./finetunedPEFTModel.
model = AutoModelForCausalLM.from_pretrained(
    "microsoft/Phi-3.5-mini-instruct",
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
)
model.load_adapter('./finetunedPEFTModel')
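
# Optional: since the adapter was trained with QLoRA, the base model could
# instead be loaded in 4-bit via the BitsAndBytesConfig imported above, which
# cuts memory use. A minimal sketch, assuming a CUDA-capable GPU and the
# bitsandbytes package (the app description below notes this demo runs on
# CPU, where the bf16 load above is the safer path):
#
# bnb_config = BitsAndBytesConfig(
#     load_in_4bit=True,
#     bnb_4bit_quant_type="nf4",
#     bnb_4bit_compute_dtype=torch.bfloat16,
# )
# model = AutoModelForCausalLM.from_pretrained(
#     "microsoft/Phi-3.5-mini-instruct",
#     quantization_config=bnb_config,
#     trust_remote_code=True,
# )
# model.load_adapter('./finetunedPEFTModel')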

tokenizer = AutoTokenizer.from_pretrained('./finetunedPEFTModel', trust_remote_code=True)

# Build the text-generation pipeline once at startup rather than on every
# request, so each call only pays for generation itself.
pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer)


def generateText(inputText="What is QLoRA finetuning?", num_tokens=200):
    """Generate up to num_tokens tokens of output for the given question."""
    result = pipe(inputText, max_length=num_tokens)
    return result[0]['generated_text']
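

# Phi-3.5-mini-instruct is a chat-tuned model, so wrapping the question in
# its chat template may produce better-formatted answers than the raw prompt
# used above. A minimal sketch, assuming the tokenizer ships a chat template
# (generateChatText is illustrative, not part of the original app):
#
# def generateChatText(inputText, num_tokens=200):
#     messages = [{"role": "user", "content": inputText}]
#     prompt = tokenizer.apply_chat_template(
#         messages, tokenize=False, add_generation_prompt=True
#     )
#     return pipe(prompt, max_length=num_tokens)[0]['generated_text']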

title = "Phi-3.5-mini-instruct fine-tuned on the OpenAssistant dataset with QLoRA"
description = (
    "Phi-3.5-mini-instruct fine-tuned on the OpenAssistant dataset with QLoRA. "
    "This demo runs on CPU and is therefore slow: a response can take 15 to 20 "
    "minutes, so please be patient after submitting a request."
)

examples = [
    ["How can I optimize my web page for search engines so that it ranks at the top?", 200],
    ["Can you give me an example of a Python script for the Fibonacci series?", 200],
    ["Can you explain what contrastive loss is in deep learning?", 200],
    ["How are Sentence Transformers different from Hugging Face Transformers?", 200],
]

demo = gr.Interface(
    generateText,
    inputs=[
        gr.Textbox(label="Question that you want to ask"),
        gr.Slider(100, 500, value=200, step=100,
                  label="Number of tokens that you want in your output"),
    ],
    outputs=[
        gr.Text(),
    ],
    title=title,
    description=description,
    examples=examples,
    cache_examples=False,
)
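
# Given the long CPU response times noted in the description, enabling
# Gradio's request queue before launching can help long-running calls survive
# HTTP timeouts (demo.queue() is standard Gradio API; its defaults vary by
# Gradio version):
#
# demo.queue().launch()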

demo.launch()