|
import gradio as gr |
|
import torch |
|
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline |
|
|
|
# Load the fine-tuned causal language model and its tokenizer from "checkpoint".
# NOTE(review): trust_remote_code=True allows Python code shipped with the
# checkpoint to run during loading -- only use it with a checkpoint you trust.
model = AutoModelForCausalLM.from_pretrained(
    "checkpoint",
    trust_remote_code=True,
)
tokenizer = AutoTokenizer.from_pretrained(
    "checkpoint",
    trust_remote_code=True,
)

# Reuse the end-of-sequence token as the padding token.
tokenizer.pad_token = tokenizer.eos_token
|
|
|
# Text-generation pipeline shared by all requests; built lazily on first use.
_generator = None


def _get_generator():
    """Return the shared text-generation pipeline, creating it on first use."""
    global _generator
    if _generator is None:
        # The pipeline's arguments never change between requests, so it is
        # built once instead of on every call.  No `max_length` is set here:
        # each call passes `max_new_tokens`, which overrides `max_length`.
        _generator = pipeline(
            task="text-generation",
            model=model,
            tokenizer=tokenizer,
        )
    return _generator


def _parse_token_count(count):
    """Convert the UI-supplied ``count`` into a positive ``int``."""
    try:
        token_count = int(count)
    except (TypeError, ValueError, OverflowError) as err:
        raise ValueError(
            f"count must be a whole number of tokens, got {count!r}"
        ) from err
    if token_count <= 0:
        raise ValueError(f"count must be a positive number, got {count!r}")
    return token_count


def inference(prompt, count):
    """Generate text for ``prompt`` using the fine-tuned model.

    Args:
        prompt: User text; it is sent to the model as ``"### Human: {prompt}"``.
        count: Maximum number of new tokens to generate.  The UI delivers this
            as a string (e.g. ``"200"``), so it is converted to ``int`` here;
            numeric values are accepted as well.

    Returns:
        The ``generated_text`` field of the first pipeline result.

    Raises:
        ValueError: If ``count`` is not a positive whole number.
    """
    # Validate before touching the model so bad input fails fast and clearly.
    max_new_tokens = _parse_token_count(count)

    result = _get_generator()(
        f"### Human: {prompt}",
        max_new_tokens=max_new_tokens,
    )
    return result[0]["generated_text"]
|
|
|
# Heading and blurb displayed on the Gradio page.
title = "TSAI S21 Assignment: Adaptive QLoRA training on open assist oasst1 dataset, using microsoft/phi2 model"
description = "A simple Gradio interface that accepts a context and generates GPT like text "

# Example rows offered in the UI; each row holds one value per input field
# (prompt text first, then the count as a string).
examples = [
    ["What is a large language model?", "200"],
    ["Explain about monopsony", "200"],
]
|
|
|
|
|
# Build the web UI: the two text inputs are passed, in order, to
# `inference(prompt, count)` and its return value fills the output box.
demo = gr.Interface(
    inference,
    inputs=[
        gr.Textbox(placeholder="Enter a prompt"),
        # This value is forwarded to the model as `max_new_tokens`, so the
        # hint speaks of tokens rather than characters.
        gr.Textbox(placeholder="Enter the maximum number of new tokens to generate"),
    ],
    outputs=[gr.Textbox(label="Chat GPT like text")],
    title=title,
    description=description,
    examples=examples,
)

# Start the Gradio server.
demo.launch()