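# Source: Hugging Face Space "test-gemma2-2bb", file app.py
# Uploader: thfname. Last commit: "Update app.py" (19c36a8, verified). Size: 578 bytes.
# (Viewer links captured with the page -- "raw", "history", "blame" -- are not part of the program.)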
import torch
import transformers
import gradio as gr
from transformers import AutoModelForCausalLM
from transformers import AutoTokenizer
# Hub identifier of the checkpoint used for both the model and the tokenizer.
model_name = "thfname/test-gemma2-2b"

# Use the GPU when one is present; otherwise fall back to the CPU so that
# importing this script does not fail outright on a machine without CUDA.
_device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the causal-LM weights in bfloat16 and move them to the chosen device.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16,
).to(_device)

# Tokenizer loaded from the same checkpoint name as the model.
tokenizer = AutoTokenizer.from_pretrained(model_name)
def gt(tx):
    """Generate a continuation of ``tx`` with the module-level model.

    Args:
        tx: Prompt text passed to the module-level ``tokenizer``.

    Returns:
        The first sequence returned by ``model.generate``, decoded back to a
        string. ``decode`` is called with its default arguments, so no extra
        post-processing (such as stripping special tokens) is requested here.
    """
    # Place the encoded prompt on whatever device the model is on, rather than
    # repeating a hard-coded "cuda" that could disagree with the model.
    encoded = tokenizer(tx, return_tensors="pt").to(model.device)
    # The encoding is unpacked as keyword arguments; generate at most 32 new
    # tokens.
    generated = model.generate(**encoded, max_new_tokens=32)
    return tokenizer.decode(generated[0])
# Wire the generation function into a simple text-in / text-out Gradio UI.
demo = gr.Interface(
    fn=gt,
    inputs="text",
    outputs="text",
)

# Start serving the interface.
demo.launch()