# LiteLlama / app.py
# Tonic: "Update app.py" (commit 975dca5)
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import gradio as gr
# Heading passed to gr.Markdown at the top of the page.
title = """🙋🏻‍♂️Welcome to🌟Tonic's 🦙LiteLlama📳On-Device Chat!"""

# Markdown/HTML introduction passed to gr.Markdown below the heading.
# The text is kept as-is except for a stray closing </h3> tag (it had no
# matching opening tag), which has been removed after the "Duplicate Space"
# badge link.
description = """
You can use this Space to test out the current model [ahxt/LiteLlama-460M-1T](https://huggingface.co/ahxt/LiteLlama-460M-1T) You can also use 🦙LiteLlama📳On-Device Chat by cloning this space. Simply click here: <a style="display:inline-block" href="https://huggingface.co/spaces/Tonic1/Litellama?duplicate=true"><img src="https://img.shields.io/badge/-Duplicate%20Space-blue?labelColor=white&style=flat&logo=&logoWidth=14" alt="Duplicate Space"></a>
Join us : 🌟TeamTonic is always making cool demos! Join our active🛠️builder's community on👻Discord: [Discord](https://discord.gg/nXx5wbX9) On🤗Huggingface: [TeamTonic](https://huggingface.co/TeamTonic) & [MultiTransformer](https://huggingface.co/MultiTransformer) On Github: [Polytonic](https://github.com/tonic-ai) & contribute to 🌟[PolyGPT](https://github.com/tonic-ai/polygpt-alpha)
"""
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Identifier of the checkpoint shared by the tokenizer and the model.
model_path = 'ahxt/LiteLlama-460M-1T'
tokenizer = AutoTokenizer.from_pretrained(model_path)

# Load the causal LM, move it to the selected device and put it in
# evaluation mode, since this script only runs generation.
model = AutoModelForCausalLM.from_pretrained(model_path)
model = model.to(device)
model.eval()
def generate_text(question, max_new_tokens=50):
    """Answer ``question`` with the loaded model using a ``Q:``/``A:`` prompt.

    Args:
        question: The user's question as plain text.
        max_new_tokens: Upper bound on the number of tokens generated for the
            answer. The prompt tokens do not count towards this limit.

    Returns:
        The generated answer with the prompt and special tokens removed, or
        an empty string when ``question`` is empty or only whitespace.
    """
    if not question or not question.strip():
        return ""

    prompt = f'Q: {question}\nA:'
    encoded = tokenizer(prompt, return_tensors="pt")
    input_ids = encoded.input_ids.to(device)
    attention_mask = encoded.attention_mask.to(device)

    # Limit the number of *new* tokens: ``max_length`` also counts the prompt,
    # so a long question would leave little or no room for the answer.
    tokens = model.generate(
        input_ids=input_ids,
        attention_mask=attention_mask,
        max_new_tokens=max_new_tokens,
        pad_token_id=tokenizer.eos_token_id,
    )

    # Decode only the tokens produced after the prompt. Searching the decoded
    # text for an "A:" marker breaks when the marker is missing or repeated.
    new_tokens = tokens[0][input_ids.shape[-1]:]
    answer = tokenizer.decode(new_tokens, skip_special_tokens=True)

    # If the model goes on to write a follow-up "Q:" of its own, keep only the
    # answer to the user's question.
    return answer.split('\nQ:')[0].strip()
# Build the Gradio Blocks page.
with gr.Blocks() as iface:
    # Heading followed by the introductory text.
    gr.Markdown(title)
    gr.Markdown(description)

    with gr.Row():
        # First column: the question box and the button that submits it.
        with gr.Column():
            question = gr.Textbox(
                label="Speak to LiteLlama",
                lines=2,
                placeholder="What are the best Japanese gardens in Paris?",
            )
            submit_button = gr.Button("Submit")

        # Second column: the textbox that receives the generated reply.
        with gr.Column():
            output = gr.Textbox(
                label="🦙LiteLlama",
                lines=6,
            )

    # A click sends the question through generate_text and writes the
    # returned string into the output textbox.
    submit_button.click(
        fn=generate_text,
        inputs=question,
        outputs=output,
    )

# Start serving the page.
iface.launch()