import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer

# Load the fine-tuned Krama Javanese model and its tokenizer from the Hugging Face Hub.
tokenizer = AutoTokenizer.from_pretrained("afrizalha/Bakpia-V1-0.5B-Javanese")
model = AutoModelForCausalLM.from_pretrained("afrizalha/Bakpia-V1-0.5B-Javanese")

# Streams tokens to stdout as they are generated (server-side progress log).
text_streamer = TextStreamer(tokenizer)

# ChatML prompt template used by the Qwen 2 base model, with an empty system message.
template = """<|im_start|>system
<|im_end|>
<|im_start|>user
{prompt}<|im_end|>
<|im_start|>assistant
"""


def generate(query, temp, top_p):
    inputs = template.format(prompt=query)
    inputs = tokenizer([inputs], return_tensors="pt").to(model.device)
    # Remember the prompt length so the echoed prompt can be stripped from the output.
    input_length = inputs.input_ids.shape[1]
    outputs = model.generate(
        inputs=inputs.input_ids,
        streamer=text_streamer,
        max_new_tokens=1024,
        do_sample=True,
        temperature=temp,
        top_p=top_p,
    )
    # Decode only the newly generated tokens, skipping the special ChatML markers.
    return tokenizer.decode(outputs[0][input_length:], skip_special_tokens=True)


with gr.Blocks(theme=gr.themes.Soft()) as app:
    prompt = gr.Textbox(label="Prompt", value="Pripun kulo saged nyinaoni Basa Jawa kanthi sae?")
    output = gr.Textbox(label="Response", scale=2)
    temp = gr.Slider(label="Temperature", minimum=0.1, maximum=1.0, step=0.1, value=0.5)
    top_p = gr.Slider(label="Top P", minimum=0.1, maximum=1.0, step=0.1, value=0.5)
    gr.Interface(
        fn=generate,
        inputs=[prompt, temp, top_p],
        outputs=[output],
        allow_flagging="never",
        title="Bakpia V1 0.5B",
        description="""Bakpia V1 0.5B is a fine-tuned version of Qwen 2 0.5B Instruct. It is fine-tuned on a large synthetic dataset of Krama Javanese, with prompts generated by GPT-4o and responses generated by Claude 3 Haiku.""",
    )

app.launch()
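
# A minimal smoke test for generate() outside the UI, kept commented out
# because app.launch() above blocks until the server is closed; run it in a
# separate session or before launching. The Javanese greeting below is just
# an illustrative prompt, not part of the original app:
#
#     print(generate("Sugeng enjing! Pripun kabaripun?", temp=0.5, top_p=0.5))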