import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer

# Load the fine-tuned Krama Javanese model and its tokenizer from the Hugging Face Hub.
tokenizer = AutoTokenizer.from_pretrained("afrizalha/Bakpia-V1-0.5B-Javanese")
model = AutoModelForCausalLM.from_pretrained("afrizalha/Bakpia-V1-0.5B-Javanese")

# Streams tokens to stdout as they are generated (server-side progress log).
text_streamer = TextStreamer(tokenizer)

# ChatML prompt template used by the Qwen 2 base model, with an empty system message.
template = """<|im_start|>system
<|im_end|>
<|im_start|>user
{prompt}<|im_end|>
<|im_start|>assistant
"""


def generate(query, temp, top_p):
    inputs = template.format(prompt=query)
    inputs = tokenizer([inputs], return_tensors="pt").to(model.device)
    # Remember the prompt length so the echoed prompt can be stripped from the output.
    input_length = inputs.input_ids.shape[1]
    outputs = model.generate(
        inputs=inputs.input_ids,
        streamer=text_streamer,
        max_new_tokens=1024,
        do_sample=True,
        temperature=temp,
        top_p=top_p,
    )
    # Decode only the newly generated tokens, skipping the special ChatML markers.
    return tokenizer.decode(outputs[0][input_length:], skip_special_tokens=True)


with gr.Blocks(theme=gr.themes.Soft()) as app:
    prompt = gr.Textbox(label="Prompt", value="Pripun kulo saged nyinaoni Basa Jawa kanthi sae?")
    output = gr.Textbox(label="Response", scale=2)
    temp = gr.Slider(label="Temperature", minimum=0.1, maximum=1.0, step=0.1, value=0.5)
    top_p = gr.Slider(label="Top P", minimum=0.1, maximum=1.0, step=0.1, value=0.5)
    gr.Interface(
        fn=generate,
        inputs=[prompt, temp, top_p],
        outputs=[output],
        allow_flagging="never",
        title="Bakpia V1 0.5B",
        description="""Bakpia V1 0.5B is a fine-tuned version of Qwen 2 0.5B Instruct. It is fine-tuned on a large synthetic dataset of Krama Javanese, with prompts generated by GPT-4o and responses generated by Claude 3 Haiku.""",
    )

app.launch()
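
# A minimal smoke test for generate() outside the UI, kept commented out
# because app.launch() above blocks until the server is closed; run it in a
# separate session or before launching. The Javanese greeting below is just
# an illustrative prompt, not part of the original app:
#
#     print(generate("Sugeng enjing! Pripun kabaripun?", temp=0.5, top_p=0.5))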