|
import gradio as gr
import torch
from PIL import Image
from transformers import AutoModelForSequenceClassification, AutoTokenizer
|
|
|
|
|
# Hugging Face repository that provides both the tokenizer and the model weights.
_MODEL_ID = "neulab/UIX-Qwen2"

# Load once at import time so every Gradio request reuses the same objects.
# NOTE(review): AutoModelForSequenceClassification is an unusual head for a
# UI coordinate-prediction model — confirm this is the intended model class.
tokenizer = AutoTokenizer.from_pretrained(_MODEL_ID)
model = AutoModelForSequenceClassification.from_pretrained(_MODEL_ID)
|
|
|
|
|
def predict_coordinates(screenshot, prompt):
    """Predict click coordinates on a UI screenshot for a natural-language prompt.

    Args:
        screenshot: PIL image of the UI. Currently unused — only the prompt
            is tokenized. TODO(review): feed the image to the model as well.
        prompt: Instruction text such as "Click on Submit button".

    Returns:
        dict with integer "x" and "y" pixel coordinates.
    """
    # Tokenize only the textual instruction.
    inputs = tokenizer(prompt, return_tensors="pt")

    # Pure inference: disable autograd so the forward pass does not build a
    # gradient graph (saves memory and time).
    with torch.no_grad():
        outputs = model(**inputs)

    # TODO(review): `outputs` is not yet decoded into coordinates — the values
    # below are the hard-coded placeholder kept from the original implementation.
    coordinates = {"x": 100, "y": 200}

    return coordinates
|
|
|
|
|
# Assemble the demo UI: a screenshot + prompt on the left, JSON output on the
# right, and a button that runs the prediction.
with gr.Blocks() as demo:
    gr.Markdown("# UIX-Qwen2: Predict Coordinates for UI Interactions")

    with gr.Row():
        with gr.Column():
            # Inputs: the UI screenshot and the natural-language instruction.
            image_input = gr.Image(type="pil", label="Upload Screenshot")
            prompt_input = gr.Textbox(label="Prompt (e.g., 'Click on Submit button')")
        with gr.Column():
            # Output: predicted coordinates rendered as JSON.
            coords_output = gr.JSON(label="Predicted Coordinates (x, y)")

    predict_btn = gr.Button("Get Coordinates")
    predict_btn.click(
        predict_coordinates,
        inputs=[image_input, prompt_input],
        outputs=coords_output,
    )

demo.launch()
|
|