# NOTE: file-viewer metadata (file size, commit hash, line-number gutter)
# from the original scrape was removed here; it was not part of the source.
import gradio as gr
import torch
from PIL import Image
from transformers import AutoTokenizer, AutoModelForSequenceClassification
# Load the tokenizer and model
tokenizer = AutoTokenizer.from_pretrained("neulab/UIX-Qwen2")
model = AutoModelForSequenceClassification.from_pretrained("neulab/UIX-Qwen2")
# Function to process the screenshot and prompt
def predict_coordinates(screenshot, prompt):
# Process the image and prompt here
# For now, we'll use the prompt as input (actual screenshot integration needs proper pre-processing)
inputs = tokenizer(prompt, return_tensors="pt")
outputs = model(**inputs)
# Example response (fake coordinates for now)
coordinates = {"x": 100, "y": 200} # This would come from the model output
return coordinates
# Gradio Interface
with gr.Blocks() as demo:
gr.Markdown("# UIX-Qwen2: Predict Coordinates for UI Interactions")
with gr.Row():
with gr.Column():
screenshot = gr.Image(type="pil", label="Upload Screenshot")
prompt = gr.Textbox(label="Prompt (e.g., 'Click on Submit button')")
with gr.Column():
output = gr.JSON(label="Predicted Coordinates (x, y)")
submit_button = gr.Button("Get Coordinates")
submit_button.click(predict_coordinates, inputs=[screenshot, prompt], outputs=output)
# Launch the Gradio app
demo.launch()