# UIX-Qwen2 / app.py
# Hugging Face Space by 1inchcard (commit 5ffad46, 1.37 kB)
import gradio as gr
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from PIL import Image
# Load the tokenizer and model once at import time (downloaded from the
# Hugging Face Hub on first run, then served from the local cache).
# NOTE(review): the checkpoint is loaded with a sequence-classification head,
# but the app's stated goal is coordinate prediction — confirm this is the
# intended task head for neulab/UIX-Qwen2.
tokenizer = AutoTokenizer.from_pretrained("neulab/UIX-Qwen2")
model = AutoModelForSequenceClassification.from_pretrained("neulab/UIX-Qwen2")
# Function to process the screenshot and prompt
def predict_coordinates(screenshot, prompt):
    """Predict (x, y) click coordinates for a UI screenshot from a text prompt.

    Parameters
    ----------
    screenshot : PIL.Image.Image or None
        The uploaded UI screenshot. Currently UNUSED — image pre-processing
        for the model is still TODO, so predictions are text-only.
    prompt : str or None
        Natural-language instruction, e.g. "Click on Submit button".

    Returns
    -------
    dict
        ``{"x": int, "y": int}`` — placeholder values until the model
        output is actually decoded into screen coordinates.
    """
    # Guard: tokenizer(None) raises, and an empty prompt gives the model
    # nothing to act on — return the placeholder immediately instead.
    if not prompt:
        return {"x": 0, "y": 0}
    # NOTE(review): only the text prompt reaches the model; the screenshot
    # is ignored, so the model cannot ground its answer in the image yet.
    inputs = tokenizer(prompt, return_tensors="pt")
    outputs = model(**inputs)
    # TODO: decode `outputs` (classification logits) into real coordinates;
    # until then the response below is a hard-coded stand-in.
    coordinates = {"x": 100, "y": 200}
    return coordinates
# Gradio Interface: two-column layout — inputs on the left, JSON output on
# the right — with a single button wired to predict_coordinates.
with gr.Blocks() as demo:
    gr.Markdown("# UIX-Qwen2: Predict Coordinates for UI Interactions")
    with gr.Row():
        # Left column: the user's screenshot and instruction text.
        with gr.Column():
            image_input = gr.Image(type="pil", label="Upload Screenshot")
            prompt_input = gr.Textbox(label="Prompt (e.g., 'Click on Submit button')")
        # Right column: the predicted coordinates rendered as JSON.
        with gr.Column():
            coords_output = gr.JSON(label="Predicted Coordinates (x, y)")
    run_button = gr.Button("Get Coordinates")
    run_button.click(
        predict_coordinates,
        inputs=[image_input, prompt_input],
        outputs=coords_output,
    )
# Launch the Gradio app
demo.launch()