# NOTE(review): the lines above this file's code were Hugging Face Space page
# residue ("Spaces:" / "Runtime error") from scraping, not source code.
# This file is the Gradio app for the LEO sequential-grounding demo.
# Standard library
import os
import re

# Third-party
import gradio as gr
import spaces  # noqa: F401 -- presumably required for HF Spaces (ZeroGPU) side effects; confirm

# Local
from leo.inference import inference

# Directory holding one .glb mesh per scene.
MESH_DIR = 'assets/mesh'
# Scene ids derived from the mesh filenames. Only .glb files are listed so
# every dropdown entry maps to a loadable mesh (change_scene appends '.glb'),
# and a missing assets checkout yields an empty dropdown instead of a crash
# at import time.
MESH_NAMES = sorted(
    os.path.splitext(fname)[0]
    for fname in os.listdir(MESH_DIR)
    if fname.endswith('.glb')
) if os.path.isdir(MESH_DIR) else []
# Fixed number of action-step slots rendered in each interface tab.
STEP_COUNTS = 6
def change_scene(dropdown_scene: str, mesh_dir=None):
    """Return the .glb mesh path for the selected scene.

    Wired to the scene dropdown's ``change`` event to swap the mesh shown in
    the Model3D viewer. (The original comment also mentioned resetting chatbot
    history; no chatbot state is visible here -- presumably handled by the
    reload itself. TODO confirm.)

    Args:
        dropdown_scene: Scene id selected in the dropdown, e.g. 'scene0050_00'.
        mesh_dir: Optional override of the mesh directory; defaults to the
            module-level MESH_DIR so the existing Gradio wiring is unchanged.

    Returns:
        Filesystem path '<mesh_dir>/<dropdown_scene>.glb'.
    """
    base_dir = MESH_DIR if mesh_dir is None else mesh_dir
    return os.path.join(base_dir, f'{dropdown_scene}.glb')
with gr.Blocks(title='LEO Demo') as demo:
    # Page heading.
    gr.HTML(value="<h1 align='center'>Task-oriented Sequential Grounding in 3D Scenes </h1>")

    with gr.Row():
        with gr.Column(scale=5):
            # Scene picker; the default matches the mesh preloaded below.
            dropdown_scene = gr.Dropdown(
                choices=MESH_NAMES,
                value='scene0050_00',
                interactive=True,
                label='Select a 3D scene',
            )
            # Main viewer showing the currently selected scene mesh.
            model_3d = gr.Model3D(
                value=os.path.join(MESH_DIR, 'scene0050_00.glb'),
                clear_color=[0.0, 0.0, 0.0, 0.0],
                label='3D Scene',
                camera_position=(80, 100, 6),
                height=659,
            )
            # Usage hint shown under the viewer.
            gr.HTML(
                """<center><strong>
                π SCROLL and DRAG on the 3D Scene
                to zoom in/out and rotate. Press CTRL and DRAG to pan.
                </strong></center>
                """
            )

    # Swap the displayed mesh whenever a new scene is chosen; no queueing
    # needed for this lightweight path lookup.
    dropdown_scene.change(
        fn=change_scene,
        inputs=[dropdown_scene],
        outputs=[model_3d],
        queue=False,
    )
# LEO task-to-plan inference wrapper | |
def leo_task_to_plan(task_description): | |
task_input = { | |
"task_description": task_description, | |
"scan_id": "scene0050_00" | |
} | |
plan = inference("scene0050_00", task_input, predict_mode=True) | |
plan = plan[0]['pred_plan_text'] | |
# parts = re.split(r'(\d+\.)', plan)[1:] | |
# steps = [parts[i] + parts[i + 1].rstrip() for i in range(0, len(parts), 2)] | |
return plan | |
# LEO ground inference wrapper | |
def leo_plan_to_masks(task_description, *action_steps): | |
formatted_action_steps = [ | |
{"action": step, "target_id": "unknown", "label": "unknown"} for step in action_steps if step != "" | |
] | |
task_input = { | |
"task_description": task_description, | |
"action_steps": formatted_action_steps, | |
"scan_id": "scene0050_00" | |
} | |
masks = inference("scene0050_00", task_input, predict_mode=False) | |
masks = [tensor.item() for tensor in masks] | |
return [f"assets/mask/scene0050_00/scene0050_00_obj_{mask}.glb" for mask in masks] + ["assets/mask/scene0050_00/scene0050_00_obj_empty.glb"] * (STEP_COUNTS - len(masks)) | |
# LEO task-to-plan and ground inference wrapper | |
def leo_task_to_plan_and_masks(task_description): | |
task_input = { | |
"task_description": task_description, | |
"scan_id": "scene0050_00" | |
} | |
plan = inference("scene0050_00", task_input, predict_mode=True) | |
plan_text = plan[0]['pred_plan_text'] | |
parts = re.split(r'(\d+\.)', plan_text)[1:] | |
steps = [parts[i] + parts[i + 1].rstrip() for i in range(0, len(parts), 2)] | |
steps += ["### PLANNING HAS ENDED, SEE ABOVE FOR DETAILS ###"] * (STEP_COUNTS - len(steps)) | |
masks = plan[0]['predict_object_id'] | |
mask_paths = [f"assets/mask/scene0050_00/scene0050_00_obj_{mask}.glb" for mask in masks] | |
mask_paths += ["assets/mask/scene0050_00/scene0050_00_obj_empty.glb"] * (STEP_COUNTS - len(masks)) # fill with empty mask | |
output = [] | |
for i in range(STEP_COUNTS): | |
output.append(steps[i]) | |
output.append(mask_paths[i]) | |
return output | |
# with gr.Tab("LEO Task-to-Plan"): | |
# gr.Interface( | |
# fn=leo_task_to_plan, | |
# inputs=[gr.Textbox(label="Task Description")], | |
# outputs=["text"], | |
# examples=[ | |
# ["Freshen up in the bathroom."] | |
# ], | |
# title="LEO Task-to-Plan: Input task, Output plan text" | |
# ) | |
with gr.Tab("LEO Plan-to-Masks"): | |
gr.Interface( | |
fn=leo_plan_to_masks, | |
inputs=[gr.Textbox(label="Task Description")] + [gr.Textbox(label=f"Action Step {i+1}") for i in range(STEP_COUNTS)], | |
outputs=[gr.Model3D( | |
clear_color=[0.0, 0.0, 0.0, 0.0], camera_position=(80, 100, 6), label=f"3D Model for Step {i+1} (if the step exists)") for i in range(STEP_COUNTS)], | |
examples=[ | |
[ | |
"Start Working at the desk.", | |
"1. Walk to the desk.", | |
"2. Sit on the brown leather sofa chair in front of the desk.", | |
"3. Turn on the opened laptop in front of you on the desk.", | |
"4. Grab the cup beside the laptop to drink." | |
] + [""] * (STEP_COUNTS - 4) | |
], | |
title="LEO Plan-to-Masks: Input plan, Output 3D Masks for each step, Red denotes predicted target object", | |
description="Please input a task description and action steps. Examples can be found at the bottom of the interface." | |
) | |
with gr.Tab("LEO Task-to-Plan and Masks"): | |
gr.Interface( | |
fn=leo_task_to_plan_and_masks, | |
inputs=[gr.Textbox(label="Task Description")], | |
outputs=[ | |
item for sublist in zip( | |
[gr.Textbox(label=f"Action Step {i+1}") for i in range(STEP_COUNTS)], | |
[gr.Model3D( | |
clear_color=[0.0, 0.0, 0.0, 0.0], | |
camera_position=(80, 100, 6), | |
label=f"3D Model for Step {i+1} (if the step exists)" | |
) for i in range(STEP_COUNTS)] | |
) for item in sublist | |
], | |
examples=[ | |
["Start Working at the desk."] | |
], | |
title="LEO Task-to-Plan and Masks: Input task, Output plan text and 3D Masks for each step, Red denotes predicted target object", | |
description="Please input a task description. Examples can be found at the bottom of the interface." | |
# js=""" | |
# function() { | |
# const stepCounts = """ + str(STEP_COUNTS) + """; | |
# const stepElems = document.querySelectorAll('.output_interface .textbox_output'); | |
# const modelElems = document.querySelectorAll('.output_interface .model3d_output'); | |
# for (let i = 0; i < stepCounts; i++) { | |
# if (stepElems[i].value === '### PLANNING HAS ENDED, SEE ABOVE FOR DETAILS ###' || modelElems[i].src.includes('scene0050_00_obj_empty.glb')) { | |
# stepElems[i].style.display = 'none'; | |
# modelElems[i].style.display = 'none'; | |
# } | |
# } | |
# } | |
# """ | |
) | |
demo.queue().launch(share=True, allowed_paths=['assets']) | |