Spaces:
Runtime error
Runtime error
zfzhang-thu
committed on
Commit
•
9de012e
1
Parent(s):
b5cf65e
non-LFS commit
Browse files- .gitattributes +3 -0
- app.py +152 -0
- assets/meta/scannetv2-labels.combined.tsv +608 -0
- assets/meta/scannetv2_raw_categories.json +609 -0
- leo/grounding_head.py +20 -0
- leo/img_encoder.py +161 -0
- leo/inference.py +234 -0
- leo/model.py +477 -0
- leo/pcd_encoder.py +406 -0
- leo/utils.py +187 -0
- requirements.txt +8 -0
.gitattributes
CHANGED
@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
37 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
38 |
+
*.glb filter=lfs diff=lfs merge=lfs -text
|
app.py
ADDED
@@ -0,0 +1,152 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import os
|
3 |
+
import re
|
4 |
+
|
5 |
+
from leo.inference import inference
|
6 |
+
|
7 |
+
# Directory that is scanned for the scene meshes offered in the UI.
MESH_DIR = 'assets/mesh'
# Selectable scene names: every file name in MESH_DIR with its extension
# removed, in sorted order.
MESH_NAMES = sorted(
    os.path.splitext(mesh_file)[0] for mesh_file in os.listdir(MESH_DIR)
)
# Fixed number of action-step slots (text boxes / 3D viewers) shown per tab.
STEP_COUNTS = 6
|
10 |
+
|
11 |
+
def change_scene(dropdown_scene: str):
    """Return the path of the ``.glb`` mesh that belongs to the chosen scene.

    Args:
        dropdown_scene: Scene name (a mesh file name without extension).

    Returns:
        ``MESH_DIR`` joined with ``<dropdown_scene>.glb``.
    """
    mesh_filename = f'{dropdown_scene}.glb'
    return os.path.join(MESH_DIR, mesh_filename)
|
14 |
+
|
15 |
+
# Scene that every inference call and every mask asset path below is pinned to.
_DEMO_SCENE = 'scene0050_00'
# Mask mesh displayed in a step slot that has no predicted object.
_EMPTY_MASK_PATH = f"assets/mask/{_DEMO_SCENE}/{_DEMO_SCENE}_obj_empty.glb"
# Text displayed in a step slot that lies beyond the end of the generated plan.
_PLAN_END_MARKER = "### PLANNING HAS ENDED, SEE ABOVE FOR DETAILS ###"


def _mask_path(object_id):
    """Return the mask mesh path for ``object_id`` in the demo scene."""
    return f"assets/mask/{_DEMO_SCENE}/{_DEMO_SCENE}_obj_{object_id}.glb"


def _fit_to_slots(items, filler):
    """Return ``items`` cut or padded with ``filler`` to exactly STEP_COUNTS entries.

    The Gradio interfaces below declare a fixed number of output components,
    so the wrappers must always return exactly that many values.
    """
    fitted = list(items)[:STEP_COUNTS]
    fitted.extend([filler] * (STEP_COUNTS - len(fitted)))
    return fitted


def _step_model3d(step_index):
    """Build the 3D viewer used for the (zero-based) ``step_index``-th step."""
    return gr.Model3D(
        clear_color=[0.0, 0.0, 0.0, 0.0],
        camera_position=(80, 100, 6),
        label=f"3D Model for Step {step_index + 1} (if the step exists)",
    )


with gr.Blocks(title='LEO Demo') as demo:
    gr.HTML(value="<h1 align='center'>Task-oriented Sequential Grounding in 3D Scenes </h1>")

    with gr.Row():
        with gr.Column(scale=5):
            # NOTE(review): the dropdown only swaps the mesh shown in the
            # viewer; the inference wrappers below always run on _DEMO_SCENE.
            dropdown_scene = gr.Dropdown(
                choices=MESH_NAMES,
                value=_DEMO_SCENE,
                interactive=True,
                label='Select a 3D scene',
            )
            model_3d = gr.Model3D(
                value=os.path.join(MESH_DIR, _DEMO_SCENE + '.glb'),
                clear_color=[0.0, 0.0, 0.0, 0.0],
                label='3D Scene',
                camera_position=(80, 100, 6),
                height=659,
            )
            gr.HTML(
                """<center><strong>
                👆 SCROLL and DRAG on the 3D Scene
                to zoom in/out and rotate. Press CTRL and DRAG to pan.
                </strong></center>
                """
            )

    # Reload the viewer whenever another scene is selected.
    dropdown_scene.change(
        fn=change_scene,
        inputs=[dropdown_scene],
        outputs=[model_3d],
        queue=False
    )

    def leo_task_to_plan(task_description):
        """Run plan prediction for a task and return the plan text.

        Args:
            task_description: Free-form task text entered by the user.

        Returns:
            The ``'pred_plan_text'`` entry of the first inference result.
        """
        task_input = {
            "task_description": task_description,
            "scan_id": _DEMO_SCENE,
        }
        predictions = inference(_DEMO_SCENE, task_input, predict_mode=True)
        return predictions[0]['pred_plan_text']

    def leo_plan_to_masks(task_description, *action_steps):
        """Ground user-written action steps and return one mask path per slot.

        Args:
            task_description: Free-form task text entered by the user.
            *action_steps: Step texts from the step boxes; empty boxes are
                ignored.

        Returns:
            A list of exactly ``STEP_COUNTS`` ``.glb`` paths: one per grounded
            step, followed by the empty mask for unused slots.
        """
        formatted_action_steps = [
            {"action": step, "target_id": "unknown", "label": "unknown"}
            for step in action_steps
            if step  # skips "" and also a None from a cleared box
        ]
        task_input = {
            "task_description": task_description,
            "action_steps": formatted_action_steps,
            "scan_id": _DEMO_SCENE,
        }
        raw_masks = inference(_DEMO_SCENE, task_input, predict_mode=False)
        # Each result exposes .item(); presumably a one-element tensor holding
        # the predicted object id -- TODO confirm against leo.inference.
        object_ids = [tensor.item() for tensor in raw_masks]
        return _fit_to_slots(map(_mask_path, object_ids), _EMPTY_MASK_PATH)

    def leo_task_to_plan_and_masks(task_description):
        """Predict a plan for a task and pair every step with its mask path.

        Args:
            task_description: Free-form task text entered by the user.

        Returns:
            A flat list of ``2 * STEP_COUNTS`` values alternating step text
            and mask path, matching the interleaved output components.
        """
        task_input = {
            "task_description": task_description,
            "scan_id": _DEMO_SCENE,
        }
        predictions = inference(_DEMO_SCENE, task_input, predict_mode=True)
        plan_text = predictions[0]['pred_plan_text']

        # Splitting on a capturing group yields
        # [prefix, "1.", text, "2.", text, ...]; dropping the prefix leaves
        # (number, text) pairs.
        parts = re.split(r'(\d+\.)', plan_text)[1:]
        steps = [
            number + text.rstrip()
            for number, text in zip(parts[::2], parts[1::2])
        ]
        steps = _fit_to_slots(steps, _PLAN_END_MARKER)

        object_ids = predictions[0]['predict_object_id']
        mask_paths = _fit_to_slots(map(_mask_path, object_ids), _EMPTY_MASK_PATH)

        return [value for pair in zip(steps, mask_paths) for value in pair]

    with gr.Tab("LEO Task-to-Plan"):
        gr.Interface(
            fn=leo_task_to_plan,
            inputs=[gr.Textbox(label="Task Description")],
            outputs=["text"],
            examples=[
                ["Freshen up in the bathroom."]
            ],
            title="LEO Task-to-Plan: Input task, Output plan text"
        )

    with gr.Tab("LEO Plan-to-Masks"):
        gr.Interface(
            fn=leo_plan_to_masks,
            inputs=[gr.Textbox(label="Task Description")] + [gr.Textbox(label=f"Action Step {i+1}") for i in range(STEP_COUNTS)],
            outputs=[_step_model3d(i) for i in range(STEP_COUNTS)],
            examples=[
                ["Retrieve an item from the backpack.", "1. Walk to the ottoman located near the brown leather armchair.", "2. Choose the black backpack resting on this ottoman.", "3. Open the backpack to find the needed item."] + [""] * (STEP_COUNTS - 3)
            ],
            title="LEO Plan-to-Masks: Input plan, Output 3D Masks for each step, Red denotes predicted target object"
        )

    with gr.Tab("LEO Task-to-Plan and Masks"):
        # Build all text boxes first, then all viewers, and interleave them so
        # the outputs read step 1 text, step 1 mask, step 2 text, ...
        step_textboxes = [gr.Textbox(label=f"Action Step {i+1}") for i in range(STEP_COUNTS)]
        step_viewers = [_step_model3d(i) for i in range(STEP_COUNTS)]
        gr.Interface(
            fn=leo_task_to_plan_and_masks,
            inputs=[gr.Textbox(label="Task Description")],
            outputs=[
                component
                for pair in zip(step_textboxes, step_viewers)
                for component in pair
            ],
            examples=[
                ["Retrieve an item from the backpack."]
            ],
            title="LEO Task-to-Plan and Masks: Input task, Output plan text and 3D Masks for each step, Red denotes predicted target object",
            # NOTE: a custom `js` hook meant to hide unused step slots was
            # sketched here earlier but never enabled; unused slots therefore
            # show the end marker text and the empty mask.
        )

demo.queue().launch(share=True, allowed_paths=['assets'])
|
assets/meta/scannetv2-labels.combined.tsv
ADDED
@@ -0,0 +1,608 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
id raw_category category count nyu40id eigen13id nyuClass nyu40class eigen13class ModelNet40 ModelNet10 ShapeNetCore55 synsetoffset wnsynsetid wnsynsetkey mpcat40 mpcat40index
|
2 |
+
1 wall wall 8277 1 12 wall wall Wall n04546855 wall.n.01 wall 1
|
3 |
+
2 chair chair 4646 5 4 chair chair Chair chair chair chair 3001627 n03001627 chair.n.01 chair 3
|
4 |
+
22 books book 1678 23 2 book books Books n02870526 book.n.11 objects 39
|
5 |
+
3 floor floor 1553 2 5 floor floor Floor n03365592 floor.n.01 floor 2
|
6 |
+
5 door door 1483 8 12 door door Wall door n03221720 door.n.01 door 4
|
7 |
+
1163 object object 1313 40 7 otherprop Objects objects 39
|
8 |
+
16 window window 1209 9 13 window window Window n04587648 window.n.01 window 9
|
9 |
+
4 table table 1170 7 10 table table Table table table table 4379243 n04379243 table.n.02 table 5
|
10 |
+
56 trash can trash can 1090 39 6 garbage bin otherfurniture Furniture trash_bin 2747177 n02747177 ashcan.n.01 objects 39
|
11 |
+
13 pillow pillow 937 18 7 pillow pillow Objects pillow 3938244 n03938244 pillow.n.01 cushion 8
|
12 |
+
15 picture picture 862 11 8 picture picture Picture n03931044 picture.n.01 picture 6
|
13 |
+
41 ceiling ceiling 806 22 3 ceiling ceiling Ceiling n02990373 ceiling.n.01 ceiling 17
|
14 |
+
26 box box 775 29 7 box box Objects n02883344 box.n.01 objects 39
|
15 |
+
161 doorframe doorframe 768 8 12 door door Wall door doorframe.n.01 door 4
|
16 |
+
19 monitor monitor 765 40 7 monitor otherprop Objects monitor monitor tv or monitor 3211117 n03782190 monitor.n.04 objects 39
|
17 |
+
7 cabinet cabinet 731 3 6 cabinet cabinet Furniture cabinet 2933112 n02933112 cabinet.n.01 cabinet 7
|
18 |
+
9 desk desk 680 14 10 desk desk Table desk desk table 4379243 n03179701 desk.n.01 table 5
|
19 |
+
8 shelf shelf 641 15 6 shelves shelves Furniture bookshelf bookshelf 2871439 n02871439 bookshelf.n.01 shelving 31
|
20 |
+
10 office chair office chair 595 5 4 chair chair Chair chair chair chair 3001627 n04373704 swivel_chair.n.01 chair 3
|
21 |
+
31 towel towel 570 27 7 towel towel Objects n04459362 towel.n.01 towel 20
|
22 |
+
6 couch couch 502 6 9 sofa sofa Sofa sofa sofa sofa 4256520 n04256520 sofa.n.01 sofa 10
|
23 |
+
14 sink sink 488 34 7 sink sink Objects sink n04223580 sink.n.01 sink 15
|
24 |
+
48 backpack backpack 479 40 7 backpack otherprop Objects n02769748 backpack.n.01 objects 39
|
25 |
+
28 lamp lamp 419 35 7 lamp lamp Objects lamp lamp 3636649 n03636649 lamp.n.02 lighting 28
|
26 |
+
11 bed bed 370 4 1 bed bed Bed bed bed bed 2818832 n02818832 bed.n.01 bed 11
|
27 |
+
18 bookshelf bookshelf 360 10 6 bookshelf bookshelf Furniture bookshelf bookshelf 2871439 n02871439 bookshelf.n.01 shelving 31
|
28 |
+
71 mirror mirror 349 19 7 mirror mirror Objects n03773035 mirror.n.01 mirror 21
|
29 |
+
21 curtain curtain 347 16 13 curtain curtain Window curtain n03151077 curtain.n.01 curtain 12
|
30 |
+
40 plant plant 331 40 7 plant otherprop Objects plant n00017222 plant.n.02 plant 14
|
31 |
+
52 whiteboard whiteboard 327 30 7 whiteboard whiteboard Objects n03211616 display_panel.n.01 board_panel 35
|
32 |
+
96 radiator radiator 322 39 6 radiator otherfurniture Furniture n04041069 radiator.n.02 misc 40
|
33 |
+
22 book book 318 23 2 book books Books n02870526 book.n.11 objects 39
|
34 |
+
29 kitchen cabinet kitchen cabinet 310 3 6 cabinet cabinet Furniture n02933112 cabinet.n.01 cabinet 7
|
35 |
+
49 toilet paper toilet paper 291 40 7 toilet paper otherprop Objects n15075141 toilet_tissue.n.01 objects 39
|
36 |
+
29 kitchen cabinets kitchen cabinet 289 3 6 cabinet cabinet Furniture cabinet 2933112 n02933112 cabinet.n.01 cabinet 7
|
37 |
+
23 armchair armchair 281 5 4 chair chair Chair chair chair chair 3001627 n02738535 armchair.n.01 chair 3
|
38 |
+
63 shoes shoe 272 40 7 shoe otherprop Objects n04199027 shoe.n.01 clothes 38
|
39 |
+
24 coffee table coffee table 258 7 10 coffee table table Table table table table 4379243 n03063968 coffee_table.n.01 table 5
|
40 |
+
17 toilet toilet 256 33 7 toilet toilet Objects toilet toilet n04446276 toilet.n.01 toilet 18
|
41 |
+
47 bag bag 252 37 7 bag bag Objects suitcase 2773838 n02773838 bag.n.06 objects 39
|
42 |
+
32 clothes clothes 248 21 7 clothes clothes Objects n02728440 apparel.n.01 clothes 38
|
43 |
+
46 keyboard keyboard 246 40 7 keyboard otherprop Objects keyboard computer keyboard 3085013 n03085013 computer_keyboard.n.01 objects 39
|
44 |
+
65 bottle bottle 226 40 7 bottle otherprop Objects bottle bottle 2876657 n02876657 bottle.n.01 objects 39
|
45 |
+
97 recycling bin recycling bin 225 39 6 garbage bin otherfurniture Furniture trash_bin 2747177 n02747177 ashcan.n.01 objects 39
|
46 |
+
34 nightstand nightstand 224 32 6 night stand night stand Furniture night_stand night_stand n03015254 chest_of_drawers.n.01 chest_of_drawers 13
|
47 |
+
38 stool stool 221 40 7 stool otherprop Objects stool n04326896 stool.n.01 stool 19
|
48 |
+
33 tv tv 219 25 11 television television TV tv or monitor 3211117 n03211117 display.n.06 tv_monitor 22
|
49 |
+
75 file cabinet file cabinet 217 3 6 cabinet cabinet Furniture cabinet 2933112 n02933112 cabinet.n.01 cabinet 7
|
50 |
+
36 dresser dresser 213 17 6 dresser dresser Furniture dresser dresser n03015254 chest_of_drawers.n.01 chest_of_drawers 13
|
51 |
+
64 computer tower computer tower 203 40 7 computer otherprop Objects n03082979 computer.n.01 objects 39
|
52 |
+
32 clothing clothes 165 21 7 clothes clothes Objects n02728440 apparel.n.01 clothes 38
|
53 |
+
101 telephone telephone 164 40 7 telephone otherprop Objects telephone 4401088 n04401088 telephone.n.01 objects 39
|
54 |
+
130 cup cup 157 40 7 cup otherprop Objects cup cup or mug 3797390 n03797390 mug.n.04 objects 39
|
55 |
+
27 refrigerator refrigerator 154 24 6 refridgerator refridgerator Furniture n04070727 refrigerator.n.01 appliances 37
|
56 |
+
44 end table end table 147 7 10 table table Table table table table 4379243 n04379243 table.n.02 table 5
|
57 |
+
131 jacket jacket 146 40 7 jacket otherprop Objects n03589791 jacket.n.01 clothes 38
|
58 |
+
55 shower curtain shower curtain 144 28 7 shower curtain shower curtain Objects curtain n04209239 shower_curtain.n.01 curtain 12
|
59 |
+
42 bathtub bathtub 144 36 7 bathtub bathtub Objects bathtub bathtub tub 2808440 n02808440 bathtub.n.01 bathtub 25
|
60 |
+
59 microwave microwave 141 40 7 microwave otherprop Objects microwave 3761084 n03761084 microwave.n.02 appliances 37
|
61 |
+
159 kitchen counter kitchen counter 140 12 6 counter counter Furniture table table table 4379243 n03116530 counter.n.01 counter 26
|
62 |
+
74 sofa chair sofa chair 129 5 4 chair chair Chair chair chair chair 3001627 n03001627 chair.n.01 chair 3
|
63 |
+
82 paper towel dispenser paper towel dispenser 129 40 7 paper towel dispenser otherprop Objects objects 39
|
64 |
+
1164 bathroom vanity bathroom vanity 126 3 6 cabinet cabinet Furniture cabinet 2933112 n02933112 cabinet.n.01 table 5
|
65 |
+
93 suitcase suitcase 118 40 7 luggage otherprop Objects n02773838 bag.n.06 objects 39
|
66 |
+
77 laptop laptop 111 40 7 laptop otherprop Objects laptop laptop 3642806 n03642806 laptop.n.01 objects 39
|
67 |
+
67 ottoman ottoman 111 39 6 ottoman otherfurniture Furniture stool n03380724 footstool.n.01 stool 19
|
68 |
+
128 shower walls shower wall 109 1 12 wall wall Wall n04546855 wall.n.01 wall 1
|
69 |
+
50 printer printer 106 40 7 printer otherprop Objects printer 4004475 n04004475 printer.n.03 appliances 37
|
70 |
+
35 counter counter 104 12 6 counter counter Furniture table table table 4379243 n03116530 counter.n.01 counter 26
|
71 |
+
69 board board 100 38 7 board otherstructure Objects board_panel 35
|
72 |
+
100 soap dispenser soap dispenser 99 40 7 otherprop Objects n04254120 soap_dispenser.n.01 objects 39
|
73 |
+
62 stove stove 95 38 7 stove otherstructure Objects stove 4330267 n04330267 stove.n.02 appliances 37
|
74 |
+
105 light light 93 38 7 light otherstructure Objects n03665366 light.n.02 lighting 28
|
75 |
+
1165 closet wall closet wall 90 1 12 wall wall Wall n04546855 wall.n.01 wall 1
|
76 |
+
165 mini fridge mini fridge 87 24 6 refridgerator refridgerator Furniture n03273913 electric_refrigerator.n.01 appliances 37
|
77 |
+
7 cabinets cabinet 79 3 6 cabinet cabinet Furniture cabinet 2933112 n02933112 cabinet.n.01 cabinet 7
|
78 |
+
5 doors door 76 8 12 door door Wall door n03221720 door.n.01 door 4
|
79 |
+
76 fan fan 75 40 7 fan otherprop Objects n03320046 fan.n.01 misc 40
|
80 |
+
230 tissue box tissue box 73 40 7 tissue box otherprop Objects n02883344 box.n.01 objects 39
|
81 |
+
54 blanket blanket 72 40 7 blanket otherprop Objects n02849154 blanket.n.01 objects 39
|
82 |
+
125 bathroom stall bathroom stall 71 38 7 otherstructure Objects n02873839 booth.n.02 misc 40
|
83 |
+
72 copier copier 70 40 7 otherprop Objects n03257586 duplicator.n.01 appliances 37
|
84 |
+
68 bench bench 66 39 6 bench otherfurniture Furniture bench bench 2828884 n02828884 bench.n.01 seating 34
|
85 |
+
145 bar bar 66 38 7 bar otherstructure Objects n02788689 bar.n.03 misc 40
|
86 |
+
157 soap dish soap dish 65 40 7 soap dish otherprop Objects n04254009 soap_dish.n.01 objects 39
|
87 |
+
1166 laundry hamper laundry hamper 65 40 7 laundry basket otherprop Objects objects 39
|
88 |
+
132 storage bin storage bin 63 40 7 storage bin otherprop Objects objects 39
|
89 |
+
1167 bathroom stall door bathroom stall door 62 8 12 door door Wall door n03221720 door.n.01 door 4
|
90 |
+
232 light switch light switch 61 38 7 light switch otherstructure Objects n04372370 switch.n.01 misc 40
|
91 |
+
134 coffee maker coffee maker 61 40 7 otherprop Objects n03063338 coffee_maker.n.01 appliances 37
|
92 |
+
51 tv stand tv stand 61 39 6 tv stand otherfurniture Furniture tv_stand n03290653 entertainment_center.n.01 furniture 36
|
93 |
+
250 decoration decoration 60 40 7 otherprop Objects n03169390 decoration.n.01 misc 40
|
94 |
+
1168 ceiling light ceiling light 59 38 7 light otherstructure Objects n03665366 light.n.02 lighting 28
|
95 |
+
342 range hood range hood 59 38 7 range hood otherstructure Objects range_hood n04053677 range_hood.n.01 misc 40
|
96 |
+
89 blackboard blackboard 58 38 7 blackboard otherstructure Objects n02846511 blackboard.n.01 board_panel 35
|
97 |
+
103 clock clock 58 40 7 clock otherprop Objects clock 3046257 n03046257 clock.n.01 objects 39
|
98 |
+
99 wardrobe closet wardrobe 54 39 6 wardrobe otherfurniture Furniture wardrobe n04550184 wardrobe.n.01 furniture 36
|
99 |
+
95 rail rail 53 38 7 railing otherstructure Objects n04047401 railing.n.01 railing 30
|
100 |
+
154 bulletin board bulletin board 53 38 7 board otherstructure Objects n03211616 display_panel.n.01 board_panel 35
|
101 |
+
140 mat mat 52 20 5 floor mat floor mat Floor n03727837 mat.n.01 floor 2
|
102 |
+
1169 trash bin trash bin 52 39 6 garbage bin otherfurniture Furniture trash_bin 2747177 n02747177 ashcan.n.01 objects 39
|
103 |
+
193 ledge ledge 51 38 7 otherstructure Objects n09337253 ledge.n.01 misc 40
|
104 |
+
116 seat seat 49 39 6 furniture otherfurniture Furniture n04161981 seat.n.03 furniture 36
|
105 |
+
202 mouse mouse 49 40 7 mouse otherprop Objects n03793489 mouse.n.04 objects 39
|
106 |
+
73 basket basket 48 40 7 basket otherprop Objects basket 2801938 n02801938 basket.n.01 objects 39
|
107 |
+
78 shower shower 48 38 7 otherstructure Objects n04208936 shower.n.01 shower 23
|
108 |
+
1170 dumbbell dumbbell 48 40 7 otherprop Objects n03255030 dumbbell.n.01 objects 39
|
109 |
+
79 paper paper 46 26 7 paper paper Objects n14974264 paper.n.01 objects 39
|
110 |
+
80 person person 46 31 7 person person Objects person n05217688 person.n.02 misc 40
|
111 |
+
141 windowsill windowsill 45 38 7 otherstructure Objects n04590263 windowsill.n.01 window 9
|
112 |
+
57 closet closet 45 39 6 wardrobe otherfurniture Furniture wardrobe misc 40
|
113 |
+
102 bucket bucket 45 40 7 bucket otherprop Objects n02909870 bucket.n.01 misc 40
|
114 |
+
261 sign sign 44 40 7 sign otherprop Objects n04217882 signboard.n.01 objects 39
|
115 |
+
118 speaker speaker 43 40 7 speaker otherprop Objects speaker 3691459 n03691459 loudspeaker.n.01 objects 39
|
116 |
+
136 dishwasher dishwasher 43 38 7 dishwasher otherstructure Objects dishwasher 3207941 n03207941 dishwasher.n.01 appliances 37
|
117 |
+
98 container container 43 40 7 container otherprop Objects n03094503 container.n.01 objects 39
|
118 |
+
1171 stair rail stair rail 42 38 7 banister otherstructure Objects n02788148 bannister.n.02 railing 30
|
119 |
+
170 shower curtain rod shower curtain rod 42 40 7 otherprop Objects curtain 12
|
120 |
+
1172 tube tube 41 40 7 otherprop Objects misc 40
|
121 |
+
1173 bathroom cabinet bathroom cabinet 39 3 6 cabinet cabinet Furniture cabinet 2933112 n02933112 cabinet.n.01 cabinet 7
|
122 |
+
79 papers paper 39 26 7 paper paper Objects n14974264 paper.n.01 objects 39
|
123 |
+
221 storage container storage container 39 40 7 container otherprop Objects objects 39
|
124 |
+
570 paper bag paper bag 39 37 7 bag bag Objects n04122825 sack.n.01 objects 39
|
125 |
+
138 paper towel roll paper towel roll 39 40 7 paper towel otherprop Objects n03887697 paper_towel.n.01 towel 20
|
126 |
+
168 ball ball 39 40 7 ball otherprop Objects objects 39
|
127 |
+
276 closet doors closet door 38 8 12 door door Wall door n03221720 door.n.01 door 4
|
128 |
+
106 laundry basket laundry basket 37 40 7 laundry basket otherprop Objects basket 2801938 n03050864 clothes_hamper.n.01 objects 39
|
129 |
+
214 cart cart 37 40 7 cart otherprop Objects n03484083 handcart.n.01 shelving 31
|
130 |
+
276 closet door closet door 35 8 12 door door Wall door n03221720 door.n.01 door 4
|
131 |
+
323 dish rack dish rack 35 40 7 dish rack otherprop Objects n03207630 dish_rack.n.01 objects 39
|
132 |
+
58 stairs stairs 35 38 7 stairs otherstructure Objects n04298308 stairway.n.01 stairs 16
|
133 |
+
86 blinds blinds 35 13 13 blinds blinds Window n02851099 blind.n.03 blinds 32
|
134 |
+
2 stack of chairs chair 35 5 4 chair chair Chair chair chair chair 3001627 n03001627 chair.n.01 chair 3
|
135 |
+
399 purse purse 34 40 7 purse otherprop Objects n02774152 bag.n.04 objects 39
|
136 |
+
121 bicycle bicycle 33 40 7 bicycle otherprop Objects bicycle 2834778 n02834778 bicycle.n.01 objects 39
|
137 |
+
185 tray tray 32 40 7 tray otherprop Objects n04476259 tray.n.01 objects 39
|
138 |
+
300 plunger plunger 30 40 7 otherprop Objects n03970156 plunger.n.03 objects 39
|
139 |
+
180 paper cutter paper cutter 30 40 7 paper cutter otherprop Objects n03886940 paper_cutter.n.01 objects 39
|
140 |
+
163 toilet paper dispenser toilet paper dispenser 29 40 7 otherprop Objects objects 39
|
141 |
+
26 boxes box 29 29 7 box box Objects n02883344 box.n.01 objects 39
|
142 |
+
66 bin bin 28 40 7 bin otherprop Objects n02839910 bin.n.01 objects 39
|
143 |
+
208 toilet seat cover dispenser toilet seat cover dispenser 28 40 7 otherprop Objects objects 39
|
144 |
+
112 guitar guitar 28 40 7 guitar otherprop Objects guitar guitar 3467517 n03467517 guitar.n.01 objects 39
|
145 |
+
540 mailboxes mailbox 28 29 7 box box Objects mailbox 3710193 n03710193 mailbox.n.01 misc 40
|
146 |
+
395 handicap bar handicap bar 27 38 7 bar otherstructure Objects misc 40
|
147 |
+
166 fire extinguisher fire extinguisher 27 40 7 fire extinguisher otherprop Objects n03345837 fire_extinguisher.n.01 misc 40
|
148 |
+
122 ladder ladder 27 39 6 ladder otherfurniture Furniture stairs n03632277 ladder.n.01 stairs 16
|
149 |
+
120 column column 26 38 7 column otherstructure Objects n03074380 column.n.06 column 24
|
150 |
+
107 pipe pipe 25 40 7 pipe otherprop Objects n03944672 pipe.n.02 misc 40
|
151 |
+
283 vacuum cleaner vacuum cleaner 25 40 7 otherprop Objects n04517823 vacuum.n.04 objects 39
|
152 |
+
88 plate plate 24 40 7 plate otherprop Objects n03959485 plate.n.04 objects 39
|
153 |
+
90 piano piano 24 39 6 piano otherfurniture Furniture piano piano 3928116 n03928116 piano.n.01 furniture 36
|
154 |
+
177 water cooler water cooler 24 39 6 water cooler otherfurniture Furniture n04559166 water_cooler.n.01 misc 40
|
155 |
+
1174 cd case cd case 24 40 7 otherprop Objects objects 39
|
156 |
+
562 bowl bowl 24 40 7 bowl otherprop Objects bowl bowl 2880940 n02880940 bowl.n.03 objects 39
|
157 |
+
1175 closet rod closet rod 24 40 7 otherprop Objects n04100174 rod.n.01 misc 40
|
158 |
+
1156 bathroom counter bathroom counter 24 12 6 counter counter Furniture table table table 4379243 n03116530 counter.n.01 counter 26
|
159 |
+
84 oven oven 23 38 7 oven otherstructure Objects n03862676 oven.n.01 appliances 37
|
160 |
+
104 stand stand 23 39 6 stand otherfurniture Furniture table table table 4379243 n04301000 stand.n.04 table 5
|
161 |
+
229 scale scale 23 40 7 scale otherprop Objects n04141975 scale.n.07 objects 39
|
162 |
+
70 washing machine washing machine 23 39 6 washing machine otherfurniture Furniture washing_machine 4554684 n04554684 washer.n.03 appliances 37
|
163 |
+
325 broom broom 22 40 7 broom otherprop Objects n02906734 broom.n.01 objects 39
|
164 |
+
169 hat hat 22 40 7 hat otherprop Objects n03497657 hat.n.01 clothes 38
|
165 |
+
128 shower wall shower wall 22 1 12 wall wall Wall n04208936 shower.n.01 wall 1
|
166 |
+
331 guitar case guitar case 21 40 7 guitar case otherprop Objects objects 39
|
167 |
+
87 rack rack 21 39 6 stand otherfurniture Furniture n04038440 rack.n.05 shelving 31
|
168 |
+
488 water pitcher water pitcher 21 40 7 pitcher otherprop Objects n03950228 pitcher.n.02 objects 39
|
169 |
+
776 laundry detergent laundry detergent 21 40 7 otherprop Objects objects 39
|
170 |
+
370 hair dryer hair dryer 21 40 7 hair dryer otherprop Objects n03483316 hand_blower.n.01 objects 39
|
171 |
+
191 pillar pillar 21 38 7 column otherstructure Objects n03073977 column.n.07 column 24
|
172 |
+
748 divider divider 20 40 7 otherprop Objects wall 1
|
173 |
+
242 power outlet power outlet 19 40 7 otherprop Objects misc 40
|
174 |
+
45 dining table dining table 19 7 10 table table Table table table table 4379243 n04379243 table.n.02 table 5
|
175 |
+
417 shower floor shower floor 19 2 5 floor floor Floor n04208936 shower.n.01 floor 2
|
176 |
+
70 washing machines washing machine 19 39 6 washing machine otherfurniture Furniture washing_machine 4554684 n04554684 washer.n.03 appliances 37
|
177 |
+
188 shower door shower door 19 8 12 door door Wall door n04208936 shower.n.01 door 4
|
178 |
+
1176 coffee kettle coffee kettle 18 40 7 pot otherprop Objects n03612814 kettle.n.01 objects 39
|
179 |
+
1177 wardrobe cabinet wardrobe 18 39 6 wardrobe otherfurniture Furniture wardrobe n04550184 wardrobe.n.01 furniture 36
|
180 |
+
1178 structure structure 18 38 7 otherstructure Objects misc 40
|
181 |
+
18 bookshelves bookshelf 17 10 6 bookshelf bookshelf Furniture bookshelf bookshelf 2871439 n02871439 bookshelf.n.01 shelving 31
|
182 |
+
110 clothes dryer clothes dryer 17 39 6 otherfurniture Furniture n03251766 dryer.n.01 appliances 37
|
183 |
+
148 toaster toaster 17 40 7 toaster otherprop Objects n04442312 toaster.n.02 appliances 37
|
184 |
+
63 shoe shoe 17 40 7 shoe otherprop Objects n04199027 shoe.n.01 clothes 38
|
185 |
+
155 ironing board ironing board 16 39 6 ironing board otherfurniture Furniture n03586090 ironing_board.n.01 objects 39
|
186 |
+
572 alarm clock alarm clock 16 40 7 alarm clock otherprop Objects clock 3046257 n02694662 alarm_clock.n.01 objects 39
|
187 |
+
1179 shower head shower head 15 38 7 otherstructure Objects shower 23
|
188 |
+
28 lamp base lamp 15 35 7 lamp lamp Objects lamp lamp 3636649 n03636649 lamp.n.02 lighting 28
|
189 |
+
392 water bottle water bottle 15 40 7 bottle otherprop Objects bottle bottle 2876657 n04557648 water_bottle.n.01 objects 39
|
190 |
+
1180 keyboard piano keyboard piano 15 39 6 piano otherfurniture Furniture piano piano 3928116 n03928116 piano.n.01 furniture 36
|
191 |
+
609 projector screen projector screen 15 38 7 projector screen otherstructure Objects misc 40
|
192 |
+
1181 case of water bottles case of water bottles 15 40 7 otherprop Objects objects 39
|
193 |
+
195 toaster oven toaster oven 14 40 7 toaster oven otherprop Objects n04442441 toaster_oven.n.01 appliances 37
|
194 |
+
581 music stand music stand 14 39 6 music stand otherfurniture Furniture n03801760 music_stand.n.01 furniture 36
|
195 |
+
58 staircase stairs 14 38 7 stairs otherstructure Objects n04298308 stairway.n.01 stairs 16
|
196 |
+
1182 coat rack coat rack 14 40 7 otherprop Objects n03059103 coatrack.n.01 shelving 3
|
197 |
+
1183 storage organizer storage organizer 14 40 7 otherprop Objects shelving 3
|
198 |
+
139 machine machine 14 40 7 machine otherprop Objects n03699975 machine.n.01 appliances 37
|
199 |
+
1184 folded chair folded chair 14 5 4 chair chair Chair chair chair chair 3001627 n03001627 chair.n.01 chair 3
|
200 |
+
1185 fire alarm fire alarm 14 40 7 otherprop Objects n03343737 fire_alarm.n.02 misc 40
|
201 |
+
156 fireplace fireplace 13 38 7 fireplace otherstructure Objects n03346455 fireplace.n.01 fireplace 27
|
202 |
+
408 vent vent 13 40 7 otherprop Objects n04526241 vent.n.01 misc 40
|
203 |
+
213 furniture furniture 13 39 6 furniture otherfurniture Furniture n03405725 furniture.n.01 furniture 36
|
204 |
+
1186 power strip power strip 13 40 7 otherprop Objects objects 39
|
205 |
+
1187 calendar calendar 13 40 7 otherprop Objects objects 39
|
206 |
+
1188 poster poster 13 11 8 picture picture Picture n03931044 picture.n.01 picture 6
|
207 |
+
115 toilet paper holder toilet paper holder 13 40 7 toilet paper holder otherprop Objects objects 39
|
208 |
+
1189 potted plant potted plant 12 40 7 plant otherprop Objects plant n00017222 plant.n.02 plant 14
|
209 |
+
304 stuffed animal stuffed animal 12 40 7 stuffed animal otherprop Objects n04399382 teddy.n.01 objects 39
|
210 |
+
1190 luggage luggage 12 40 7 luggage otherprop Objects n02774630 baggage.n.01 objects 39
|
211 |
+
21 curtains curtain 12 16 13 curtain curtain Window curtain n03151077 curtain.n.01 curtain 12
|
212 |
+
312 headphones headphones 12 40 7 otherprop Objects n03261776 earphone.n.01 objects 39
|
213 |
+
233 crate crate 12 39 6 crate otherfurniture Furniture n03127925 crate.n.01 objects 39
|
214 |
+
286 candle candle 12 40 7 candle otherprop Objects lamp n02948072 candle.n.01 objects 39
|
215 |
+
264 projector projector 12 40 7 projector otherprop Objects n04009552 projector.n.02 objects 39
|
216 |
+
110 clothes dryers clothes dryer 12 39 6 otherfurniture Furniture n03251766 dryer.n.01 appliances 37
|
217 |
+
1191 mattress mattress 12 4 1 bed bed Bed bed bed bed 2818832 n02818832 bed.n.01 bed 11
|
218 |
+
356 dustpan dustpan 12 40 7 otherprop Objects n03259009 dustpan.n.02 objects 39
|
219 |
+
25 drawer drawer 11 39 6 drawer otherfurniture Furniture n03233905 drawer.n.01 furniture 36
|
220 |
+
750 rod rod 11 40 7 otherprop Objects pistol 3948459 n03427202 gat.n.01 misc 40
|
221 |
+
269 globe globe 11 40 7 globe otherprop Objects objects 39
|
222 |
+
307 footrest footrest 11 39 6 foot rest otherfurniture Furniture stool n03380724 footstool.n.01 stool 19
|
223 |
+
410 piano bench piano bench 11 39 6 piano bench otherfurniture Furniture bench bench 2828884 n02828884 bench.n.01 seating 34
|
224 |
+
730 breakfast bar breakfast bar 11 38 7 bar otherstructure Objects counter 26
|
225 |
+
216 step stool step stool 11 40 7 step stool otherprop Objects stool n04315713 step_stool.n.01 stool 19
|
226 |
+
1192 hand rail hand rail 11 38 7 railing otherstructure Objects railing 30
|
227 |
+
119 vending machine vending machine 11 40 7 machine otherprop Objects n04525305 vending_machine.n.01 appliances 37
|
228 |
+
682 ceiling fan ceiling fan 11 40 7 fan otherprop Objects n03320046 fan.n.01 misc 40
|
229 |
+
434 swiffer swiffer 11 40 7 otherprop Objects objects 39
|
230 |
+
126 foosball table foosball table 11 39 6 foosball table otherfurniture Furniture table table table 4379243 n04379243 table.n.02 table 5
|
231 |
+
919 jar jar 11 40 7 jar otherprop Objects jar 3593526 n03593526 jar.n.01 objects 39
|
232 |
+
85 footstool footstool 11 39 6 ottoman otherfurniture Furniture stool n03380724 footstool.n.01 stool 19
|
233 |
+
1193 folded table folded table 10 7 10 table table Table table table table 4379243 n04379243 table.n.02 table 5
|
234 |
+
108 round table round table 10 7 10 table table Table table table table 4379243 n04114554 round_table.n.02 table 5
|
235 |
+
135 hamper hamper 10 40 7 basket otherprop Objects basket 2801938 n03482405 hamper.n.02 objects 39
|
236 |
+
1194 poster tube poster tube 10 40 7 otherprop Objects objects 39
|
237 |
+
432 case case 10 40 7 case otherprop Objects objects 39
|
238 |
+
53 carpet carpet 10 40 7 rug otherprop Objects n04118021 rug.n.01 floor 2
|
239 |
+
1195 thermostat thermostat 10 40 7 otherprop Objects n04422875 thermostat.n.01 misc 40
|
240 |
+
111 coat coat 10 40 7 jacket otherprop Objects n03057021 coat.n.01 clothes 38
|
241 |
+
305 water fountain water fountain 10 38 7 water fountain otherstructure Objects n03241335 drinking_fountain.n.01 misc 40
|
242 |
+
1125 smoke detector smoke detector 10 40 7 otherprop Objects misc 40
|
243 |
+
13 pillows pillow 9 18 7 pillow pillow Objects pillow 3938244 n03938244 pillow.n.01 cushion 8
|
244 |
+
1196 flip flops flip flops 9 40 7 shoe otherprop Objects n04199027 shoe.n.01 clothes 38
|
245 |
+
1197 cloth cloth 9 21 7 clothes clothes Objects n02728440 apparel.n.01 clothes 38
|
246 |
+
1198 banner banner 9 40 7 otherprop Objects n02788021 banner.n.01 misc 40
|
247 |
+
1199 clothes hanger clothes hanger 9 40 7 otherprop Objects n03057920 coat_hanger.n.01 objects 39
|
248 |
+
1200 whiteboard eraser whiteboard eraser 9 40 7 otherprop Objects objects 39
|
249 |
+
378 iron iron 9 40 7 otherprop Objects n03584829 iron.n.04 objects 39
|
250 |
+
591 instrument case instrument case 9 40 7 case otherprop Objects objects 39
|
251 |
+
49 toilet paper rolls toilet paper 9 40 7 toilet paper otherprop Objects n15075141 toilet_tissue.n.01 objects 39
|
252 |
+
92 soap soap 9 40 7 soap otherprop Objects n04253437 soap.n.01 objects 39
|
253 |
+
1098 block block 9 40 7 otherprop Objects misc 40
|
254 |
+
291 wall hanging wall hanging 8 40 7 otherprop Objects n03491178 hanging.n.01 picture 6
|
255 |
+
1063 kitchen island kitchen island 8 38 7 kitchen island otherstructure Objects n03620600 kitchen_island.n.01 counter 26
|
256 |
+
107 pipes pipe 8 38 7 otherstructure Objects misc 40
|
257 |
+
1135 toothbrush toothbrush 8 40 7 toothbrush otherprop Objects n04453156 toothbrush.n.01 objects 39
|
258 |
+
189 shirt shirt 8 40 7 otherprop Objects n04197391 shirt.n.01 clothes 38
|
259 |
+
245 cutting board cutting board 8 40 7 cutting board otherprop Objects n03025513 chopping_board.n.01 objects 39
|
260 |
+
194 vase vase 8 40 7 vase otherprop Objects vase jar 3593526 n04522168 vase.n.01 objects 39
|
261 |
+
1201 shower control valve shower control valve 8 38 7 otherstructure Objects n04208936 shower.n.01 shower 23
|
262 |
+
386 exercise machine exercise machine 8 40 7 machine otherprop Objects gym_equipment 33
|
263 |
+
1202 compost bin compost bin 8 39 6 garbage bin otherfurniture Furniture trash_bin 2747177 n02747177 ashcan.n.01 objects 39
|
264 |
+
857 shorts shorts 8 40 7 shorts otherprop Objects clothes 38
|
265 |
+
452 tire tire 8 40 7 otherprop Objects n04440749 tire.n.01 objects 39
|
266 |
+
1203 teddy bear teddy bear 7 40 7 stuffed animal otherprop Objects n04399382 teddy.n.01 objects 39
|
267 |
+
346 bathrobe bathrobe 7 40 7 otherprop Objects n02807616 bathrobe.n.01 clothes 38
|
268 |
+
152 handrail handrail 7 38 7 railing otherstructure Objects n02788148 bannister.n.02 railing 30
|
269 |
+
83 faucet faucet 7 40 7 faucet otherprop Objects faucet 3325088 n03325088 faucet.n.01 misc 40
|
270 |
+
1204 pantry wall pantry wall 7 1 12 wall wall Wall n04546855 wall.n.01 wall 1
|
271 |
+
726 thermos thermos 7 40 7 flask otherprop Objects bottle bottle 2876657 n04422727 thermos.n.01 objects 39
|
272 |
+
61 rug rug 7 40 7 rug otherprop Objects n04118021 rug.n.01 floor 2
|
273 |
+
39 couch cushions cushion 7 18 7 pillow pillow Objects n03151500 cushion.n.03 cushion 8
|
274 |
+
1117 tripod tripod 7 39 6 stand otherfurniture Furniture n04485082 tripod.n.01 objects 39
|
275 |
+
540 mailbox mailbox 7 29 7 box box Objects mailbox 3710193 n03710193 mailbox.n.01 misc 40
|
276 |
+
1205 tupperware tupperware 7 40 7 otherprop Objects objects 39
|
277 |
+
415 shoe rack shoe rack 7 40 7 shoe rack otherprop Objects shelving 31
|
278 |
+
31 towels towel 6 27 7 towel towel Objects n04459362 towel.n.01 towel 20
|
279 |
+
1206 beer bottles beer bottle 6 40 7 bottle otherprop Objects bottle bottle 2876657 n02876657 bottle.n.01 objects 39
|
280 |
+
153 treadmill treadmill 6 39 6 treadmill otherfurniture Furniture n04477387 treadmill.n.01 gym_equipment 33
|
281 |
+
1207 salt salt 6 40 7 otherprop Objects objects 39
|
282 |
+
129 chest chest 6 39 6 chest otherfurniture Furniture dresser dresser chest_of_drawers 13
|
283 |
+
220 dispenser dispenser 6 40 7 otherprop Objects n03210683 dispenser.n.01 objects 39
|
284 |
+
1208 mirror doors mirror door 6 8 12 door door Wall door n03221720 door.n.01 door 4
|
285 |
+
231 remote remote 6 40 7 otherprop Objects remote_control 4074963 n04074963 remote_control.n.01 objects 39
|
286 |
+
1209 folded ladder folded ladder 6 39 6 ladder otherfurniture Furniture stairs n03632277 ladder.n.01 misc 40
|
287 |
+
39 cushion cushion 6 18 7 pillow pillow Objects n03151500 cushion.n.03 cushion 8
|
288 |
+
1210 carton carton 6 40 7 otherprop Objects objects 39
|
289 |
+
117 step step 6 38 7 otherstructure Objects n04314914 step.n.04 misc 40
|
290 |
+
822 drying rack drying rack 6 39 6 drying rack otherfurniture Furniture shelving 31
|
291 |
+
238 slippers slipper 6 40 7 shoe otherprop Objects n04241394 slipper.n.01 clothes 38
|
292 |
+
143 pool table pool table 6 39 6 pool table otherfurniture Furniture table table table 4379243 n03982430 pool_table.n.01 table 5
|
293 |
+
1211 soda stream soda stream 6 40 7 otherprop Objects objects 39
|
294 |
+
228 toilet brush toilet brush 6 40 7 toilet brush otherprop Objects objects 39
|
295 |
+
494 loft bed loft bed 6 4 1 bed bed Bed bed bed bed 2818832 n02818832 bed.n.01 bed 11
|
296 |
+
226 cooking pot cooking pot 6 40 7 pot otherprop Objects objects 39
|
297 |
+
91 heater heater 6 39 6 heater otherfurniture Furniture n03508101 heater.n.01 misc 40
|
298 |
+
1072 messenger bag messenger bag 6 37 7 bag bag Objects objects 39
|
299 |
+
435 stapler stapler 6 40 7 stapler otherprop Objects n04303497 stapler.n.01 objects 39
|
300 |
+
1165 closet walls closet wall 5 1 12 wall wall Wall n04546855 wall.n.01 wall 1
|
301 |
+
345 scanner scanner 5 40 7 otherprop Objects appliances 37
|
302 |
+
893 elliptical machine elliptical machine 5 40 7 machine otherprop Objects gym_equipment 33
|
303 |
+
621 kettle kettle 5 40 7 pot otherprop Objects n03612814 kettle.n.01 objects 39
|
304 |
+
1212 metronome metronome 5 40 7 otherprop Objects n03757604 metronome.n.01 objects 39
|
305 |
+
297 dumbell dumbell 5 40 7 otherprop Objects objects 39
|
306 |
+
1213 music book music book 5 23 2 book books Books n02870526 book.n.11 objects 39
|
307 |
+
1214 rice cooker rice cooker 5 40 7 otherprop Objects objects 39
|
308 |
+
1215 dart board dart board 5 38 7 board otherstructure Objects n03162940 dartboard.n.01 objects 39
|
309 |
+
529 sewing machine sewing machine 5 40 7 sewing machine otherprop Objects n04179913 sewing_machine.n.01 objects 39
|
310 |
+
1216 grab bar grab bar 5 38 7 railing otherstructure Objects railing 30
|
311 |
+
1217 flowerpot flowerpot 5 40 7 vase otherprop Objects vase jar 3593526 n04522168 vase.n.01 objects 39
|
312 |
+
1218 painting painting 5 11 8 picture picture Picture n03931044 picture.n.01 picture 6
|
313 |
+
1219 railing railing 5 38 7 railing otherstructure Objects n04047401 railing.n.01 railing 30
|
314 |
+
1220 stair stair 5 38 7 stairs otherstructure Objects stairs n04314914 step.n.04 stairs 16
|
315 |
+
525 toolbox toolbox 5 39 6 chest otherfurniture Furniture n04452615 toolbox.n.01 objects 39
|
316 |
+
204 nerf gun nerf gun 5 40 7 otherprop Objects objects 39
|
317 |
+
693 binders binder 5 40 7 binder otherprop Objects objects 39
|
318 |
+
179 desk lamp desk lamp 5 35 7 lamp lamp Objects lamp lamp 3636649 n03636649 lamp.n.02 lighting 28
|
319 |
+
1221 quadcopter quadcopter 5 40 7 otherprop Objects objects 39
|
320 |
+
1222 pitcher pitcher 5 40 7 pitcher otherprop Objects n03950228 pitcher.n.02 objects 39
|
321 |
+
1223 hanging hanging 5 40 7 otherprop Objects misc 40
|
322 |
+
1224 mail mail 5 40 7 otherprop Objects misc 40
|
323 |
+
1225 closet ceiling closet ceiling 5 22 3 ceiling ceiling Ceiling n02990373 ceiling.n.01 ceiling 17
|
324 |
+
1226 hoverboard hoverboard 5 40 7 otherprop Objects objects 39
|
325 |
+
1227 beanbag chair beanbag chair 5 39 6 bean bag otherfurniture Furniture n02816656 beanbag.n.01 chair 3
|
326 |
+
571 water heater water heater 5 40 7 water heater otherprop Objects n04560113 water_heater.n.01 misc 40
|
327 |
+
1228 spray bottle spray bottle 5 40 7 bottle otherprop Objects bottle bottle 2876657 n02876657 bottle.n.01 objects 39
|
328 |
+
556 rope rope 5 40 7 rope otherprop Objects n04108268 rope.n.01 objects 39
|
329 |
+
280 plastic container plastic container 5 40 7 container otherprop Objects objects 39
|
330 |
+
1229 soap bottle soap bottle 5 40 7 soap otherprop Objects objects 39
|
331 |
+
1230 ikea bag ikea bag 4 37 7 bag bag Objects 2773838 n02773838 bag.n.06 objects 39
|
332 |
+
1231 sleeping bag sleeping bag 4 40 7 otherprop Objects n04235860 sleeping_bag.n.01 objects 39
|
333 |
+
1232 duffel bag duffel bag 4 37 7 bag bag Objects suitcase 2773838 n02773838 bag.n.06 objects 39
|
334 |
+
746 frying pan frying pan 4 40 7 frying pan otherprop Objects n03400231 frying_pan.n.01 objects 39
|
335 |
+
1233 oven mitt oven mitt 4 40 7 otherprop Objects objects 39
|
336 |
+
1234 pot pot 4 40 7 pot otherprop Objects n04235860 sleeping_bag.n.01 objects 39
|
337 |
+
144 hand dryer hand dryer 4 40 7 otherprop Objects objects 39
|
338 |
+
282 dollhouse dollhouse 4 39 6 doll house otherfurniture Furniture n03219483 dollhouse.n.01 objects 39
|
339 |
+
167 shampoo bottle shampoo bottle 4 40 7 bottle otherprop Objects bottle bottle 2876657 n02876657 bottle.n.01 objects 39
|
340 |
+
1235 hair brush hair brush 4 40 7 otherprop Objects n02908217 brush.n.02 objects 39
|
341 |
+
1236 tennis racket tennis racket 4 40 7 otherprop Objects n04409806 tennis_racket.n.01 objects 39
|
342 |
+
1237 display case display case 4 40 7 case otherprop Objects objects 39
|
343 |
+
234 ping pong table ping pong table 4 39 6 ping pong table otherfurniture Furniture table table table 4379243 n04379243 table.n.02 table 5
|
344 |
+
563 boiler boiler 4 40 7 otherprop Objects misc 40
|
345 |
+
1238 bag of coffee beans bag of coffee beans 4 37 7 bag bag Objects suitcase 2773838 n02773838 bag.n.06 objects 39
|
346 |
+
1239 bananas banana 4 40 7 otherprop Objects n00021265 food.n.01 objects 39
|
347 |
+
1240 carseat carseat 4 40 7 otherprop Objects misc 40
|
348 |
+
366 helmet helmet 4 40 7 otherprop Objects helmet 3513137 n03513137 helmet.n.02 clothes 38
|
349 |
+
816 umbrella umbrella 4 40 7 umbrella otherprop Objects n04507155 umbrella.n.01 objects 39
|
350 |
+
1241 coffee box coffee box 4 40 7 otherprop Objects objects 39
|
351 |
+
719 envelope envelope 4 40 7 envelope otherprop Objects n03291819 envelope.n.01 objects 39
|
352 |
+
284 wet floor sign wet floor sign 4 40 7 sign otherprop Objects misc 40
|
353 |
+
1242 clothing rack clothing rack 4 39 6 stand otherfurniture Furniture n04038440 rack.n.05 shelving 31
|
354 |
+
247 controller controller 4 40 7 otherprop Objects n03096960 control.n.09 objects 39
|
355 |
+
1243 bath walls bathroom wall 4 1 12 wall wall Wall n04546855 wall.n.01 wall 1
|
356 |
+
1244 podium podium 4 39 6 otherfurniture Furniture n03159640 dais.n.01 furniture 36
|
357 |
+
1245 storage box storage box 4 29 7 box box Objects n02883344 box.n.01 objects 39
|
358 |
+
1246 dolly dolly 4 40 7 otherprop Objects misc 40
|
359 |
+
1247 shampoo shampoo 3 40 7 otherprop Objects n04183516 shampoo.n.01 objects 39
|
360 |
+
592 paper tray paper tray 3 40 7 paper tray otherprop Objects objects 39
|
361 |
+
385 cabinet door cabinet door 3 8 12 door door Wall door door 4
|
362 |
+
1248 changing station changing station 3 40 7 otherprop Objects misc 40
|
363 |
+
1249 poster printer poster printer 3 40 7 printer otherprop Objects printer 4004475 n04004475 printer.n.03 appliances 37
|
364 |
+
133 screen screen 3 40 7 otherprop Objects n03151077 curtain.n.01 curtain 12
|
365 |
+
301 soap bar soap bar 3 38 7 bar otherstructure Objects objects 39
|
366 |
+
1250 crutches crutches 3 40 7 otherprop Objects n03141823 crutch.n.01 objects 39
|
367 |
+
379 studio light studio light 3 38 7 light otherstructure Objects lighting 28
|
368 |
+
130 stack of cups cup 3 40 7 cup otherprop Objects cup cup or mug 3797390 n03797390 mug.n.04 objects 39
|
369 |
+
1251 toilet flush button toilet flush button 3 40 7 otherprop Objects objects 39
|
370 |
+
450 trunk trunk 3 40 7 otherprop Objects misc 40
|
371 |
+
1252 grocery bag grocery bag 3 37 7 bag bag Objects suitcase 2773838 n03461288 grocery_bag.n.01 objects 39
|
372 |
+
316 plastic bin plastic bin 3 40 7 bin otherprop Objects objects 39
|
373 |
+
1253 pizza box pizza box 3 29 7 box box Objects objects 39
|
374 |
+
385 cabinet doors cabinet door 3 3 6 cabinet cabinet Furniture cabinet 2933112 n02933112 cabinet.n.01 door 4
|
375 |
+
1254 legs legs 3 31 7 person person Objects person n05217688 person.n.02 misc 40
|
376 |
+
461 car car 3 40 7 car otherprop Objects car car 2958343 n02958343 car.n.01 misc 40
|
377 |
+
1255 shaving cream shaving cream 3 40 7 otherprop Objects n04186051 shaving_cream.n.01 objects 39
|
378 |
+
1256 luggage stand luggage stand 3 39 6 stand otherfurniture Furniture n04038440 rack.n.05 shelving 31
|
379 |
+
599 shredder shredder 3 40 7 otherprop Objects n04210120 shredder.n.01 objects 39
|
380 |
+
281 statue statue 3 40 7 sculpture otherprop Objects n04306847 statue.n.01 misc 40
|
381 |
+
1257 urinal urinal 3 33 7 toilet toilet Objects toilet toilet n04515991 urinal.n.01 toilet 18
|
382 |
+
1258 hose hose 3 40 7 otherprop Objects n03539875 hose.n.03 misc 40
|
383 |
+
1259 bike pump bike pump 3 40 7 otherprop Objects objects 39
|
384 |
+
319 coatrack coatrack 3 40 7 otherprop Objects n03059103 coatrack.n.01 shelving 31
|
385 |
+
1260 bear bear 3 40 7 otherprop Objects objects 39
|
386 |
+
28 wall lamp lamp 3 35 7 lamp lamp Objects lamp lamp 3636649 n03636649 lamp.n.02 lighting 28
|
387 |
+
1261 humidifier humidifier 3 40 7 otherprop Objects objects 39
|
388 |
+
546 toothpaste toothpaste 3 40 7 toothpaste otherprop Objects objects 39
|
389 |
+
1262 mouthwash bottle mouthwash bottle 3 40 7 bottle otherprop Objects bottle bottle 2876657 n02876657 bottle.n.01 objects 39
|
390 |
+
1263 poster cutter poster cutter 3 40 7 otherprop Objects objects 39
|
391 |
+
1264 golf bag golf bag 3 37 7 bag bag Objects suitcase 2773838 n03445617 golf_bag.n.01 objects 39
|
392 |
+
1265 food container food container 3 40 7 container otherprop Objects n03094503 container.n.01 objects 39
|
393 |
+
1266 camera camera 3 40 7 otherprop Objects objects 39
|
394 |
+
28 table lamp lamp 3 35 7 lamp lamp Objects lamp lamp 3636649 n04380533 table_lamp.n.01 lighting 28
|
395 |
+
1267 yoga mat yoga mat 3 20 5 floor mat floor mat Floor n03727837 mat.n.01 floor 2
|
396 |
+
1268 card card 3 40 7 otherprop Objects objects 39
|
397 |
+
1269 mug mug 3 40 7 cup otherprop Objects cup cup or mug 3797390 n03797390 mug.n.04 objects 39
|
398 |
+
188 shower doors shower door 3 38 7 otherstructure Objects n04208936 shower.n.01 door 4
|
399 |
+
689 cardboard cardboard 3 40 7 otherprop Objects objects 39
|
400 |
+
1270 rack stand rack stand 3 39 6 stand otherfurniture Furniture n04038440 rack.n.05 shelving 31
|
401 |
+
1271 boxes of paper boxes of paper 3 29 7 box box Objects n02883344 box.n.01 objects 39
|
402 |
+
1272 flag flag 3 40 7 otherprop Objects misc 40
|
403 |
+
354 futon futon 3 39 6 mattress otherfurniture Furniture n03408444 futon.n.01 sofa 10
|
404 |
+
339 magazine magazine 3 40 7 magazine otherprop Objects n06595351 magazine.n.01 objects 39
|
405 |
+
1009 exit sign exit sign 3 40 7 exit sign otherprop Objects misc 40
|
406 |
+
1273 rolled poster rolled poster 3 40 7 otherprop Objects objects 39
|
407 |
+
1274 wheel wheel 3 40 7 otherprop Objects objects 39
|
408 |
+
15 pictures picture 3 11 8 picture picture Picture n03931044 picture.n.01 picture 6
|
409 |
+
1275 blackboard eraser blackboard eraser 3 40 7 eraser otherprop Objects n03294833 eraser.n.01 objects 39
|
410 |
+
361 organizer organizer 3 40 7 otherprop Objects n03918737 personal_digital_assistant.n.01 objects 39
|
411 |
+
1276 doll doll 3 40 7 toy otherprop Objects n03219135 doll.n.01 objects 39
|
412 |
+
326 book rack book rack 3 39 6 bookrack otherfurniture Furniture objects 39
|
413 |
+
1277 laundry bag laundry bag 3 40 7 laundry basket otherprop Objects basket 2801938 n03050864 clothes_hamper.n.01 objects 39
|
414 |
+
1278 sponge sponge 3 40 7 otherprop Objects n01906749 sponge.n.04 objects 39
|
415 |
+
116 seating seat 3 39 6 furniture otherfurniture Furniture n04161981 seat.n.03 furniture 36
|
416 |
+
1184 folded chairs folded chair 2 5 4 chair chair Chair chair chair chair 3001627 n03001627 chair.n.01 chair 3
|
417 |
+
1279 lotion bottle lotion bottle 2 40 7 bottle otherprop Objects bottle bottle 2876657 n02876657 bottle.n.01 objects 39
|
418 |
+
212 can can 2 40 7 can otherprop Objects can 2946921 n02946921 can.n.01 objects 39
|
419 |
+
1280 lunch box lunch box 2 40 7 otherprop Objects objects 39
|
420 |
+
1281 food display food display 2 40 7 otherprop Objects misc 40
|
421 |
+
794 storage shelf storage shelf 2 40 7 otherprop Objects shelving 31
|
422 |
+
1282 sliding wood door sliding wood door 2 40 7 otherprop Objects door 4
|
423 |
+
955 pants pants 2 40 7 otherprop Objects n04489008 trouser.n.01 clothes 38
|
424 |
+
387 wood wood 2 40 7 otherprop Objects misc 40
|
425 |
+
69 boards board 2 38 7 board otherstructure Objects board_panel 35
|
426 |
+
65 bottles bottle 2 40 7 bottle otherprop Objects bottle bottle 2876657 n02876657 bottle.n.01 objects 39
|
427 |
+
523 washcloth washcloth 2 40 7 otherprop Objects n04554523 washcloth.n.01 towel 20
|
428 |
+
389 workbench workbench 2 39 6 bench otherfurniture Furniture bench table 4379243 n04600486 workbench.n.01 table 5
|
429 |
+
29 open kitchen cabinet kitchen cabinet 2 3 6 cabinet cabinet Furniture n02933112 cabinet.n.01 cabinet 7
|
430 |
+
1283 organizer shelf organizer shelf 2 15 6 shelves shelves Furniture bookshelf bookshelf 2871439 n02871439 bookshelf.n.01 shelving 31
|
431 |
+
146 frame frame 2 38 7 otherstructure Objects misc 40
|
432 |
+
130 cups cup 2 40 7 cup otherprop Objects cup cup or mug 3797390 n03797390 mug.n.04 objects 39
|
433 |
+
372 exercise ball exercise ball 2 40 7 ball otherprop Objects n04285146 sports_equipment.n.01 gym_equipment 33
|
434 |
+
289 easel easel 2 39 6 stand otherfurniture Furniture n03262809 easel.n.01 furniture 36
|
435 |
+
440 garbage bag garbage bag 2 37 7 bag bag Objects suitcase 2773838 n02773838 bag.n.06 objects 39
|
436 |
+
321 roomba roomba 2 40 7 otherprop Objects objects 39
|
437 |
+
976 garage door garage door 2 38 7 garage door otherstructure Objects door door 4
|
438 |
+
1256 luggage rack luggage stand 2 39 6 stand otherfurniture Furniture n04038440 shelving 31
|
439 |
+
1284 bike lock bike lock 2 40 7 otherprop Objects objects 39
|
440 |
+
1285 briefcase briefcase 2 40 7 otherprop Objects n02900705 briefcase.n.01 objects 39
|
441 |
+
357 hand towel hand towel 2 27 7 towel towel Objects n03490006 hand_towel.n.01 towel 20
|
442 |
+
1286 bath products bath product 2 40 7 otherprop Objects objects 39
|
443 |
+
1287 star star 2 40 7 otherprop Objects n09444783 star.n.03 misc 40
|
444 |
+
365 map map 2 40 7 map otherprop Objects n03720163 map.n.01 misc 40
|
445 |
+
1288 coffee bean bag coffee bean bag 2 37 7 bag bag Objects suitcase 2773838 n02773838 bag.n.06 objects 39
|
446 |
+
81 headboard headboard 2 39 6 headboard otherfurniture Furniture n03502200 headboard.n.01 bed 11
|
447 |
+
1289 ipad ipad 2 40 7 otherprop Objects objects 39
|
448 |
+
1290 display rack display rack 2 39 6 stand otherfurniture Furniture n04038440 rack.n.05 shelving 31
|
449 |
+
948 traffic cone traffic cone 2 40 7 cone otherprop Objects cone objects 39
|
450 |
+
174 toiletry toiletry 2 40 7 otherprop Objects n04447443 toiletry.n.01 objects 39
|
451 |
+
1028 canopy canopy 2 40 7 otherprop Objects misc 40
|
452 |
+
1291 massage chair massage chair 2 5 4 chair chair Chair chair chair chair 3001627 n03001627 chair.n.01 chair 3
|
453 |
+
1292 paper organizer paper organizer 2 40 7 otherprop Objects objects 39
|
454 |
+
1005 barricade barricade 2 40 7 otherprop Objects misc 40
|
455 |
+
235 platform platform 2 38 7 otherstructure Objects misc 40
|
456 |
+
1293 cap cap 2 40 7 hat otherprop Objects n03497657 hat.n.01 clothes 38
|
457 |
+
1294 dumbbell plates dumbbell plates 2 40 7 otherprop Objects objects 39
|
458 |
+
1295 elevator elevator 2 38 7 otherstructure Objects misc 40
|
459 |
+
1296 cooking pan cooking pan 2 40 7 pan otherprop Objects n03880531 pan.n.01 objects 39
|
460 |
+
1297 trash bag trash bag 2 37 7 bag bag Objects objects 39
|
461 |
+
1298 santa santa 2 40 7 otherprop Objects misc 40
|
462 |
+
1299 jewelry box jewelry box 2 29 7 box box Objects n02883344 box.n.01 objects 39
|
463 |
+
1300 boat boat 2 40 7 otherprop Objects misc 40
|
464 |
+
1301 sock sock 2 21 7 clothes clothes Objects n04254777 sock.n.01 clothes 38
|
465 |
+
1051 kinect kinect 2 40 7 kinect otherprop Objects objects 39
|
466 |
+
566 crib crib 2 39 6 crib otherfurniture Furniture furniture 36
|
467 |
+
1302 plastic storage bin plastic storage bin 2 40 7 container otherprop Objects n03094503 container.n.01 objects 39
|
468 |
+
1062 cooler cooler 2 24 6 refridgerator refridgerator Furniture n03102654 cooler.n.01 appliances 37
|
469 |
+
1303 kitchen apron kitchen apron 2 21 7 clothes clothes Objects n02728440 apparel.n.01 clothes 38
|
470 |
+
1304 dishwashing soap bottle dishwashing soap bottle 2 40 7 bottle otherprop Objects bottle bottle 2876657 n02876657 bottle.n.01 objects 39
|
471 |
+
1305 xbox controller xbox controller 2 40 7 otherprop Objects objects 39
|
472 |
+
1306 banana holder banana holder 2 40 7 otherprop Objects objects 39
|
473 |
+
298 ping pong paddle ping pong paddle 2 40 7 otherprop Objects table 5
|
474 |
+
1307 airplane airplane 2 40 7 otherprop Objects misc 40
|
475 |
+
1308 conditioner bottle conditioner bottle 2 40 7 bottle otherprop Objects bottle bottle 2876657 n02876657 bottle.n.01 objects 39
|
476 |
+
1309 tea kettle tea kettle 2 40 7 tea kettle otherprop Objects n04397768 teakettle.n.01 objects 39
|
477 |
+
43 bedframe bedframe 2 39 6 otherfurniture Furniture n02822579 bedstead.n.01 bed 11
|
478 |
+
1310 wood beam wood beam 2 38 7 otherstructure Objects beam 29
|
479 |
+
593 toilet paper package toilet paper package 2 40 7 otherprop Objects objects 39
|
480 |
+
1311 wall mounted coat rack wall mounted coat rack 2 40 7 otherprop Objects n03059103 coatrack.n.01 shelving 31
|
481 |
+
1312 film light film light 2 40 7 otherprop Objects lighting 28
|
482 |
+
749 ceiling lamp ceiling lamp 1 35 7 lamp lamp Objects lamp lamp 3636649 n03636649 lamp.n.02 lighting 28
|
483 |
+
623 chain chain 1 40 7 otherprop Objects chair 3
|
484 |
+
1313 sofa sofa 1 6 9 sofa sofa Sofa sofa sofa sofa 4256520 n04256520 sofa.n.01 sofa 10
|
485 |
+
99 closet wardrobe wardrobe 1 39 6 wardrobe otherfurniture Furniture wardrobe n04550184 wardrobe.n.01 furniture 36
|
486 |
+
265 sweater sweater 1 40 7 otherprop Objects n04370048 sweater.n.01 clothes 38
|
487 |
+
1314 kitchen mixer kitchen mixer 1 40 7 otherprop Objects appliances 37
|
488 |
+
99 wardrobe wardrobe 1 39 6 wardrobe otherfurniture Furniture wardrobe n04550184 wardrobe.n.01 furniture 36
|
489 |
+
1315 water softener water softener 1 40 7 otherprop Objects misc 40
|
490 |
+
448 banister banister 1 38 7 banister otherstructure Objects n02788148 bannister.n.02 railing 30
|
491 |
+
257 trolley trolley 1 40 7 trolley otherprop Objects n04335435 streetcar.n.01 misc 40
|
492 |
+
1316 pantry shelf pantry shelf 1 15 6 shelves shelves Furniture bookshelf bookshelf 2871439 n02871439 bookshelf.n.01 shelving 31
|
493 |
+
786 sofa bed sofa bed 1 4 1 bed bed Bed bed bed bed 2818832 n02818832 bed.n.01 bed 11
|
494 |
+
801 loofa loofa 1 40 7 otherprop Objects objects 39
|
495 |
+
972 shower faucet handle shower faucet handle 1 40 7 handle otherprop Objects shower 23
|
496 |
+
1317 toy piano toy piano 1 40 7 toy otherprop Objects n03964744 plaything.n.01 objects 39
|
497 |
+
1318 fish fish 1 40 7 otherprop Objects n02512053 fish.n.01 objects 39
|
498 |
+
75 file cabinets file cabinet 1 3 6 cabinet cabinet Furniture cabinet 2933112 n03337140 file.n.03 cabinet 7
|
499 |
+
657 cat litter box cat litter box 1 29 7 box box Objects objects 39
|
500 |
+
561 electric panel electric panel 1 40 7 otherprop Objects misc 40
|
501 |
+
93 suitcases suitcase 1 40 7 luggage otherprop Objects n02774630 baggage.n.01 objects 39
|
502 |
+
513 curtain rod curtain rod 1 38 7 curtain rod otherstructure Objects curtain 12
|
503 |
+
411 bunk bed bunk bed 1 39 6 bunk bed otherfurniture Furniture bed bed bed 2818832 n02920259 bunk_bed.n.01 bed 11
|
504 |
+
1122 chandelier chandelier 1 38 7 chandelier otherstructure Objects n03005285 chandelier.n.01 lighting 28
|
505 |
+
922 tape tape 1 40 7 tape otherprop Objects objects 39
|
506 |
+
88 plates plate 1 40 7 otherprop Objects n03959485 plate.n.04 objects 39
|
507 |
+
518 alarm alarm 1 40 7 alarm otherprop Objects clock 3046257 n02694662 alarm_clock.n.01 objects 39
|
508 |
+
814 fire hose fire hose 1 40 7 otherprop Objects n03346004 fire_hose.n.01 misc 40
|
509 |
+
1319 toy dinosaur toy dinosaur 1 40 7 toy otherprop Objects n03964744 plaything.n.01 objects 39
|
510 |
+
1320 cone cone 1 40 7 otherprop Objects objects 39
|
511 |
+
649 glass doors glass door 1 8 12 door door Wall door n03221720 door.n.01 door 4
|
512 |
+
607 hatrack hatrack 1 40 7 otherprop Objects n03059103 coatrack.n.01 shelving 31
|
513 |
+
819 subwoofer subwoofer 1 40 7 speaker otherprop Objects speaker 3691459 n04349401 subwoofer.n.01 objects 39
|
514 |
+
1321 fire sprinkler fire sprinkler 1 40 7 otherprop Objects misc 40
|
515 |
+
1322 trash cabinet trash cabinet 1 3 6 cabinet cabinet Furniture cabinet 2933112 n02933112 cabinet.n.01 cabinet 7
|
516 |
+
1204 pantry walls pantry wall 1 1 12 wall wall Wall n04546855 wall.n.01 wall 1
|
517 |
+
227 photo photo 1 40 7 photo otherprop Objects n03925226 photograph.n.01 picture 6
|
518 |
+
817 barrier barrier 1 40 7 otherprop Objects n02796623 barrier.n.01 misc 40
|
519 |
+
130 stacks of cups cup 1 40 7 otherprop Objects n03147509 cup.n.01 objects 39
|
520 |
+
712 beachball beachball 1 40 7 ball otherprop Objects n02814224 beach_ball.n.01 objects 39
|
521 |
+
1323 folded boxes folded boxes 1 40 7 otherprop Objects objects 39
|
522 |
+
1324 contact lens solution bottle contact lens solution bottle 1 40 7 bottle otherprop Objects bottle bottle 2876657 n02876657 bottle.n.01 objects 39
|
523 |
+
673 covered box covered box 1 29 7 box box Objects objects 39
|
524 |
+
459 folder folder 1 40 7 folder otherprop Objects n03376279 folder.n.02 objects 39
|
525 |
+
643 mail trays mail tray 1 40 7 mail tray otherprop Objects objects 39
|
526 |
+
238 slipper slipper 1 40 7 otherprop Objects n04241394 slipper.n.01 clothes 38
|
527 |
+
765 magazine rack magazine rack 1 39 6 stand otherfurniture Furniture n03704549 magazine_rack.n.01 shelving 31
|
528 |
+
1008 sticker sticker 1 40 7 sticker otherprop Objects n07272545 gummed_label.n.01 objects 39
|
529 |
+
225 lotion lotion 1 40 7 otherprop Objects n03690938 lotion.n.01 objects 39
|
530 |
+
1083 buddha buddha 1 40 7 otherprop Objects objects 39
|
531 |
+
813 file organizer file organizer 1 40 7 otherprop Objects objects 39
|
532 |
+
138 paper towel rolls paper towel roll 1 40 7 paper towel otherprop Objects n03887697 paper_towel.n.01 towel 20
|
533 |
+
1145 night lamp night lamp 1 35 7 lamp lamp Objects lamp lamp 3636649 n03636649 lamp.n.02 lighting 28
|
534 |
+
796 fuse box fuse box 1 40 7 otherprop Objects misc 40
|
535 |
+
1325 knife block knife block 1 40 7 otherprop Objects objects 39
|
536 |
+
363 furnace furnace 1 39 6 furnace otherfurniture Furniture n03404449 furnace.n.01
|
537 |
+
1174 cd cases cd case 1 40 7 otherprop Objects objects 39
|
538 |
+
38 stools stool 1 40 7 stool otherprop Objects stool n04326896 stool.n.01 stool 19
|
539 |
+
1326 hand sanitzer dispenser hand sanitzer dispenser 1 40 7 otherprop Objects n04254120 soap_dispenser.n.01 objects 39
|
540 |
+
997 teapot teapot 1 40 7 tea pot otherprop Objects n04398044 teapot.n.01 objects 39
|
541 |
+
1327 pen holder pen holder 1 40 7 otherprop Objects objects 39
|
542 |
+
1328 tray rack tray rack 1 40 7 otherprop Objects objects 39
|
543 |
+
1329 wig wig 1 40 7 otherprop Objects n04584207 wig.n.01 objects 39
|
544 |
+
182 switch switch 1 40 7 otherprop Objects n04372370 switch.n.01 misc 40
|
545 |
+
280 plastic containers plastic container 1 40 7 container otherprop Objects n03094503 container.n.01 objects 39
|
546 |
+
1330 night light night light 1 40 7 otherprop Objects lighting 28
|
547 |
+
1331 notepad notepad 1 40 7 otherprop Objects objects 39
|
548 |
+
1332 mail bin mail bin 1 40 7 otherprop Objects misc 40
|
549 |
+
1333 elevator button elevator button 1 40 7 otherprop Objects misc 40
|
550 |
+
939 gaming wheel gaming wheel 1 40 7 otherprop Objects objects 39
|
551 |
+
1334 drum set drum set 1 40 7 otherprop Objects objects 39
|
552 |
+
480 cosmetic bag cosmetic bag 1 37 7 bag bag Objects objects 39
|
553 |
+
907 coffee mug coffee mug 1 40 7 vessel otherprop Objects cup or mug 3797390 n03063599 coffee_mug.n.01 objects 39
|
554 |
+
1335 closet shelf closet shelf 1 15 6 shelves shelves Furniture bookshelf bookshelf 2871439 n02871439 bookshelf.n.01 shelving 31
|
555 |
+
1336 baby mobile baby mobile 1 40 7 otherprop Objects objects 39
|
556 |
+
829 diaper bin diaper bin 1 40 7 bin otherprop Objects objects 39
|
557 |
+
947 door wall door wall 1 1 12 wall wall Wall wall 1
|
558 |
+
1116 stepstool stepstool 1 40 7 step stool otherprop Objects objects 39
|
559 |
+
599 paper shredder shredder 1 40 7 otherprop Objects n04210120 shredder.n.01 objects 39
|
560 |
+
733 dress rack dress rack 1 40 7 otherprop Objects n03238762 dress_rack.n.01 misc 40
|
561 |
+
123 cover cover 1 40 7 blanket otherprop Objects objects 39
|
562 |
+
506 shopping bag shopping bag 1 37 7 bag bag Objects n04204081 shopping_bag.n.01 objects 39
|
563 |
+
569 sliding door sliding door 1 8 12 door door Wall door n04239074 sliding_door.n.01 door 4
|
564 |
+
1337 exercise bike exercise bike 1 40 7 machine otherprop Objects n04210120 shredder.n.01 gym_equipment 33
|
565 |
+
1338 recliner chair recliner chair 1 5 4 chair chair Chair chair chair chair 3001627 n03238762 dress_rack.n.01 chair 3
|
566 |
+
1314 kitchenaid mixer kitchen mixer 1 40 7 otherprop Objects appliances 37
|
567 |
+
1339 soda can soda can 1 40 7 can otherprop Objects can 2946921 n02946921 can.n.01 objects 39
|
568 |
+
1340 stovetop stovetop 1 38 7 stove otherstructure Objects stove 4330267 n04330267 stove.n.02 appliances 37
|
569 |
+
851 stepladder stepladder 1 39 6 ladder otherfurniture Furniture stairs n04315599 step_ladder.n.01 stairs 16
|
570 |
+
142 tap tap 1 40 7 faucet otherprop Objects faucet 3325088 n04559451 water_faucet.n.01 objects 39
|
571 |
+
436 cable cable 1 40 7 cables otherprop Objects objects 39
|
572 |
+
1341 baby changing station baby changing station 1 39 6 otherfurniture Furniture furniture 36
|
573 |
+
1342 costume costume 1 21 7 clothes clothes Objects n02728440 apparel.n.01 clothes 38
|
574 |
+
885 rocking chair rocking chair 1 5 4 chair chair Chair chair chair chair 3001627 n04099969 rocking_chair.n.01 chair 3
|
575 |
+
693 binder binder 1 40 7 binder otherprop Objects objects 39
|
576 |
+
815 media center media center 1 3 6 cabinet cabinet Furniture cabinet 2933112 n02933112 cabinet.n.01 cabinet 7
|
577 |
+
401 towel rack towel rack 1 40 7 otherprop Objects n04459773 towel_rack.n.01 misc 40
|
578 |
+
1343 medal medal 1 40 7 otherprop Objects objects 39
|
579 |
+
1184 stack of folded chairs folded chair 1 5 4 chair chair Chair chair chair chair 3001627 n03001627 chair.n.01 chair 3
|
580 |
+
1344 telescope telescope 1 40 7 otherprop Objects n04403638 telescope.n.01 objects 39
|
581 |
+
1345 closet doorframe closet doorframe 1 8 12 door door Wall door door 4
|
582 |
+
160 glass glass 1 38 7 glass otherstructure Objects n03438257 glass.n.02 misc 40
|
583 |
+
1126 baseball cap baseball cap 1 40 7 otherprop Objects cap 2954340 n02799323 baseball_cap.n.01 clothes 38
|
584 |
+
1346 battery disposal jar battery disposal jar 1 40 7 jar otherprop Objects jar 3593526 n03593526 jar.n.01 objects 39
|
585 |
+
332 mop mop 1 40 7 otherprop Objects n04367480 swab.n.02 objects 39
|
586 |
+
397 tank tank 1 40 7 otherprop Objects objects 39
|
587 |
+
643 mail tray mail tray 1 40 7 mail tray otherprop Objects objects 39
|
588 |
+
551 centerpiece centerpiece 1 40 7 centerpiece otherprop Objects n02994419 centerpiece.n.02 objects 39
|
589 |
+
1163 stick stick 1 40 7 stick otherprop Objects objects 39
|
590 |
+
1347 closet floor closet floor 1 2 5 floor floor Floor n03365592 floor.n.01 floor 2
|
591 |
+
1348 dryer sheets dryer sheets 1 40 7 otherprop Objects objects 39
|
592 |
+
803 bycicle bycicle 1 40 7 otherprop Objects misc 40
|
593 |
+
484 flower stand flower stand 1 39 6 stand otherfurniture Furniture furniture 36
|
594 |
+
1349 air mattress air mattress 1 4 1 bed bed Bed bed bed bed 2818832 n02690809 air_mattress.n.01 bed 11
|
595 |
+
1350 clip clip 1 40 7 otherprop Objects objects 39
|
596 |
+
222 side table side table 1 7 10 table table Table table table table 4379243 n04379243 table.n.02 table 5
|
597 |
+
1253 pizza boxes pizza box 1 29 7 box box Objects n02883344 box.n.01 objects 39
|
598 |
+
1351 display display 1 39 7 otherfurniture Furniture n03211117 display.n.06 misc 40
|
599 |
+
1352 postcard postcard 1 40 7 otherprop Objects objects 39
|
600 |
+
828 display sign display sign 1 40 7 sign otherprop Objects misc 40
|
601 |
+
1353 paper towel paper towel 1 40 7 paper towel otherprop Objects n03887697 paper_towel.n.01 towel 20
|
602 |
+
612 boots boot 1 40 7 shoe otherprop Objects n04199027 shoe.n.01 clothes 38
|
603 |
+
1354 tennis racket bag tennis racket bag 1 40 7 otherprop Objects objects 39
|
604 |
+
1355 air hockey table air hockey table 1 7 10 table table Table table table table 4379243 n04379243 table.n.02 table 5
|
605 |
+
1301 socks sock 1 21 7 clothes clothes Objects n04254777 sock.n.01 clothes 38
|
606 |
+
1356 food bag food bag 1 37 7 bag bag Objects objects 39
|
607 |
+
1199 clothes hangers clothes hanger 1 40 7 otherprop Objects n03057920 coat_hanger.n.01 misc 40
|
608 |
+
1357 starbucks cup starbucks cup 1 40 7 cup otherprop Objects cup cup or mug 3797390 n03797390 mug.n.04 objects 39
|
assets/meta/scannetv2_raw_categories.json
ADDED
@@ -0,0 +1,609 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[
|
2 |
+
"wall",
|
3 |
+
"chair",
|
4 |
+
"books",
|
5 |
+
"floor",
|
6 |
+
"door",
|
7 |
+
"object",
|
8 |
+
"window",
|
9 |
+
"table",
|
10 |
+
"trash can",
|
11 |
+
"pillow",
|
12 |
+
"picture",
|
13 |
+
"ceiling",
|
14 |
+
"box",
|
15 |
+
"doorframe",
|
16 |
+
"monitor",
|
17 |
+
"cabinet",
|
18 |
+
"desk",
|
19 |
+
"shelf",
|
20 |
+
"office chair",
|
21 |
+
"towel",
|
22 |
+
"couch",
|
23 |
+
"sink",
|
24 |
+
"backpack",
|
25 |
+
"lamp",
|
26 |
+
"bed",
|
27 |
+
"bookshelf",
|
28 |
+
"mirror",
|
29 |
+
"curtain",
|
30 |
+
"plant",
|
31 |
+
"whiteboard",
|
32 |
+
"radiator",
|
33 |
+
"book",
|
34 |
+
"kitchen cabinet",
|
35 |
+
"toilet paper",
|
36 |
+
"kitchen cabinets",
|
37 |
+
"armchair",
|
38 |
+
"shoes",
|
39 |
+
"coffee table",
|
40 |
+
"toilet",
|
41 |
+
"bag",
|
42 |
+
"clothes",
|
43 |
+
"keyboard",
|
44 |
+
"bottle",
|
45 |
+
"recycling bin",
|
46 |
+
"nightstand",
|
47 |
+
"stool",
|
48 |
+
"tv",
|
49 |
+
"file cabinet",
|
50 |
+
"dresser",
|
51 |
+
"computer tower",
|
52 |
+
"clothing",
|
53 |
+
"telephone",
|
54 |
+
"cup",
|
55 |
+
"refrigerator",
|
56 |
+
"end table",
|
57 |
+
"jacket",
|
58 |
+
"shower curtain",
|
59 |
+
"bathtub",
|
60 |
+
"microwave",
|
61 |
+
"kitchen counter",
|
62 |
+
"sofa chair",
|
63 |
+
"paper towel dispenser",
|
64 |
+
"bathroom vanity",
|
65 |
+
"suitcase",
|
66 |
+
"laptop",
|
67 |
+
"ottoman",
|
68 |
+
"shower walls",
|
69 |
+
"printer",
|
70 |
+
"counter",
|
71 |
+
"board",
|
72 |
+
"soap dispenser",
|
73 |
+
"stove",
|
74 |
+
"light",
|
75 |
+
"closet wall",
|
76 |
+
"mini fridge",
|
77 |
+
"cabinets",
|
78 |
+
"doors",
|
79 |
+
"fan",
|
80 |
+
"tissue box",
|
81 |
+
"blanket",
|
82 |
+
"bathroom stall",
|
83 |
+
"copier",
|
84 |
+
"bench",
|
85 |
+
"bar",
|
86 |
+
"soap dish",
|
87 |
+
"laundry hamper",
|
88 |
+
"storage bin",
|
89 |
+
"bathroom stall door",
|
90 |
+
"light switch",
|
91 |
+
"coffee maker",
|
92 |
+
"tv stand",
|
93 |
+
"decoration",
|
94 |
+
"ceiling light",
|
95 |
+
"range hood",
|
96 |
+
"blackboard",
|
97 |
+
"clock",
|
98 |
+
"wardrobe closet",
|
99 |
+
"rail",
|
100 |
+
"bulletin board",
|
101 |
+
"mat",
|
102 |
+
"trash bin",
|
103 |
+
"ledge",
|
104 |
+
"seat",
|
105 |
+
"mouse",
|
106 |
+
"basket",
|
107 |
+
"shower",
|
108 |
+
"dumbbell",
|
109 |
+
"paper",
|
110 |
+
"person",
|
111 |
+
"windowsill",
|
112 |
+
"closet",
|
113 |
+
"bucket",
|
114 |
+
"sign",
|
115 |
+
"speaker",
|
116 |
+
"dishwasher",
|
117 |
+
"container",
|
118 |
+
"stair rail",
|
119 |
+
"shower curtain rod",
|
120 |
+
"tube",
|
121 |
+
"bathroom cabinet",
|
122 |
+
"papers",
|
123 |
+
"storage container",
|
124 |
+
"paper bag",
|
125 |
+
"paper towel roll",
|
126 |
+
"ball",
|
127 |
+
"closet doors",
|
128 |
+
"laundry basket",
|
129 |
+
"cart",
|
130 |
+
"closet door",
|
131 |
+
"dish rack",
|
132 |
+
"stairs",
|
133 |
+
"blinds",
|
134 |
+
"stack of chairs",
|
135 |
+
"purse",
|
136 |
+
"bicycle",
|
137 |
+
"tray",
|
138 |
+
"plunger",
|
139 |
+
"paper cutter",
|
140 |
+
"toilet paper dispenser",
|
141 |
+
"boxes",
|
142 |
+
"bin",
|
143 |
+
"toilet seat cover dispenser",
|
144 |
+
"guitar",
|
145 |
+
"mailboxes",
|
146 |
+
"handicap bar",
|
147 |
+
"fire extinguisher",
|
148 |
+
"ladder",
|
149 |
+
"column",
|
150 |
+
"pipe",
|
151 |
+
"vacuum cleaner",
|
152 |
+
"plate",
|
153 |
+
"piano",
|
154 |
+
"water cooler",
|
155 |
+
"cd case",
|
156 |
+
"bowl",
|
157 |
+
"closet rod",
|
158 |
+
"bathroom counter",
|
159 |
+
"oven",
|
160 |
+
"stand",
|
161 |
+
"scale",
|
162 |
+
"washing machine",
|
163 |
+
"broom",
|
164 |
+
"hat",
|
165 |
+
"shower wall",
|
166 |
+
"guitar case",
|
167 |
+
"rack",
|
168 |
+
"water pitcher",
|
169 |
+
"laundry detergent",
|
170 |
+
"hair dryer",
|
171 |
+
"pillar",
|
172 |
+
"divider",
|
173 |
+
"power outlet",
|
174 |
+
"dining table",
|
175 |
+
"shower floor",
|
176 |
+
"washing machines",
|
177 |
+
"shower door",
|
178 |
+
"coffee kettle",
|
179 |
+
"wardrobe cabinet",
|
180 |
+
"structure",
|
181 |
+
"bookshelves",
|
182 |
+
"clothes dryer",
|
183 |
+
"toaster",
|
184 |
+
"shoe",
|
185 |
+
"ironing board",
|
186 |
+
"alarm clock",
|
187 |
+
"shower head",
|
188 |
+
"lamp base",
|
189 |
+
"water bottle",
|
190 |
+
"keyboard piano",
|
191 |
+
"projector screen",
|
192 |
+
"case of water bottles",
|
193 |
+
"toaster oven",
|
194 |
+
"music stand",
|
195 |
+
"staircase",
|
196 |
+
"coat rack",
|
197 |
+
"storage organizer",
|
198 |
+
"machine",
|
199 |
+
"folded chair",
|
200 |
+
"fire alarm",
|
201 |
+
"fireplace",
|
202 |
+
"vent",
|
203 |
+
"furniture",
|
204 |
+
"power strip",
|
205 |
+
"calendar",
|
206 |
+
"poster",
|
207 |
+
"toilet paper holder",
|
208 |
+
"potted plant",
|
209 |
+
"stuffed animal",
|
210 |
+
"luggage",
|
211 |
+
"curtains",
|
212 |
+
"headphones",
|
213 |
+
"crate",
|
214 |
+
"candle",
|
215 |
+
"projector",
|
216 |
+
"clothes dryers",
|
217 |
+
"mattress",
|
218 |
+
"dustpan",
|
219 |
+
"drawer",
|
220 |
+
"rod",
|
221 |
+
"globe",
|
222 |
+
"footrest",
|
223 |
+
"piano bench",
|
224 |
+
"breakfast bar",
|
225 |
+
"step stool",
|
226 |
+
"hand rail",
|
227 |
+
"vending machine",
|
228 |
+
"ceiling fan",
|
229 |
+
"swiffer",
|
230 |
+
"foosball table",
|
231 |
+
"jar",
|
232 |
+
"footstool",
|
233 |
+
"folded table",
|
234 |
+
"round table",
|
235 |
+
"hamper",
|
236 |
+
"poster tube",
|
237 |
+
"case",
|
238 |
+
"carpet",
|
239 |
+
"thermostat",
|
240 |
+
"coat",
|
241 |
+
"water fountain",
|
242 |
+
"smoke detector",
|
243 |
+
"pillows",
|
244 |
+
"flip flops",
|
245 |
+
"cloth",
|
246 |
+
"banner",
|
247 |
+
"clothes hanger",
|
248 |
+
"whiteboard eraser",
|
249 |
+
"iron",
|
250 |
+
"instrument case",
|
251 |
+
"toilet paper rolls",
|
252 |
+
"soap",
|
253 |
+
"block",
|
254 |
+
"wall hanging",
|
255 |
+
"kitchen island",
|
256 |
+
"pipes",
|
257 |
+
"toothbrush",
|
258 |
+
"shirt",
|
259 |
+
"cutting board",
|
260 |
+
"vase",
|
261 |
+
"shower control valve",
|
262 |
+
"exercise machine",
|
263 |
+
"compost bin",
|
264 |
+
"shorts",
|
265 |
+
"tire",
|
266 |
+
"teddy bear",
|
267 |
+
"bathrobe",
|
268 |
+
"handrail",
|
269 |
+
"faucet",
|
270 |
+
"pantry wall",
|
271 |
+
"thermos",
|
272 |
+
"rug",
|
273 |
+
"couch cushions",
|
274 |
+
"tripod",
|
275 |
+
"mailbox",
|
276 |
+
"tupperware",
|
277 |
+
"shoe rack",
|
278 |
+
"towels",
|
279 |
+
"beer bottles",
|
280 |
+
"treadmill",
|
281 |
+
"salt",
|
282 |
+
"chest",
|
283 |
+
"dispenser",
|
284 |
+
"mirror doors",
|
285 |
+
"remote",
|
286 |
+
"folded ladder",
|
287 |
+
"cushion",
|
288 |
+
"carton",
|
289 |
+
"step",
|
290 |
+
"drying rack",
|
291 |
+
"slippers",
|
292 |
+
"pool table",
|
293 |
+
"soda stream",
|
294 |
+
"toilet brush",
|
295 |
+
"loft bed",
|
296 |
+
"cooking pot",
|
297 |
+
"heater",
|
298 |
+
"messenger bag",
|
299 |
+
"stapler",
|
300 |
+
"closet walls",
|
301 |
+
"scanner",
|
302 |
+
"elliptical machine",
|
303 |
+
"kettle",
|
304 |
+
"metronome",
|
305 |
+
"dumbell",
|
306 |
+
"music book",
|
307 |
+
"rice cooker",
|
308 |
+
"dart board",
|
309 |
+
"sewing machine",
|
310 |
+
"grab bar",
|
311 |
+
"flowerpot",
|
312 |
+
"painting",
|
313 |
+
"railing",
|
314 |
+
"stair",
|
315 |
+
"toolbox",
|
316 |
+
"nerf gun",
|
317 |
+
"binders",
|
318 |
+
"desk lamp",
|
319 |
+
"quadcopter",
|
320 |
+
"pitcher",
|
321 |
+
"hanging",
|
322 |
+
"mail",
|
323 |
+
"closet ceiling",
|
324 |
+
"hoverboard",
|
325 |
+
"beanbag chair",
|
326 |
+
"water heater",
|
327 |
+
"spray bottle",
|
328 |
+
"rope",
|
329 |
+
"plastic container",
|
330 |
+
"soap bottle",
|
331 |
+
"ikea bag",
|
332 |
+
"sleeping bag",
|
333 |
+
"duffel bag",
|
334 |
+
"frying pan",
|
335 |
+
"oven mitt",
|
336 |
+
"pot",
|
337 |
+
"hand dryer",
|
338 |
+
"dollhouse",
|
339 |
+
"shampoo bottle",
|
340 |
+
"hair brush",
|
341 |
+
"tennis racket",
|
342 |
+
"display case",
|
343 |
+
"ping pong table",
|
344 |
+
"boiler",
|
345 |
+
"bag of coffee beans",
|
346 |
+
"bananas",
|
347 |
+
"carseat",
|
348 |
+
"helmet",
|
349 |
+
"umbrella",
|
350 |
+
"coffee box",
|
351 |
+
"envelope",
|
352 |
+
"wet floor sign",
|
353 |
+
"clothing rack",
|
354 |
+
"controller",
|
355 |
+
"bath walls",
|
356 |
+
"podium",
|
357 |
+
"storage box",
|
358 |
+
"dolly",
|
359 |
+
"shampoo",
|
360 |
+
"paper tray",
|
361 |
+
"cabinet door",
|
362 |
+
"changing station",
|
363 |
+
"poster printer",
|
364 |
+
"screen",
|
365 |
+
"soap bar",
|
366 |
+
"crutches",
|
367 |
+
"studio light",
|
368 |
+
"stack of cups",
|
369 |
+
"toilet flush button",
|
370 |
+
"trunk",
|
371 |
+
"grocery bag",
|
372 |
+
"plastic bin",
|
373 |
+
"pizza box",
|
374 |
+
"cabinet doors",
|
375 |
+
"legs",
|
376 |
+
"car",
|
377 |
+
"shaving cream",
|
378 |
+
"luggage stand",
|
379 |
+
"shredder",
|
380 |
+
"statue",
|
381 |
+
"urinal",
|
382 |
+
"hose",
|
383 |
+
"bike pump",
|
384 |
+
"coatrack",
|
385 |
+
"bear",
|
386 |
+
"wall lamp",
|
387 |
+
"humidifier",
|
388 |
+
"toothpaste",
|
389 |
+
"mouthwash bottle",
|
390 |
+
"poster cutter",
|
391 |
+
"golf bag",
|
392 |
+
"food container",
|
393 |
+
"camera",
|
394 |
+
"table lamp",
|
395 |
+
"yoga mat",
|
396 |
+
"card",
|
397 |
+
"mug",
|
398 |
+
"shower doors",
|
399 |
+
"cardboard",
|
400 |
+
"rack stand",
|
401 |
+
"boxes of paper",
|
402 |
+
"flag",
|
403 |
+
"futon",
|
404 |
+
"magazine",
|
405 |
+
"exit sign",
|
406 |
+
"rolled poster",
|
407 |
+
"wheel",
|
408 |
+
"pictures",
|
409 |
+
"blackboard eraser",
|
410 |
+
"organizer",
|
411 |
+
"doll",
|
412 |
+
"book rack",
|
413 |
+
"laundry bag",
|
414 |
+
"sponge",
|
415 |
+
"seating",
|
416 |
+
"folded chairs",
|
417 |
+
"lotion bottle",
|
418 |
+
"can",
|
419 |
+
"lunch box",
|
420 |
+
"food display",
|
421 |
+
"storage shelf",
|
422 |
+
"sliding wood door",
|
423 |
+
"pants",
|
424 |
+
"wood",
|
425 |
+
"boards",
|
426 |
+
"bottles",
|
427 |
+
"washcloth",
|
428 |
+
"workbench",
|
429 |
+
"open kitchen cabinet",
|
430 |
+
"organizer shelf",
|
431 |
+
"frame",
|
432 |
+
"cups",
|
433 |
+
"exercise ball",
|
434 |
+
"easel",
|
435 |
+
"garbage bag",
|
436 |
+
"roomba",
|
437 |
+
"garage door",
|
438 |
+
"luggage rack",
|
439 |
+
"bike lock",
|
440 |
+
"briefcase",
|
441 |
+
"hand towel",
|
442 |
+
"bath products",
|
443 |
+
"star",
|
444 |
+
"map",
|
445 |
+
"coffee bean bag",
|
446 |
+
"headboard",
|
447 |
+
"ipad",
|
448 |
+
"display rack",
|
449 |
+
"traffic cone",
|
450 |
+
"toiletry",
|
451 |
+
"canopy",
|
452 |
+
"massage chair",
|
453 |
+
"paper organizer",
|
454 |
+
"barricade",
|
455 |
+
"platform",
|
456 |
+
"cap",
|
457 |
+
"dumbbell plates",
|
458 |
+
"elevator",
|
459 |
+
"cooking pan",
|
460 |
+
"trash bag",
|
461 |
+
"santa",
|
462 |
+
"jewelry box",
|
463 |
+
"boat",
|
464 |
+
"sock",
|
465 |
+
"kinect",
|
466 |
+
"crib",
|
467 |
+
"plastic storage bin",
|
468 |
+
"cooler",
|
469 |
+
"kitchen apron",
|
470 |
+
"dishwashing soap bottle",
|
471 |
+
"xbox controller",
|
472 |
+
"banana holder",
|
473 |
+
"ping pong paddle",
|
474 |
+
"airplane",
|
475 |
+
"conditioner bottle",
|
476 |
+
"tea kettle",
|
477 |
+
"bedframe",
|
478 |
+
"wood beam",
|
479 |
+
"toilet paper package",
|
480 |
+
"wall mounted coat rack",
|
481 |
+
"film light",
|
482 |
+
"ceiling lamp",
|
483 |
+
"chain",
|
484 |
+
"sofa",
|
485 |
+
"closet wardrobe",
|
486 |
+
"sweater",
|
487 |
+
"kitchen mixer",
|
488 |
+
"wardrobe",
|
489 |
+
"water softener",
|
490 |
+
"banister",
|
491 |
+
"trolley",
|
492 |
+
"pantry shelf",
|
493 |
+
"sofa bed",
|
494 |
+
"loofa",
|
495 |
+
"shower faucet handle",
|
496 |
+
"toy piano",
|
497 |
+
"fish",
|
498 |
+
"file cabinets",
|
499 |
+
"cat litter box",
|
500 |
+
"electric panel",
|
501 |
+
"suitcases",
|
502 |
+
"curtain rod",
|
503 |
+
"bunk bed",
|
504 |
+
"chandelier",
|
505 |
+
"tape",
|
506 |
+
"plates",
|
507 |
+
"alarm",
|
508 |
+
"fire hose",
|
509 |
+
"toy dinosaur",
|
510 |
+
"cone",
|
511 |
+
"glass doors",
|
512 |
+
"hatrack",
|
513 |
+
"subwoofer",
|
514 |
+
"fire sprinkler",
|
515 |
+
"trash cabinet",
|
516 |
+
"pantry walls",
|
517 |
+
"photo",
|
518 |
+
"barrier",
|
519 |
+
"stacks of cups",
|
520 |
+
"beachball",
|
521 |
+
"folded boxes",
|
522 |
+
"contact lens solution bottle",
|
523 |
+
"covered box",
|
524 |
+
"folder",
|
525 |
+
"mail trays",
|
526 |
+
"slipper",
|
527 |
+
"magazine rack",
|
528 |
+
"sticker",
|
529 |
+
"lotion",
|
530 |
+
"buddha",
|
531 |
+
"file organizer",
|
532 |
+
"paper towel rolls",
|
533 |
+
"night lamp",
|
534 |
+
"fuse box",
|
535 |
+
"knife block",
|
536 |
+
"furnace",
|
537 |
+
"cd cases",
|
538 |
+
"stools",
|
539 |
+
"hand sanitzer dispenser",
|
540 |
+
"teapot",
|
541 |
+
"pen holder",
|
542 |
+
"tray rack",
|
543 |
+
"wig",
|
544 |
+
"switch",
|
545 |
+
"plastic containers",
|
546 |
+
"night light",
|
547 |
+
"notepad",
|
548 |
+
"mail bin",
|
549 |
+
"elevator button",
|
550 |
+
"gaming wheel",
|
551 |
+
"drum set",
|
552 |
+
"cosmetic bag",
|
553 |
+
"coffee mug",
|
554 |
+
"closet shelf",
|
555 |
+
"baby mobile",
|
556 |
+
"diaper bin",
|
557 |
+
"door wall",
|
558 |
+
"stepstool",
|
559 |
+
"paper shredder",
|
560 |
+
"dress rack",
|
561 |
+
"cover",
|
562 |
+
"shopping bag",
|
563 |
+
"sliding door",
|
564 |
+
"exercise bike",
|
565 |
+
"recliner chair",
|
566 |
+
"kitchenaid mixer",
|
567 |
+
"soda can",
|
568 |
+
"stovetop",
|
569 |
+
"stepladder",
|
570 |
+
"tap",
|
571 |
+
"cable",
|
572 |
+
"baby changing station",
|
573 |
+
"costume",
|
574 |
+
"rocking chair",
|
575 |
+
"binder",
|
576 |
+
"media center",
|
577 |
+
"towel rack",
|
578 |
+
"medal",
|
579 |
+
"stack of folded chairs",
|
580 |
+
"telescope",
|
581 |
+
"closet doorframe",
|
582 |
+
"glass",
|
583 |
+
"baseball cap",
|
584 |
+
"battery disposal jar",
|
585 |
+
"mop",
|
586 |
+
"tank",
|
587 |
+
"mail tray",
|
588 |
+
"centerpiece",
|
589 |
+
"stick",
|
590 |
+
"closet floor",
|
591 |
+
"dryer sheets",
|
592 |
+
"bycicle",
|
593 |
+
"flower stand",
|
594 |
+
"air mattress",
|
595 |
+
"clip",
|
596 |
+
"side table",
|
597 |
+
"pizza boxes",
|
598 |
+
"display",
|
599 |
+
"postcard",
|
600 |
+
"display sign",
|
601 |
+
"paper towel",
|
602 |
+
"boots",
|
603 |
+
"tennis racket bag",
|
604 |
+
"air hockey table",
|
605 |
+
"socks",
|
606 |
+
"food bag",
|
607 |
+
"clothes hangers",
|
608 |
+
"starbucks cup"
|
609 |
+
]
|
leo/grounding_head.py
ADDED
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import torch
|
2 |
+
from torch import nn
|
3 |
+
from leo.utils import get_mlp_head
|
4 |
+
|
5 |
+
|
6 |
+
class SequentialGroundHead(nn.Module):
    """Score every object embedding against a single grounding embedding.

    The scoring network is built by ``get_mlp_head`` with an input width of
    ``2 * hidden_size`` (object features concatenated with the query), a
    hidden width of ``hidden_size // 2`` and one output logit per object.
    """

    def __init__(self, hidden_size=4096):
        super().__init__()
        # MLP that maps a concatenated (object, query) feature to one logit.
        self.og3d_head = get_mlp_head(
            hidden_size * 2, hidden_size // 2,
            1, dropout=0.1
        )

    def forward(self, obj_embeds, grd_embdes, obj_masks=None):
        """Return one grounding logit per object.

        Args:
            obj_embeds: object features, indexed as (batch, num_objects, dim).
            grd_embdes: grounding/query features; tiled along dim 1 once per
                object, so it is presumably (batch, 1, dim) -- TODO confirm
                against callers.
            obj_masks: optional boolean mask; positions where it is False are
                filled with ``-inf``.

        Returns:
            Tensor of logits with the trailing singleton dimension squeezed.
        """
        num_objs = obj_embeds.shape[1]
        # Tile the query so it can be paired with every object slot.
        tiled_query = grd_embdes.repeat(1, num_objs, 1)
        pair_feats = torch.cat((obj_embeds, tiled_query), dim=2)
        scores = self.og3d_head(pair_feats).squeeze(2)
        if obj_masks is None:
            return scores
        # In-place fill, as in the original: padded objects can never win.
        invalid = obj_masks.logical_not()
        return scores.masked_fill_(invalid, -float('inf'))
|
leo/img_encoder.py
ADDED
@@ -0,0 +1,161 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import timm
|
2 |
+
import torch
|
3 |
+
import numpy as np
|
4 |
+
import torch.nn as nn
|
5 |
+
from einops import rearrange
|
6 |
+
|
7 |
+
|
8 |
+
def disabled_train(self, mode=True):
    """No-op replacement for ``model.train``.

    Install this in place of a model's ``train`` so that later attempts to
    switch between train and eval mode leave the model untouched. ``mode`` is
    accepted for signature compatibility and ignored.
    """
    return self
|
13 |
+
|
14 |
+
|
15 |
+
def simple_conv_and_linear_weights_init(m):
    """Glorot-style uniform init for conv layers, delegating Linear layers.

    For the listed convolution types the weight is drawn from
    ``U(-b, b)`` with ``b = sqrt(6 / (fan_in + fan_out))`` and the bias (if
    any) is zeroed. ``nn.Linear`` modules are forwarded to
    ``simple_linear_weights_init``. Any other module is left untouched, which
    makes the function safe to pass to ``nn.Module.apply``.

    The kernel size is taken over *all* trailing weight dimensions. The
    previous slicing (``[1:4]`` / ``[2:4]``) silently dropped the last kernel
    dimension of the 5-D weights used by 3-D convolutions, producing too
    large a bound there; 1-D and 2-D layers get exactly the same bound as
    before.
    """
    conv_types = (
        nn.Conv1d,
        nn.Conv2d,
        nn.Conv3d,
        nn.ConvTranspose1d,
        nn.ConvTranspose2d,
        nn.ConvTranspose3d,
    )
    # Exact type match (not isinstance) is kept on purpose so that subclasses
    # with their own initialisation are not overwritten.
    if type(m) in conv_types:
        weight_shape = list(m.weight.data.size())
        # Number of elements in one kernel, whatever the conv dimensionality.
        kernel_elems = np.prod(weight_shape[2:])
        fan_in = weight_shape[1] * kernel_elems
        fan_out = weight_shape[0] * kernel_elems
        w_bound = np.sqrt(6.0 / (fan_in + fan_out))
        m.weight.data.uniform_(-w_bound, w_bound)
        if m.bias is not None:
            m.bias.data.fill_(0)
    elif type(m) == nn.Linear:
        simple_linear_weights_init(m)
|
33 |
+
|
34 |
+
def simple_linear_weights_init(m):
    """Glorot-style uniform init for an ``nn.Linear`` module.

    Draws the weight from ``U(-b, b)`` with
    ``b = sqrt(6 / (in_features + out_features))`` and zeroes the bias when
    one exists. Modules whose exact type is not ``nn.Linear`` are ignored.
    """
    if type(m) != nn.Linear:
        return
    # nn.Linear stores its weight as (out_features, in_features).
    out_features, in_features = m.weight.data.size()
    limit = np.sqrt(6.0 / (in_features + out_features))
    m.weight.data.uniform_(-limit, limit)
    if m.bias is None:
        return
    m.bias.data.fill_(0)
|
43 |
+
|
44 |
+
|
45 |
+
class Backbone2DWrapper(nn.Module):
    """Wrap a 2D image backbone and expose its feature map uniformly.

    The wrapped ``model`` must provide ``forward_features(x)``. ``tag`` is a
    free-form name that selects ``out_channels`` and the layout fix-ups
    applied in ``forward_normal``. When ``freeze`` is true the parameters are
    excluded from gradient computation, the module is put in eval mode, and
    later ``train()`` / ``eval()`` calls become no-ops.
    """

    def __init__(self, model, tag, freeze=True):
        super().__init__()
        self.model = model
        self.tag = tag
        self.freeze = freeze
        # Channel width of the feature map, chosen from the backbone family.
        if 'convnext' in tag:
            self.out_channels = 1024
        elif 'swin' in tag:
            self.out_channels = 1024
        elif 'vit' in tag:
            self.out_channels = 768
        elif 'resnet' in tag:
            self.out_channels = 2048
        else:
            raise NotImplementedError

        if freeze:
            for param in self.parameters():
                param.requires_grad = False
            # Go through nn.Module.train directly: the override below ignores
            # mode changes once ``self.freeze`` is set.
            super().train(False)

    def train(self, mode=True):
        """Switch train/eval mode, unless the backbone is frozen.

        A frozen wrapper stays in eval mode and simply returns itself. This
        replaces the earlier ``self.train = disabled_train`` instance
        attribute, which raised ``TypeError`` on ``wrapper.train()`` and made
        ``wrapper.eval()`` return ``False`` instead of the module.
        """
        if self.freeze:
            return self
        return super().train(mode)

    def forward_normal(self, x, flat_output=False):
        """Run the backbone and return ``b c h w`` (or ``b (h w) c``) features."""
        feat = self.model.forward_features(x)
        if 'swin' in self.tag:
            # Swin features come out channels-last.
            feat = rearrange(feat, 'b h w c -> b c h w')
        if 'vit_base_32_timm_laion2b' in self.tag or 'vit_base_32_timm_openai' in self.tag:
            # TODO: [CLS] is prepended to the patches.
            # Drop the first token and fold the rest into a grid of height 7.
            feat = rearrange(feat[:, 1:], 'b (h w) c -> b c h w', h=7)
        if flat_output:
            feat = rearrange(feat, 'b c h w -> b (h w) c')
        return feat

    @torch.no_grad()
    def forward_frozen(self, x, flat_output=False):
        """Same as ``forward_normal`` but without building an autograd graph."""
        return self.forward_normal(x, flat_output)

    def forward(self, x, flat_output=False):
        """Dispatch to the frozen or trainable forward path."""
        if self.freeze:
            return self.forward_frozen(x, flat_output)
        else:
            return self.forward_normal(x, flat_output)
|
89 |
+
|
90 |
+
def convnext_base_laion2b(pretrained=False, freeze=True, **kwargs):
    """Build the timm ``convnext_base.clip_laion2b`` model inside a wrapper.

    Args:
        pretrained: forwarded to ``timm.create_model``.
        freeze: forwarded to ``Backbone2DWrapper``.
        **kwargs: only ``reset_clip_s2b2`` is read; when truthy, every
            state-dict entry under ``stages.3.blocks.2`` whose key contains
            ``weight`` or ``bias`` is re-drawn from a standard normal.

    Returns:
        A ``Backbone2DWrapper`` tagged ``'convnext_base_laion2b'``.
    """
    backbone = timm.create_model('convnext_base.clip_laion2b', pretrained=pretrained)
    if kwargs.get('reset_clip_s2b2'):
        state = backbone.state_dict()
        for key, tensor in state.items():
            if 'stages.3.blocks.2' not in key:
                continue
            if 'weight' in key or 'bias' in key:
                # Re-randomise the selected block's tensors in place.
                tensor.normal_()
        backbone.load_state_dict(state, strict=True)

    return Backbone2DWrapper(backbone, 'convnext_base_laion2b', freeze=freeze)
|
103 |
+
|
104 |
+
|
105 |
+
class GridFeatureExtractor2D(nn.Module):
    """Image backbone plus an optional pooling stage.

    The backbone is created by a module-level factory function named
    ``f"{backbone_name}_{backbone_pretrain_dataset}"``.

    Pooling modes:
        * ``'avg'``: global average pool to one vector per image.
        * ``'conv'``: two 1x1 convs down to 32 channels, then flatten;
          ``out_channels`` is hard-coded for a 7x7 feature map.
        * ``'attn'`` / ``'attention'``: per-channel spatial softmax attention.
        * falsy: no pooling; the flattened grid ``b (h w) c`` is returned.
    """

    def __init__(self, backbone_name='convnext_base', backbone_pretrain_dataset='laion2b', use_pretrain=True, freeze=True, pooling='avg'):
        super().__init__()

        # Resolve the backbone factory by name from this module's globals.
        init_func_name = '_'.join([backbone_name, backbone_pretrain_dataset])
        init_func = globals().get(init_func_name)
        if init_func and callable(init_func):
            self.backbone = init_func(pretrained=use_pretrain, freeze=freeze)
        else:
            raise NotImplementedError(f"Backbone2D does not support {init_func_name}")

        self.pooling = pooling
        if self.pooling:
            if self.pooling == 'avg':
                self.pooling_layers = nn.Sequential(
                    nn.AdaptiveAvgPool2d(output_size=(1,1)),
                    nn.Flatten()
                )
                self.out_channels = self.backbone.out_channels
            elif self.pooling == 'conv':
                self.pooling_layers = nn.Sequential(
                    nn.Conv2d(self.backbone.out_channels, 64, 1),
                    nn.ReLU(inplace=True),
                    nn.Conv2d(64, 32, 1),
                    nn.Flatten()
                )
                self.pooling_layers.apply(simple_conv_and_linear_weights_init)
                self.out_channels = 32 * 7 * 7 # hardcode for 224x224
            elif self.pooling in ['attn', 'attention']:
                self.visual_attention = nn.Sequential(
                    nn.Conv2d(self.backbone.out_channels, self.backbone.out_channels, 1),
                    nn.ReLU(inplace=True),
                    nn.Conv2d(self.backbone.out_channels, self.backbone.out_channels, 1),
                )
                self.visual_attention.apply(simple_conv_and_linear_weights_init)
                self.pooling_layers = self._attention_pooling
                self.out_channels = self.backbone.out_channels
            else:
                raise NotImplementedError(f"Backbone2D does not support {self.pooling} pooling")
        else:
            self.out_channels = self.backbone.out_channels

    def _attention_pooling(self, x):
        """Pool a ``(B, C, H, W)`` map to ``(B, C)`` with learned attention.

        ``self.visual_attention`` produces per-channel logits that are
        soft-maxed over the ``H * W`` positions and used as weights for a
        sum over ``x``. The earlier implementation computed these weights
        and then discarded them, returning the sum of ``x ** 2`` instead.
        NOTE(review): checkpoints trained with the old behaviour and
        ``pooling='attn'`` will produce different features after this fix.
        """
        B, C, H, W = x.size()
        attn = self.visual_attention(x).reshape(B, C, -1).softmax(dim=-1)
        x = x.reshape(B, C, -1)
        return (attn * x).sum(dim=-1)

    def forward(self, x):
        """Return ``(B, 1, out_channels)`` when pooling, else the flat grid."""
        if self.pooling:
            x = self.backbone(x, flat_output=False)
            # Add a length-1 token axis so the output is (B, 1, out_channels).
            x = self.pooling_layers(x).unsqueeze(1)
            return x
        else:
            return self.backbone(x, flat_output=True)
|
leo/inference.py
ADDED
@@ -0,0 +1,234 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import json
|
2 |
+
import os
|
3 |
+
import torch
|
4 |
+
import numpy as np
|
5 |
+
from leo.model import SequentialGrounder
|
6 |
+
from leo.utils import LabelConverter, convert_pc_to_box, obj_processing_post, pad_sequence
|
7 |
+
from torch.utils.data import default_collate
|
8 |
+
|
9 |
+
|
10 |
+
# Asset and checkpoint locations are resolved against the current working
# directory, so the app must be launched from the repository root.
ASSET_DIR = os.path.join(os.getcwd(), 'assets')
CKPT_DIR = os.path.join(os.getcwd(), 'checkpoint/leo')
# Category names, indexed by integer id. A context manager is used so the
# file handle is closed promptly instead of being left to the GC.
with open(os.path.join(ASSET_DIR, "meta/scannetv2_raw_categories.json"), 'r', encoding="utf-8") as _categories_file:
    int2cat = json.load(_categories_file)
# Reverse lookup: category name -> integer id.
cat2int = {w: i for i, w in enumerate(int2cat)}
label_converter = LabelConverter(os.path.join(ASSET_DIR, "meta/scannetv2-labels.combined.tsv"))


# Prompt fragments assembled by get_prompt().
role_prompt = "You are an AI visual assistant situated in a 3D scene. "\
    "You can perceive (1) an ego-view image (accessible when necessary) and (2) the objects (including yourself) in the scene (always accessible). "\
    "You should properly respond to the USER's instruction according to the given visual information. "
#role_prompt = " "
egoview_prompt = "Ego-view image:"
objects_prompt = "Objects (including you) in the scene:"
task_prompt = "USER: {instruction} ASSISTANT:"
|
24 |
+
|
25 |
+
def get_prompt(instruction):
    """Return the four prompt segments for one user instruction.

    Args:
        instruction: text substituted into the ``{instruction}`` slot of
            ``task_prompt``.

    Returns:
        dict with the keys ``prompt_before_obj``, ``prompt_middle_1``,
        ``prompt_middle_2`` and ``prompt_after_obj``.
    """
    # Only the last segment depends on the instruction; the others are the
    # fixed module-level prompt strings.
    user_turn = task_prompt.format(instruction=instruction)
    segments = dict(
        prompt_before_obj=role_prompt,
        prompt_middle_1=egoview_prompt,
        prompt_middle_2=objects_prompt,
        prompt_after_obj=user_turn,
    )
    return segments
|
32 |
+
|
33 |
+
def get_lang(task_item):
    """Build the language fields of a sample from a task description.

    Args:
        task_item: mapping with a ``'task_description'`` string and, optionally,
            ``'action_steps'``: a sequence of mappings that each carry an
            ``'action'`` string.

    Returns:
        The prompt dict produced by ``get_prompt``. When ``'action_steps'`` is
        present it also holds ``'output_gt'``: the action texts, each followed
        by ``' <s>'``, joined with single spaces.

    Raises:
        KeyError: if ``'task_description'`` (or an ``'action'`` entry) is missing.
    """
    data_dict = get_prompt(task_item['task_description'])

    if 'action_steps' in task_item:
        # Each step ends with the '<s>' marker token.
        data_dict['output_gt'] = ' '.join(
            action['action'] + ' <s>' for action in task_item['action_steps']
        )

    return data_dict
|
52 |
+
|
53 |
+
|
54 |
+
def load_data(scan_id):
    """Load one scan from ``ASSET_DIR/inputs/<scan_id>`` and build the model input dict.

    Reads ``<scan_id>_pcd.pth`` (points, colors, ..., per-point instance ids),
    ``<scan_id>_inst.pth`` (instance id -> category name) and ``obj_feats.pth``
    (precomputed object features).

    Args:
        scan_id: name of the scan directory and prefix of its files.

    Returns:
        dict with ``scan_id``, ``obj_fts``, ``obj_locs``, ``obj_labels``,
        ``obj_ids``, ``obj_feats``, placeholder image/anchor entries
        (``img_fts``, ``img_masks``, ``anchor_locs``, ``anchor_orientation``)
        and ``obj_masks`` (all ones, one entry per kept object).
    """
    scan_dir = os.path.join(ASSET_DIR, f'inputs/{scan_id}')

    # load scan
    pcd_data = torch.load(os.path.join(scan_dir, f'{scan_id}_pcd.pth'))
    inst_to_label = torch.load(os.path.join(scan_dir, f'{scan_id}_inst.pth'))
    points, colors, instance_labels = pcd_data[0], pcd_data[1], pcd_data[-1]
    colors = colors / 127.5 - 1  # presumably maps 0..255 colors to [-1, 1]; confirm against the data
    pcds = np.concatenate([points, colors], 1)

    # Collect per-object point clouds. Instances are skipped when their
    # category is unknown, is one of the background classes, or has no points.
    excluded_labels = ('wall', 'floor', 'ceiling')
    obj_pcds = []
    obj_ids = []
    obj_labels = []
    for inst_id, category in inst_to_label.items():
        if category not in cat2int or category in excluded_labels:
            continue
        mask = instance_labels == inst_id
        if np.sum(mask) == 0:
            continue
        obj_pcds.append(pcds[mask])
        obj_ids.append(inst_id)
        obj_labels.append(cat2int[category])

    # Load precomputed object features. `map_location` lets a tensor that was
    # saved from a CUDA device be loaded on a CPU-only host.
    # NOTE(review): these features are not filtered with the object selection
    # above; confirm that their order/count matches the kept objects.
    obj_feats = torch.load(os.path.join(scan_dir, 'obj_feats.pth'), map_location='cpu')

    # Subsample every object to exactly 1024 points (with replacement only
    # when the object has fewer points than that).
    # TODO: crop objects to max_obj_len and reorganize ids?
    obj_pcds = np.array([obj_pcd[np.random.choice(len(obj_pcd), size=1024,
                                                  replace=len(obj_pcd) < 1024)] for obj_pcd in obj_pcds])
    obj_fts, obj_locs, _, _ = obj_processing_post(obj_pcds, rot_aug=False)

    anchor_orientation = torch.zeros(4)
    anchor_orientation[-1] = 1  # xyzw

    return {
        "scan_id": scan_id,
        "obj_fts": obj_fts.float(),
        "obj_locs": obj_locs.float(),
        "obj_labels": torch.LongTensor(obj_labels),
        "obj_ids": torch.LongTensor(obj_ids),
        "obj_feats": obj_feats.squeeze(0),
        # No ego-view image is supplied: zero image with its mask set to False.
        "img_fts": torch.zeros(3, 224, 224),
        "img_masks": torch.LongTensor([0]).bool(),
        "anchor_locs": torch.zeros(3),
        "anchor_orientation": anchor_orientation,
        # One valid flag per object; used for padding in collate.
        "obj_masks": torch.ones((len(obj_locs)), dtype=torch.bool),
    }
|
147 |
+
|
148 |
+
def form_batch(data_dict):
    """Turn a single sample into a batch of size one.

    The variable-length object entries go through ``pad_sequence``; every other
    entry goes through ``default_collate``. The input dict is left untouched.

    Args:
        data_dict: one sample; must contain every key in ``padding_keys``.

    Returns:
        dict holding the padded object entries plus the collated remaining entries.

    Raises:
        KeyError: if one of the padding keys is missing from ``data_dict``.
    """
    padding_keys = ('obj_fts', 'obj_locs', 'obj_masks', 'obj_labels', 'obj_ids')

    # pad
    new_batch = {k: pad_sequence([data_dict[k]], pad=0) for k in padding_keys}

    # default collate for everything that was not padded above
    remaining = {k: v for k, v in data_dict.items() if k not in padding_keys}
    new_batch.update(default_collate([remaining]))
    return new_batch
|
166 |
+
|
167 |
+
|
168 |
+
def inference(scan_id, task, predict_mode=False):
    """Run the grounder on one scan and one task.

    Args:
        scan_id: scan to load (see ``load_data``).
        task: task mapping passed to ``get_lang``.
        predict_mode: when false the model runs teacher-forced on the task's
            action steps; when true it generates the plan text itself.

    Returns:
        ``predict_mode`` false: a list with one entry of ``obj_ids`` (the
        arg-max object) per row of the grounding logits.
        ``predict_mode`` true: a one-element list holding a dict with
        ``'predict_object_id'`` (list of int) and ``'pred_plan_text'`` (str).
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    data_dict = load_data(scan_id)
    data_dict.update(get_lang(task))
    data_dict = form_batch(data_dict)
    data_dict = {
        key: value.to(device) if isinstance(value, torch.Tensor) else value
        for key, value in data_dict.items()
    }

    # NOTE: the model is rebuilt and the checkpoint re-read on every call.
    model = SequentialGrounder(predict_mode)
    state_dict = torch.load(os.path.join(CKPT_DIR, 'pytorch_model.bin'), map_location='cpu')
    model.load_state_dict(state_dict, strict=False)
    model.to(device)
    # Inference only: switch off train-mode layers and gradient tracking.
    model.eval()

    with torch.no_grad():
        data_dict = model(data_dict)

    ground_logits = data_dict['ground_logits']
    obj_ids = data_dict['obj_ids']

    if not predict_mode:
        if ground_logits is None:
            return []
        # calculate result id: the batch holds a single sample, hence obj_ids[0]
        return [obj_ids[0][torch.argmax(step_logits).item()] for step_logits in ground_logits]

    # calculate language
    og_pred = [] if ground_logits is None else torch.argmax(ground_logits, dim=1).tolist()
    grd_batch_ind_list = data_dict['grd_batch_ind_list']

    batch_ind = 0  # form_batch always builds a batch of exactly one sample
    predict_sequence = [
        pred for pred, grd_batch_ind in zip(og_pred, grd_batch_ind_list)
        if grd_batch_ind == batch_ind
    ]
    return [{
        'predict_object_id': [obj_ids[batch_ind][o].item() for o in predict_sequence],
        'pred_plan_text': data_dict['output_txt'][batch_ind],
    }]
|
216 |
+
|
217 |
+
if __name__ == '__main__':
    # Manual smoke run: generate a plan for one hard-coded two-step task.
    demo_task = {
        "task_description": "Find the chair and move it to the table.",
        "action_steps": [
            {"target_id": "1", "label": "chair", "action": "Find the chair."},
            {"target_id": "2", "label": "table", "action": "Move the chair to the table."},
        ],
        "scan_id": "scene0050_00",
    }
    inference("scene0050_00", demo_task, predict_mode=True)
|
234 |
+
|
leo/model.py
ADDED
@@ -0,0 +1,477 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import contextlib
|
2 |
+
import math
|
3 |
+
|
4 |
+
import clip
|
5 |
+
import torch
|
6 |
+
import torch.nn as nn
|
7 |
+
import torch.nn.functional as F
|
8 |
+
from einops import rearrange
|
9 |
+
from peft import LoraConfig, get_peft_model
|
10 |
+
from transformers import AutoModelForCausalLM, AutoTokenizer, LlamaForCausalLM, LlamaTokenizer
|
11 |
+
from leo.img_encoder import GridFeatureExtractor2D
|
12 |
+
from leo.pcd_encoder import OSE3D
|
13 |
+
from leo.grounding_head import SequentialGroundHead
|
14 |
+
from leo.utils import get_mlp_head
|
15 |
+
|
16 |
+
|
17 |
+
def maybe_autocast(model, dtype='bf16', enabled=True):  # not-half mode
    """Return an autocast context manager suited to ``model``'s device.

    Args:
        model: object exposing a ``device`` attribute.
        dtype: ``'bf16'`` or ``'fp16'``; any other value selects ``torch.float32``.
        enabled: forwarded to ``torch.cuda.amp.autocast``.

    Returns:
        ``contextlib.nullcontext()`` when the model is on the CPU, otherwise a
        ``torch.cuda.amp.autocast`` context with the selected dtype.
    """
    # if on cpu, don't use autocast
    if model.device == torch.device('cpu'):
        return contextlib.nullcontext()

    # 'fp16' must map to torch.float16; previously a `==` comparison left the
    # string in place and it was handed to autocast unchanged.
    autocast_dtype = {
        'bf16': torch.bfloat16,
        'fp16': torch.float16,
    }.get(dtype, torch.float32)
    return torch.cuda.amp.autocast(dtype=autocast_dtype, enabled=enabled)
|
33 |
+
|
34 |
+
def disabled_train(self, mode=True):
    """Drop-in replacement for a model's ``train``: ignore ``mode`` and change nothing.

    Assigning this over ``model.train`` keeps later train/eval switches from
    altering that model's mode. The first argument is returned as-is.
    """
    return self
|
39 |
+
|
40 |
+
|
41 |
+
class SequentialGrounder(torch.nn.Module):
    """LLM with LoRA adapters, image / point-cloud token encoders and a grounding head.

    With ``predict_mode`` false, :meth:`forward` runs teacher-forced on
    ``data_dict['output_gt']``; with ``predict_mode`` true it delegates to
    :meth:`generate`. In both cases the hidden states at the ``<s>`` tokens of
    the output text are scored against the object tokens by ``self.ground_head``
    and stored in ``data_dict['ground_logits']``.
    """

    def __init__(self, predict_mode=False):
        """Build the tokenizer, the frozen LLM, the encoders and the heads.

        Args:
            predict_mode: if true, ``forward`` dispatches to ``generate``.
        """
        super().__init__()
        cfg = {
            "model": {
                "llm": {
                    "name": "Vicuna7B",
                    "cfg_path": "/scratch/generalvision/vicuna-7b",
                    "truncation_side": "right",
                    "max_context_len": 256,
                    "max_out_len": 256,
                    "lora": {
                        "flag": True,
                        "rank": 16,
                        "alpha": 16,
                        "dropout": 0.0,
                        "target_modules": ['q_proj', 'k_proj', 'v_proj', 'o_proj', 'gate_proj', 'up_proj', 'down_proj'],
                    },
                },
                "clip_txt_guidance": {
                    "flag": False,
                    "clip_out_dim": 1024,
                },
            },
        }
        llm_cfg = cfg['model']['llm']
        lora_cfg = llm_cfg['lora']
        clip_cfg = cfg['model']['clip_txt_guidance']

        self.predict_mode = predict_mode

        # LLM
        llm_name = llm_cfg['name']
        llm_cfg_path = llm_cfg['cfg_path']
        llm_truncation_side = llm_cfg['truncation_side']
        if 'vicuna' in llm_name.lower():
            self.llm_tokenizer = LlamaTokenizer.from_pretrained(llm_cfg_path, truncation_side=llm_truncation_side)
            self.llm_tokenizer.add_special_tokens({'pad_token': '[PAD]'})
            # not-half mode: weights are loaded in float32 rather than float16
            self.llm_model = LlamaForCausalLM.from_pretrained(llm_cfg_path, torch_dtype=torch.float32)
            self.llm_model.resize_token_embeddings(len(self.llm_tokenizer))
        else:
            self.llm_tokenizer = AutoTokenizer.from_pretrained(llm_cfg_path, truncation_side=llm_truncation_side)
            self.llm_model = AutoModelForCausalLM.from_pretrained(llm_cfg_path, torch_dtype=torch.float16)

        # Freeze the LLM and pin it to eval mode.
        for param in self.llm_model.parameters():
            param.requires_grad = False
        self.llm_model.eval()
        self.llm_model.train = disabled_train

        # 2D vision
        self.img_encoder = GridFeatureExtractor2D()
        self.img_proj = nn.Linear(
            self.img_encoder.out_channels, self.llm_model.config.hidden_size
        )

        # 3D vision
        self.pcd_encoder = OSE3D()
        self.pcd_proj = nn.Linear(256, self.llm_model.config.hidden_size)

        # LoRA
        if lora_cfg['flag']:
            lora_config = LoraConfig(
                r=lora_cfg['rank'],
                lora_alpha=lora_cfg['alpha'],
                target_modules=lora_cfg['target_modules'],
                lora_dropout=lora_cfg['dropout'],
                bias='none',
                modules_to_save=[],
            )
            self.llm_model = get_peft_model(self.llm_model, peft_config=lora_config)

        self.max_context_len = llm_cfg['max_context_len']
        self.max_out_len = llm_cfg['max_out_len']

        # additional text x multi-modal tokens fusion
        self.clip_txt_guidance = clip_cfg['flag']
        if self.clip_txt_guidance:
            self.clip_model = clip.load('RN50')[0]
            for param in self.clip_model.parameters():
                param.requires_grad = False
            self.clip_model.eval()
            self.clip_model.train = disabled_train
            # `clip_txt_guidance` lives under cfg['model'], not at the top level of cfg.
            self.clip_proj = nn.Linear(clip_cfg['clip_out_dim'], self.llm_model.config.hidden_size)

        # grounding head
        self.ground_head = SequentialGroundHead()
        self.obj_cls_head = get_mlp_head(4096, 768, 607, 0.3)
        # When true, grounding uses the projected object tokens instead of
        # the LLM's hidden states at the object positions.
        self.pre_grounding = True

    @property
    def device(self):
        """Device of the module's first parameter."""
        # next() avoids materializing the whole parameter list on every access.
        return next(self.parameters()).device

    def _encode_scene_tokens(self, data_dict):
        """Store LLM-width ``obj_tokens`` and ``img_tokens`` in ``data_dict`` and return it."""
        device = self.device
        if 'obj_tokens' not in data_dict:
            # obtain obj tokens
            data_dict = self.pcd_encoder(data_dict)
        data_dict['obj_tokens'] = self.pcd_proj(data_dict['obj_tokens'].to(device))
        data_dict['img_tokens'] = self.img_proj(self.img_encoder(data_dict['img_fts']))
        return data_dict

    def _grounding_token_positions(self, token_ids):
        """Return ``(batch_indices, token_indices)`` of every ``<s>`` token except at position 0."""
        grd_token_id = self.llm_tokenizer.convert_tokens_to_ids(['<s>'])[0]
        masked_ids = token_ids.clone()
        masked_ids[:, 0] = -100  # the leading token is never counted as a grounding marker
        return (masked_ids == grd_token_id).nonzero(as_tuple=True)

    def build_right_justified_sequence(self, data_dict):
        """
        Concat six sequences: `prompt_before_obj`, `prompt_middle_1`, `img_tokens`, `prompt_middle_2`, `obj_tokens`, `prompt_after_obj`.
        Return right justified sequence for causal LM: <pad>, <role/situation>, <img>, <objs>, <instruction>.

        Returns:
            ``(inputs_embeds, attention_mask, (l1, l2, l3))`` where ``l1``, ``l2``
            and ``l3`` are the lengths of the pre, middle and post chunks.

        Note: this changes ``padding_side`` and ``truncation_side`` of
        ``self.llm_tokenizer`` as a side effect.
        """
        device = self.device
        bs = len(data_dict['prompt_before_obj'])
        embed_tokens = self.llm_model.get_input_embeddings()

        self.llm_tokenizer.padding_side = 'left'
        text_input_tokens_pre = self.llm_tokenizer(
            data_dict['prompt_before_obj'],
            return_tensors='pt',
            padding='longest'
        ).to(device)   # [PAD, BOS, tokens], (B, T1)

        text_input_tokens_mid1 = self.llm_tokenizer(
            data_dict['prompt_middle_1'],
            return_tensors='pt',
            padding='longest'
        ).to(device)

        img_tokens = data_dict['img_tokens'].to(device)
        img_masks = data_dict['img_masks'].to(device)
        # One mask value per sample, repeated for every image token.
        img_masks = img_masks.reshape(-1, 1).repeat(1, img_tokens.size(1))

        text_input_tokens_mid2 = self.llm_tokenizer(
            data_dict['prompt_middle_2'],
            return_tensors='pt',
            padding='longest'
        ).to(device)

        obj_tokens = data_dict['obj_tokens'].to(device)
        obj_masks = data_dict['obj_masks'].to(device)

        # additional clip fusion
        if self.clip_txt_guidance:
            with torch.no_grad():
                clip_fts = self.clip_model.encode_text(
                    clip.tokenize(data_dict['prompt_after_obj'], truncate=True).to(device)
                )
            clip_fts = self.clip_proj(clip_fts)
            # B, N, C
            img_tokens = torch.einsum('bnc,bc->bnc', img_tokens, clip_fts)
            obj_tokens = torch.einsum('bnc,bc->bnc', obj_tokens, clip_fts)

        self.llm_tokenizer.padding_side = 'right'   # no need to be 'left', as padding tokens will be shifted
        self.llm_tokenizer.truncation_side = 'left'   # truncate history
        text_input_tokens_post = self.llm_tokenizer(
            data_dict['prompt_after_obj'],
            return_tensors='pt',
            padding='longest',
            truncation=True,
            max_length=self.max_context_len,
        ).to(device)   # [BOS, tokens, PAD], (B, T3)

        assert text_input_tokens_mid1.attention_mask.all() and text_input_tokens_mid2.attention_mask.all(), \
            "prompt_middle should be the same and thus no padding"

        # remove bos, make "tokenize subseq and concat" equivalent to "tokenize the whole seq"
        text_input_tokens_mid1.input_ids = text_input_tokens_mid1.input_ids[:, 1:]
        text_input_tokens_mid1.attention_mask = text_input_tokens_mid1.attention_mask[:, 1:]
        text_input_tokens_mid2.input_ids = text_input_tokens_mid2.input_ids[:, 1:]
        text_input_tokens_mid2.attention_mask = text_input_tokens_mid2.attention_mask[:, 1:]
        text_input_tokens_post.input_ids = text_input_tokens_post.input_ids[:, 1:]
        text_input_tokens_post.attention_mask = text_input_tokens_post.attention_mask[:, 1:]
        for i in range(bs):
            if not img_masks[i].any():
                # no image input, also mask the text prompt for image tokens
                text_input_tokens_mid1.attention_mask[i].fill_(0)

        inputs_embeds_pre = embed_tokens(text_input_tokens_pre.input_ids)
        inputs_embeds_mid1 = embed_tokens(text_input_tokens_mid1.input_ids)
        inputs_embeds_mid2 = embed_tokens(text_input_tokens_mid2.input_ids)
        inputs_embeds_post = embed_tokens(text_input_tokens_post.input_ids)

        # since img_tokens, prompt_mid, obj_tokens are fixed length without padding, we concat them first
        inputs_embeds_mid = torch.cat([inputs_embeds_mid1, img_tokens, inputs_embeds_mid2, obj_tokens], dim=1)
        attn_mask_mid = torch.cat(
            [text_input_tokens_mid1.attention_mask, img_masks, text_input_tokens_mid2.attention_mask, obj_masks],
            dim=1,
        )

        # Number of right-side padding tokens in each post prompt.
        post_pad_length = torch.logical_not(text_input_tokens_post.attention_mask).sum(-1)

        bs, l1, hidden_dim = inputs_embeds_pre.shape
        _, l2, _ = inputs_embeds_mid.shape
        _, l3, _ = inputs_embeds_post.shape

        inputs_embeds = torch.zeros(bs, l1+l2+l3, hidden_dim, dtype=inputs_embeds_pre.dtype, device=device)
        attention_mask = torch.zeros(bs, l1+l2+l3, dtype=obj_masks.dtype, device=device)

        # assign by chunks
        for i in range(bs):
            post_pad_len = post_pad_length[i]

            if post_pad_len > 0:
                # Move the post prompt's padding to the very front of the
                # sequence so that the real tokens end flush right.
                inputs_embeds[i, :post_pad_len] = inputs_embeds_post[i, -post_pad_len:]
                attention_mask[i, :post_pad_len] = 0
                inputs_embeds[i, post_pad_len+l1+l2:] = inputs_embeds_post[i, :-post_pad_len]
                attention_mask[i, post_pad_len+l1+l2:] = 1
            else:
                # no padding
                inputs_embeds[i, -l3:] = inputs_embeds_post[i]
                attention_mask[i, -l3:] = 1

            inputs_embeds[i, post_pad_len: post_pad_len+l1] = inputs_embeds_pre[i]
            attention_mask[i, post_pad_len: post_pad_len+l1] = text_input_tokens_pre.attention_mask[i]

            inputs_embeds[i, post_pad_len+l1: post_pad_len+l1+l2] = inputs_embeds_mid[i]
            attention_mask[i, post_pad_len+l1: post_pad_len+l1+l2] = attn_mask_mid[i]

        return inputs_embeds, attention_mask, (l1, l2, l3)

    def forward(self, data_dict):
        """
        Teacher-forced pass (or generation when ``self.predict_mode`` is set).

        data_dict requires keys:
        # input
            prompt_before_obj: list of str, (B,)
            prompt_middle_1: list of str, (B,)
            prompt_middle_2: list of str, (B,)
            prompt_after_obj: list of str, (B,)
            obj_fts: (B, N, P, 6), xyz + rgb
            obj_masks: (B, N), 1 valid and 0 masked
            obj_locs: (B, N, 6), xyz + whd
            anchor_locs: (B, 3)
            anchor_orientation: (B, C)
            img_fts: (B, 3, H, W), rgb
            img_masks: (B, 1), 1 valid and 0 masked
        # output
            output_gt: list of str, (B,)

        Returns ``data_dict`` extended with ``llm_logits``, ``llm_labels``,
        ``num_tokens_for_loss``, ``ground_logits`` (``None`` when ``output_gt``
        contains no ``<s>`` marker) and ``obj_cls_post_logits``.
        """
        if self.predict_mode:
            return self.generate(data_dict=data_dict)

        device = self.device
        bs = len(data_dict['prompt_after_obj'])
        data_dict['bs'] = bs
        data_dict = self._encode_scene_tokens(data_dict)

        # build input embeds and record prompt position
        inputs_embeds, attention_mask, input_length = self.build_right_justified_sequence(data_dict=data_dict)
        obj_token_length = data_dict['obj_masks'].shape[1]
        # (B, T1+O+T2, D), (B, T1+O+T2)

        self.llm_tokenizer.padding_side = 'right'
        self.llm_tokenizer.truncation_side = 'right'
        text_output_tokens = self.llm_tokenizer(
            [t + self.llm_tokenizer.eos_token for t in data_dict['output_gt']],
            return_tensors='pt',
            padding='longest',
            truncation=True,
            max_length=self.max_out_len,
        ).to(device)
        # record position for special token [SOS]
        grd_ind_0, grd_ind_1 = self._grounding_token_positions(text_output_tokens.input_ids)

        text_output_embeds = self.llm_model.get_input_embeddings()(text_output_tokens.input_ids)   # (B, T3, D)
        inputs_embeds = torch.cat([inputs_embeds, text_output_embeds], dim=1)   # (B, T1+O+T2+T3, D)
        attention_mask = torch.cat([attention_mask, text_output_tokens.attention_mask], dim=1)   # (B, T1+O+T2+T3)

        # construct targets
        targets = torch.zeros_like(attention_mask).long().fill_(-100)   # (B, T1+O+T2+T3)

        # only apply loss to answer tokens
        targets_idx = text_output_tokens.attention_mask.bool()
        targets[:, -targets_idx.shape[1]:][targets_idx] = text_output_tokens.input_ids[targets_idx]

        # do not predict bos token, regard it as condition instead
        targets[:, -targets_idx.shape[1]] = -100

        with maybe_autocast(self):
            outputs = self.llm_model(
                inputs_embeds=inputs_embeds.float(),   # not-half mode
                attention_mask=attention_mask,
                return_dict=True,
                output_hidden_states=True,
            )

        logits = outputs.logits.float()
        last_hidden_state = outputs.hidden_states[-1]

        # different from the loss inside `llm_model.forward`, here we take mean of each sequence instead of sum
        shift_logits = logits[..., :-1, :].contiguous()
        shift_labels = targets[..., 1:].contiguous()
        num_tokens_for_loss = (shift_labels >= 0).int().sum(1)   # (B,)

        shift_logits = rearrange(shift_logits, 'b t v -> (b t) v')
        shift_labels = rearrange(shift_labels, 'b t -> (b t)')

        shift_labels = shift_labels.to(shift_logits.device)

        # record for llm loss
        data_dict['llm_logits'] = shift_logits
        data_dict['llm_labels'] = shift_labels
        data_dict['num_tokens_for_loss'] = num_tokens_for_loss

        # record for grounding loss
        grd_list = []
        obj_list = []
        mask_list = []
        prompt_len = sum(input_length)
        obj_end = input_length[0] + input_length[1]
        for batch_ind, grd_token_ind in zip(grd_ind_0, grd_ind_1):
            if self.pre_grounding:
                output_obj_tokens = data_dict['obj_tokens'][batch_ind]
            else:
                output_obj_tokens = last_hidden_state[batch_ind, obj_end - obj_token_length:obj_end, :]
            grd_pos = prompt_len + grd_token_ind
            output_grd_tokens = last_hidden_state[batch_ind, grd_pos:grd_pos + 1, :]
            grd_list.append(output_grd_tokens)
            obj_list.append(output_obj_tokens)
            mask_list.append(data_dict['obj_masks'][batch_ind])

        if grd_list:
            output_obj = torch.stack(obj_list).float()
            output_grd = torch.stack(grd_list).float()
            data_dict['ground_logits'] = self.ground_head(output_obj, output_grd, torch.stack(mask_list))
        else:
            # No `<s>` marker in the targets: torch.stack would fail on an
            # empty list, so mirror `generate` and report no grounding.
            data_dict['ground_logits'] = None

        # record for cls loss
        obj_cls_post_embeds = data_dict['obj_tokens'].float()
        data_dict['obj_cls_post_logits'] = self.obj_cls_head(obj_cls_post_embeds)
        return data_dict

    @torch.no_grad()
    def generate(
        self,
        data_dict,
        use_nucleus_sampling=False,
        num_beams=5,
        max_length=256,
        min_length=1,
        top_p=0.9,
        repetition_penalty=6.0,
        length_penalty=1,
        num_captions=1,
        temperature=1,
    ):
        """
        Generate the output text and ground every generated ``<s>`` token.

        data_dict requires the same keys as forward() except output_gt.
        The sampling arguments are forwarded to ``self.llm_model.generate``.

        Returns ``data_dict`` extended with ``ground_logits`` (``None`` when no
        ``<s>`` token was generated), ``grd_batch_ind_list`` (batch index of
        each grounding token) and ``output_txt`` (decoded, stripped strings).
        """
        device = self.device
        bs = len(data_dict['prompt_after_obj'])
        data_dict['bs'] = bs
        data_dict = self._encode_scene_tokens(data_dict)

        inputs_embeds, attention_mask, _ = self.build_right_justified_sequence(data_dict=data_dict)

        # give bos token as condition
        bos_tokens = self.llm_tokenizer(
            [self.llm_tokenizer.bos_token] * bs,
            return_tensors='pt',
        ).to(device)
        bos_tokens_ids = bos_tokens.input_ids[:, 0:1]   # (B, 1)
        bos_tokens_attn = bos_tokens.attention_mask[:, 0:1]   # (B, 1)

        # prepare a `bos_token`
        bos_embeds = self.llm_model.get_input_embeddings()(bos_tokens_ids)   # (B, 1, D)
        inputs_embeds = torch.cat([inputs_embeds, bos_embeds], dim=1)   # (B, T1+O+T2+1, D)
        attention_mask = torch.cat([attention_mask, bos_tokens_attn], dim=1)   # (B, T1+O+T2+1)

        with maybe_autocast(self):
            outputs = self.llm_model.generate(
                inputs_embeds=inputs_embeds,
                attention_mask=attention_mask,
                do_sample=use_nucleus_sampling,
                top_p=top_p,
                temperature=temperature,
                num_beams=num_beams,
                max_length=max_length,
                min_length=min_length,
                repetition_penalty=repetition_penalty,
                length_penalty=length_penalty,
                num_return_sequences=num_captions,
                return_dict_in_generate=True,
                output_hidden_states=True,
                output_scores=True
            )
        # note output_ids_idx - 1 = step idx, because we do not predict [BOS]
        beam_indices = outputs.beam_indices   # bs x step, beam indices range (bsxbeam)
        hidden_states = outputs.hidden_states   # step x layer x (bs x beam) x token_num x hidden_dim
        outputs = outputs.sequences   # bs x output_ids
        outputs[outputs == self.llm_tokenizer.unk_token_id] = self.llm_tokenizer.eos_token_id

        # record for grounding
        grd_ind_0, grd_ind_1 = self._grounding_token_positions(outputs)

        grd_list = []
        grd_batch_ind_list = []
        obj_list = []
        mask_list = []
        for batch_ind, grd_token_ind in zip(grd_ind_0, grd_ind_1):
            output_obj_tokens = data_dict['obj_tokens'][batch_ind]
            # grd_token_ind - 1 because first token is sos
            step = grd_token_ind - 1
            output_grd_tokens = hidden_states[step][-1][beam_indices[batch_ind, step]][-1].unsqueeze(0)
            grd_list.append(output_grd_tokens)
            grd_batch_ind_list.append(batch_ind)
            obj_list.append(output_obj_tokens)
            mask_list.append(data_dict['obj_masks'][batch_ind])

        if grd_list:
            output_obj = torch.stack(obj_list).float()
            output_grd = torch.stack(grd_list).float()
            data_dict['ground_logits'] = self.ground_head(output_obj, output_grd, torch.stack(mask_list))
        else:
            data_dict['ground_logits'] = None
        data_dict['grd_batch_ind_list'] = grd_batch_ind_list

        output_txt = self.llm_tokenizer.batch_decode(outputs, skip_special_tokens=True)
        output_txt = [txt.strip() for txt in output_txt]
        data_dict['output_txt'] = output_txt
        return data_dict
|
leo/pcd_encoder.py
ADDED
@@ -0,0 +1,406 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import torch
|
2 |
+
import einops
|
3 |
+
import numpy as np
|
4 |
+
import torch.nn.functional as F
|
5 |
+
from torch import Tensor, nn
|
6 |
+
from typing import Optional
|
7 |
+
from leo.utils import get_activation_fn, layer_repeat, calc_pairwise_locs
|
8 |
+
|
9 |
+
|
10 |
+
def disabled_train(self, mode=True):
    """No-op stand-in for a module's ``train`` method.

    Installing this function in place of ``model.train`` pins the module's
    current train/eval mode: the requested ``mode`` is ignored and the
    receiver is handed back untouched.
    """
    del mode  # intentionally unused
    return self
|
15 |
+
|
16 |
+
|
17 |
+
class TransformerEncoderLayer(nn.Module):
    """Self-attention + feed-forward encoder layer around ``nn.MultiheadAttention``.

    With ``prenorm=False`` each sub-block is followed by its LayerNorm
    (post-norm). With ``prenorm=True`` the LayerNorm is applied before each
    sub-block instead.
    """

    def __init__(self, d_model, nhead, dim_feedforward=2048, batch_first=True, dropout=0.1, activation="relu", prenorm=False):
        super().__init__()
        # Sub-modules are registered in a fixed order so that parameter and
        # state_dict ordering stay stable.
        self.self_attn = nn.MultiheadAttention(
            d_model, nhead, dropout=dropout, batch_first=batch_first
        )

        # Position-wise feed-forward network.
        self.linear1 = nn.Linear(d_model, dim_feedforward)
        self.dropout = nn.Dropout(dropout)
        self.linear2 = nn.Linear(dim_feedforward, d_model)

        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)
        self.dropout1 = nn.Dropout(dropout)
        self.dropout2 = nn.Dropout(dropout)

        self.activation = get_activation_fn(activation)
        self.prenorm = prenorm

    def forward(
        self, tgt, tgt_mask: Optional[Tensor] = None,
        tgt_key_padding_mask: Optional[Tensor] = None,
    ):
        """Run self-attention and the feed-forward block on ``tgt``.

        Returns:
            ``(output, attention_weights)`` where ``attention_weights`` is the
            second value returned by ``nn.MultiheadAttention``.
        """
        # --- self-attention sub-block ---
        attn_input = self.norm1(tgt) if self.prenorm else tgt
        attn_output, self_attn_matrices = self.self_attn(
            query=attn_input, key=attn_input, value=attn_input,
            attn_mask=tgt_mask,
            key_padding_mask=tgt_key_padding_mask,
        )
        tgt = tgt + self.dropout1(attn_output)

        # --- feed-forward sub-block ---
        # NOTE: in the pre-norm path the normalised tensor overwrites ``tgt``,
        # so the residual below is taken on the *normalised* activations.
        tgt = self.norm2(tgt) if self.prenorm else self.norm1(tgt)
        ffn_output = self.linear2(self.dropout(self.activation(self.linear1(tgt))))
        tgt = tgt + self.dropout2(ffn_output)
        if not self.prenorm:
            tgt = self.norm2(tgt)
        return tgt, self_attn_matrices
|
57 |
+
|
58 |
+
|
59 |
+
class MultiHeadAttentionSpatial(nn.Module):
    """Multi-head attention whose weights are fused with pairwise spatial features.

    The content logits ``q·k / sqrt(d_per_head)`` are combined with a term
    derived from ``pairwise_locs`` according to ``spatial_attn_fusion``:

    * ``'mul'``  -- ``softmax(log(clamp(relu(fc(locs)))) + attn)``
    * ``'bias'`` -- ``softmax(fc(locs) + attn)``
    * ``'add'``  -- mean of ``softmax(attn)`` and ``softmax(fc(locs))``
    * ``'ctx'``  -- ``softmax(q·fc(locs) / sqrt(d_per_head) + attn)``
    * ``'cond'`` -- ``softmax(log(clamp(sigmoid(w·locs + b))) + attn)`` where
      the per-query weights ``w`` and bias ``b`` are predicted from the query
      input by ``lang_cond_fc``.

    Args:
        d_model: Model width; must be divisible by ``n_head``.
        n_head: Number of attention heads.
        dropout: Dropout applied to the output projection.
        spatial_multihead: If True the spatial term has one channel per head,
            otherwise a single channel is shared by all heads.
        spatial_dim: Size of the last dimension of ``pairwise_locs``.
        spatial_attn_fusion: One of ``'mul'``, ``'bias'``, ``'add'``,
            ``'ctx'``, ``'cond'``.

    Raises:
        NotImplementedError: If ``spatial_attn_fusion`` is not supported.
    """

    def __init__(
        self, d_model, n_head, dropout=0.1, spatial_multihead=True, spatial_dim=5,
        spatial_attn_fusion='mul',
    ):
        super().__init__()
        assert d_model % n_head == 0, 'd_model: %d, n_head: %d' % (d_model, n_head)

        self.n_head = n_head
        self.d_model = d_model
        self.d_per_head = d_model // n_head
        self.spatial_multihead = spatial_multihead
        self.spatial_dim = spatial_dim
        self.spatial_attn_fusion = spatial_attn_fusion

        self.w_qs = nn.Linear(d_model, d_model)
        self.w_ks = nn.Linear(d_model, d_model)
        self.w_vs = nn.Linear(d_model, d_model)

        self.fc = nn.Linear(d_model, d_model)
        self.dropout = nn.Dropout(p=dropout)
        self.layer_norm = nn.LayerNorm(d_model)

        self.spatial_n_head = n_head if spatial_multihead else 1
        if self.spatial_attn_fusion in ['mul', 'bias', 'add']:
            self.pairwise_loc_fc = nn.Linear(spatial_dim, self.spatial_n_head)
        elif self.spatial_attn_fusion == 'ctx':
            self.pairwise_loc_fc = nn.Linear(spatial_dim, d_model)
        elif self.spatial_attn_fusion == 'cond':
            # One bias plus ``spatial_dim`` weights per spatial head.
            self.lang_cond_fc = nn.Linear(d_model, self.spatial_n_head * (spatial_dim + 1))
        else:
            raise NotImplementedError('unsupported spatial_attn_fusion %s' % (self.spatial_attn_fusion))

    def forward(self, q, k, v, pairwise_locs, key_padding_mask=None, txt_embeds=None):
        """Compute spatially fused attention.

        Args:
            q: Queries, ``(b, l, d_model)``.
            k: Keys, ``(b, t, d_model)``.
            v: Values, ``(b, t, d_model)``.
            pairwise_locs: Pairwise spatial features, ``(b, l, t, spatial_dim)``.
            key_padding_mask: Optional boolean ``(b, t)`` mask; True marks keys
                that must be ignored.
            txt_embeds: Unused; kept for interface compatibility.

        Returns:
            ``(output, fused_attn)`` with ``output`` of shape
            ``(b, l, d_model)`` (residual + LayerNorm already applied) and
            ``fused_attn`` of shape ``(n_head, b, l, t)``.

        Raises:
            AssertionError: If the fused attention weights contain NaN.
        """
        residual = q
        q = einops.rearrange(self.w_qs(q), 'b l (head k) -> head b l k', head=self.n_head)
        k = einops.rearrange(self.w_ks(k), 'b t (head k) -> head b t k', head=self.n_head)
        v = einops.rearrange(self.w_vs(v), 'b t (head v) -> head b t v', head=self.n_head)
        attn = torch.einsum('hblk,hbtk->hblt', q, k) / np.sqrt(q.shape[-1])

        if self.spatial_attn_fusion in ['mul', 'bias', 'add']:
            loc_attn = self.pairwise_loc_fc(pairwise_locs)
            loc_attn = einops.rearrange(loc_attn, 'b l t h -> h b l t')
            if self.spatial_attn_fusion == 'mul':
                loc_attn = F.relu(loc_attn)
            if not self.spatial_multihead:
                # Broadcast the single spatial channel to every head.
                loc_attn = einops.repeat(loc_attn, 'h b l t -> (h nh) b l t', nh=self.n_head)
        elif self.spatial_attn_fusion == 'ctx':
            loc_attn = self.pairwise_loc_fc(pairwise_locs)
            loc_attn = einops.rearrange(loc_attn, 'b l t (h k) -> h b l t k', h=self.n_head)
            loc_attn = torch.einsum('hblk,hbltk->hblt', q, loc_attn) / np.sqrt(q.shape[-1])
        elif self.spatial_attn_fusion == 'cond':
            spatial_weights = self.lang_cond_fc(residual)
            spatial_weights = einops.rearrange(spatial_weights, 'b l (h d) -> h b l d', h=self.spatial_n_head,
                                               d=self.spatial_dim + 1)
            if self.spatial_n_head == 1:
                spatial_weights = einops.repeat(spatial_weights, '1 b l d -> h b l d', h=self.n_head)
            # First channel is the bias, the remaining ones weight pairwise_locs.
            spatial_bias = spatial_weights[..., :1]
            spatial_weights = spatial_weights[..., 1:]
            loc_attn = torch.einsum('hbld,bltd->hblt', spatial_weights, pairwise_locs) + spatial_bias
            loc_attn = torch.sigmoid(loc_attn)

        if key_padding_mask is not None:
            mask = einops.repeat(key_padding_mask, 'b t -> h b l t', h=self.n_head, l=q.size(2))
            attn = attn.masked_fill(mask, -np.inf)
            if self.spatial_attn_fusion in ['mul', 'cond']:
                # These modes go through log(): 0 is the neutral "masked" value.
                loc_attn = loc_attn.masked_fill(mask, 0)
            else:
                loc_attn = loc_attn.masked_fill(mask, -np.inf)

        if self.spatial_attn_fusion == 'add':
            fused_attn = (torch.softmax(attn, 3) + torch.softmax(loc_attn, 3)) / 2
        else:
            if self.spatial_attn_fusion in ['mul', 'cond']:
                # Multiplicative fusion done in log-space; clamp avoids log(0).
                fused_attn = torch.log(torch.clamp(loc_attn, min=1e-6)) + attn
            else:
                fused_attn = loc_attn + attn
            fused_attn = torch.softmax(fused_attn, 3)

        # The previous check, ``torch.sum(torch.isnan(x) == 0)``, counted the
        # non-NaN entries and therefore only failed when *every* weight was NaN.
        assert not torch.isnan(fused_attn).any(), \
            'fused attention weights contain NaN (are all keys of a sample masked?)'

        output = torch.einsum('hblt,hbtv->hblv', fused_attn, v)
        output = einops.rearrange(output, 'head b l v -> b l (head v)')
        output = self.dropout(self.fc(output))
        output = self.layer_norm(output + residual)
        return output, fused_attn
|
145 |
+
|
146 |
+
|
147 |
+
class TransformerSpatialEncoderLayer(TransformerEncoderLayer):
    """Post-norm encoder layer whose attention is ``MultiHeadAttentionSpatial``.

    The feed-forward network, LayerNorms and dropouts are inherited from
    ``TransformerEncoderLayer``; only the attention module is swapped.
    """

    def __init__(
        self, d_model, nhead, dim_feedforward=2048, dropout=0.1, activation="relu",
        spatial_multihead=True, spatial_dim=5, spatial_attn_fusion='mul'
    ):
        super().__init__(
            d_model, nhead, dim_feedforward=dim_feedforward, dropout=dropout, activation=activation
        )
        # Drop the parent's nn.MultiheadAttention before installing the
        # spatial variant under the same attribute name.
        del self.self_attn
        self.self_attn = MultiHeadAttentionSpatial(
            d_model, nhead, dropout=dropout,
            spatial_multihead=spatial_multihead,
            spatial_dim=spatial_dim,
            spatial_attn_fusion=spatial_attn_fusion,
        )

    def forward(
        self, tgt, tgt_pairwise_locs,
        tgt_mask: Optional[Tensor] = None,
        tgt_key_padding_mask: Optional[Tensor] = None,
    ):
        """Apply spatial self-attention followed by the feed-forward block.

        ``tgt_mask`` is accepted for signature compatibility but not used.
        Returns ``(output, attention_weights)``.
        """
        attn_output, self_attn_matrices = self.self_attn(
            tgt, tgt, tgt, tgt_pairwise_locs,
            key_padding_mask=tgt_key_padding_mask
        )
        hidden = self.norm1(tgt + self.dropout1(attn_output))
        ffn_output = self.linear2(self.dropout(self.activation(self.linear1(hidden))))
        hidden = self.norm2(hidden + self.dropout2(ffn_output))
        return hidden, self_attn_matrices
|
179 |
+
|
180 |
+
|
181 |
+
def _init_weights_bert(module, std=0.02):
|
182 |
+
"""
|
183 |
+
Huggingface transformer weight initialization,
|
184 |
+
most commonly for bert initialization
|
185 |
+
"""
|
186 |
+
if isinstance(module, nn.Linear):
|
187 |
+
# Slightly different from the TF version which uses truncated_normal for initialization
|
188 |
+
# cf https://github.com/pytorch/pytorch/pull/5617
|
189 |
+
module.weight.data.normal_(mean=0.0, std=std)
|
190 |
+
if module.bias is not None:
|
191 |
+
module.bias.data.zero_()
|
192 |
+
elif isinstance(module, nn.Embedding):
|
193 |
+
module.weight.data.normal_(mean=0.0, std=std)
|
194 |
+
if module.padding_idx is not None:
|
195 |
+
module.weight.data[module.padding_idx].zero_()
|
196 |
+
elif isinstance(module, nn.LayerNorm):
|
197 |
+
module.bias.data.zero_()
|
198 |
+
module.weight.data.fill_(1.0)
|
199 |
+
|
200 |
+
|
201 |
+
def generate_fourier_features(pos, num_bands=10, max_freq=15, concat_pos=True, sine_only=False):
    """Encode positions with sinusoids at linearly spaced frequencies.

    Args:
        pos: Tensor of shape (B, N, C).
        num_bands: Number of frequencies, spaced linearly in [1, max_freq].
        max_freq: Highest frequency.
        concat_pos: If True, prepend the raw ``pos`` to the encoding.
        sine_only: If True emit only ``sin`` terms, otherwise ``sin`` and ``cos``.

    Returns:
        Tensor of shape (B, N, C') where
        ``C' = C * num_bands * (1 if sine_only else 2) + (C if concat_pos else 0)``.
    """
    batch_size = pos.shape[0]
    min_freq = 1.0
    freq_bands = torch.linspace(start=min_freq, end=max_freq, steps=num_bands, device=pos.device)

    # (B, N, C, num_bands): every coordinate scaled by every frequency ...
    scaled = pos.unsqueeze(-1).repeat(1, 1, 1, num_bands) * freq_bands
    # ... then flattened to (B, N, C * num_bands).
    flat_width = int(np.prod(scaled.shape[2:]))
    phase = np.pi * torch.reshape(scaled, [batch_size, -1, flat_width])

    if sine_only:
        encoding = torch.sin(phase)
    else:
        encoding = torch.cat([torch.sin(phase), torch.cos(phase)], dim=-1)

    if not concat_pos:
        return encoding
    # Raw positions go first, followed by the sinusoidal bands.
    return torch.cat([pos, encoding], dim=-1)
|
230 |
+
|
231 |
+
|
232 |
+
class OSE3D(nn.Module):
    """Object-centric 3D scene encoder.

    (Open-vocabulary, Spatial-attention, Embodied-token, 3D-agent.)

    Per-object features are projected to ``hidden_dim``, tagged with a type
    embedding, optionally prefixed by an "embodied" anchor token, and passed
    through a stack of (spatial) transformer encoder layers with learned
    location encodings.

    Args:
        use_spatial_attn: Use ``TransformerSpatialEncoderLayer`` (pairwise
            spatial attention) instead of the plain ``TransformerEncoderLayer``.
        use_embodied_token: Prepend an anchor token built from
            ``anchor_locs`` / ``anchor_orientation``.
        hidden_dim: Width of the object tokens.
        fourier_size: Input width of the orientation encoder, i.e. the size of
            the Fourier features of ``anchor_orientation``.
        spatial_encoder: Optional dict overriding entries of
            ``_DEFAULT_SPATIAL_ENCODER``; missing keys fall back to the defaults.

    Raises:
        ValueError: If ``obj_loc_encoding`` is not one of ``'same_0'``,
            ``'same_all'`` or ``'diff_all'``.
    """

    # Defaults are kept on the class and copied per instance so that no
    # mutable object is shared through the signature.
    _DEFAULT_SPATIAL_ENCODER = {
        "num_attention_heads": 8,
        "dim_feedforward": 2048,
        "dropout": 0.1,
        "activation": "gelu",
        "spatial_dim": 5,
        "spatial_multihead": True,
        "spatial_attn_fusion": "cond",
        "num_layers": 3,
        "pairwise_rel_type": "center",
        "spatial_dist_norm": True,
        "obj_loc_encoding": "same_all",
        "dim_loc": 6,
    }

    def __init__(self, use_spatial_attn=True, use_embodied_token=False, hidden_dim=256, fourier_size=84,
                 spatial_encoder=None):
        super().__init__()
        spatial_encoder = {**self._DEFAULT_SPATIAL_ENCODER, **(spatial_encoder or {})}

        self.use_spatial_attn = use_spatial_attn  # spatial attention
        self.use_embodied_token = use_embodied_token  # embodied token

        # Object features arrive pre-extracted (768-d) and are only projected here.
        self.obj_proj = nn.Linear(768, hidden_dim)

        # embodied token
        if self.use_embodied_token:
            self.anchor_feat = nn.Parameter(torch.zeros(1, 1, hidden_dim))
            self.anchor_size = nn.Parameter(torch.ones(1, 1, 3))
            self.orient_encoder = nn.Linear(fourier_size, hidden_dim)
        # Type 0 = scene object, type 1 = anchor; always needed by forward().
        self.obj_type_embed = nn.Embedding(2, hidden_dim)

        # spatial encoder
        if self.use_spatial_attn:
            spatial_encoder_layer = TransformerSpatialEncoderLayer(
                d_model=hidden_dim,
                nhead=spatial_encoder['num_attention_heads'],
                dim_feedforward=spatial_encoder['dim_feedforward'],
                dropout=spatial_encoder['dropout'],
                activation=spatial_encoder['activation'],
                spatial_dim=spatial_encoder['spatial_dim'],
                spatial_multihead=spatial_encoder['spatial_multihead'],
                spatial_attn_fusion=spatial_encoder['spatial_attn_fusion'],
            )
        else:
            spatial_encoder_layer = TransformerEncoderLayer(
                d_model=hidden_dim,
                nhead=spatial_encoder['num_attention_heads'],
                dim_feedforward=spatial_encoder['dim_feedforward'],
                dropout=spatial_encoder['dropout'],
                activation=spatial_encoder['activation'],
            )

        self.spatial_encoder = layer_repeat(
            spatial_encoder_layer,
            spatial_encoder['num_layers'],
        )
        self.pairwise_rel_type = spatial_encoder['pairwise_rel_type']
        self.spatial_dist_norm = spatial_encoder['spatial_dist_norm']
        self.spatial_dim = spatial_encoder['spatial_dim']
        self.obj_loc_encoding = spatial_encoder['obj_loc_encoding']

        # location encoding: one shared layer, or one per encoder layer
        if self.obj_loc_encoding in ['same_0', 'same_all']:
            num_loc_layers = 1
        elif self.obj_loc_encoding == 'diff_all':
            num_loc_layers = spatial_encoder['num_layers']
        else:
            raise ValueError(
                "unsupported obj_loc_encoding %r (expected 'same_0', 'same_all' or 'diff_all')"
                % (self.obj_loc_encoding,)
            )

        loc_layer = nn.Sequential(
            nn.Linear(spatial_encoder['dim_loc'], hidden_dim),
            nn.LayerNorm(hidden_dim),
        )
        self.loc_layers = layer_repeat(loc_layer, num_loc_layers)

        # only initialize spatial encoder and loc layers
        self.spatial_encoder.apply(_init_weights_bert)
        self.loc_layers.apply(_init_weights_bert)

        if self.use_embodied_token:
            nn.init.normal_(self.anchor_feat, std=0.02)

    @property
    def device(self):
        """Device of the module's first parameter."""
        return next(self.parameters()).device

    def forward(self, data_dict):
        """Encode the objects of a scene into tokens.

        data_dict requires keys:
            obj_feats: (B, N, 768), pre-extracted per-object features
            obj_masks: (B, N), True/1 valid and False/0 masked
            obj_locs: (B, N, 6), xyz + whd
            anchor_locs: (B, 3)            (only if use_embodied_token)
            anchor_orientation: (B, C)     (only if use_embodied_token)

        Writes ``obj_tokens`` (encoded tokens) and ``obj_masks`` (boolean
        validity mask, including the anchor token when it is used) back into
        ``data_dict`` and returns it.
        """
        obj_feats = self.obj_proj(data_dict['obj_feats'])
        # Flipped (True = padded) to follow the key_padding_mask convention;
        # cast first so that 0/1 integer masks are inverted logically.
        obj_masks = ~data_dict['obj_masks'].bool()

        B, N = obj_feats.shape[:2]
        device = obj_feats.device

        obj_type_ids = torch.zeros((B, N), dtype=torch.long, device=device)
        obj_type_embeds = self.obj_type_embed(obj_type_ids)

        if self.use_embodied_token:
            # anchor feature
            anchor_orient = data_dict['anchor_orientation'].unsqueeze(1)
            anchor_orient_feat = self.orient_encoder(generate_fourier_features(anchor_orient))
            anchor_feat = self.anchor_feat + anchor_orient_feat
            anchor_mask = torch.zeros((B, 1), dtype=bool, device=device)  # anchor is never padded

            # anchor loc (3) + size (3)
            anchor_loc = torch.cat(
                [data_dict['anchor_locs'].unsqueeze(1), self.anchor_size.expand(B, -1, -1).to(device)], dim=-1
            )

            # anchor type
            anchor_type_id = torch.ones((B, 1), dtype=torch.long, device=device)
            anchor_type_embed = self.obj_type_embed(anchor_type_id)

            # fuse anchor and objs (anchor goes first)
            all_obj_feats = torch.cat([anchor_feat, obj_feats], dim=1)
            all_obj_masks = torch.cat((anchor_mask, obj_masks), dim=1)

            all_obj_locs = torch.cat([anchor_loc, data_dict['obj_locs']], dim=1)
            all_obj_type_embeds = torch.cat((anchor_type_embed, obj_type_embeds), dim=1)
        else:
            all_obj_feats = obj_feats
            all_obj_masks = obj_masks

            all_obj_locs = data_dict['obj_locs']
            all_obj_type_embeds = obj_type_embeds

        all_obj_feats = all_obj_feats + all_obj_type_embeds

        # call spatial encoder
        if self.use_spatial_attn:
            pairwise_locs = calc_pairwise_locs(
                all_obj_locs[:, :, :3],
                all_obj_locs[:, :, 3:],
                pairwise_rel_type=self.pairwise_rel_type,
                spatial_dist_norm=self.spatial_dist_norm,
                spatial_dim=self.spatial_dim,
            )

        for i, pc_layer in enumerate(self.spatial_encoder):
            if self.obj_loc_encoding == 'diff_all':
                query_pos = self.loc_layers[i](all_obj_locs)
            else:
                query_pos = self.loc_layers[0](all_obj_locs)
            # 'same_0' injects the location encoding before the first layer only.
            if not (self.obj_loc_encoding == 'same_0' and i > 0):
                all_obj_feats = all_obj_feats + query_pos

            if self.use_spatial_attn:
                all_obj_feats, _ = pc_layer(
                    all_obj_feats, pairwise_locs,
                    tgt_key_padding_mask=all_obj_masks
                )
            else:
                all_obj_feats, _ = pc_layer(
                    all_obj_feats,
                    tgt_key_padding_mask=all_obj_masks
                )

        data_dict['obj_tokens'] = all_obj_feats
        data_dict['obj_masks'] = ~all_obj_masks  # back to True = valid

        return data_dict
|
leo/utils.py
ADDED
@@ -0,0 +1,187 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import csv
|
2 |
+
import copy
|
3 |
+
import torch
|
4 |
+
import einops
|
5 |
+
import numpy as np
|
6 |
+
from torch import nn
|
7 |
+
import torch.nn.functional as F
|
8 |
+
|
9 |
+
|
10 |
+
|
11 |
+
def get_activation_fn(activation_type):
    """Return the ``torch.nn.functional`` activation named ``activation_type``.

    Args:
        activation_type: One of ``"relu"``, ``"gelu"`` or ``"glu"``.

    Raises:
        RuntimeError: If ``activation_type`` is not one of the supported names.
    """
    if activation_type not in ("relu", "gelu", "glu"):
        # The message now lists every accepted name ("glu" was missing).
        raise RuntimeError(f"activation function currently support relu/gelu/glu, not {activation_type}")
    return getattr(F, activation_type)
|
15 |
+
|
16 |
+
def get_mlp_head(input_size, hidden_size, output_size, dropout=0):
    """Build a two-layer MLP head.

    Layout: Linear -> ReLU -> LayerNorm(eps=1e-12) -> Dropout -> Linear.
    """
    fc_in = nn.Linear(input_size, hidden_size)
    act = nn.ReLU()
    norm = nn.LayerNorm(hidden_size, eps=1e-12)
    drop = nn.Dropout(dropout)
    fc_out = nn.Linear(hidden_size, output_size)
    return nn.Sequential(fc_in, act, norm, drop, fc_out)
|
24 |
+
|
25 |
+
def layer_repeat(module, N, share_layer=False):
    """Stack ``N`` instances of ``module`` in an ``nn.ModuleList``.

    Args:
        module: Prototype layer.
        N: Number of layers; ``N <= 0`` yields an empty list.
        share_layer: If True every entry is the *same* module object (shared
            weights). Otherwise the list holds ``N - 1`` deep copies followed
            by ``module`` itself as the last entry.

    Returns:
        ``nn.ModuleList`` of length ``max(N, 0)``.
    """
    if N <= 0:
        # Previously the non-shared branch still returned one layer for N == 0.
        return nn.ModuleList()
    if share_layer:
        return nn.ModuleList([module] * N)
    return nn.ModuleList([copy.deepcopy(module) for _ in range(N - 1)] + [module])
|
30 |
+
|
31 |
+
|
32 |
+
def calc_pairwise_locs(obj_centers, obj_whls, eps=1e-10, pairwise_rel_type='center', spatial_dist_norm=True,
                       spatial_dim=5):
    """Compute pairwise spatial relation features between objects.

    Args:
        obj_centers: (b, l, 3) object centres.
        obj_whls: (b, l, 3) object sizes.
        eps: Added under the square roots so distances are never exactly zero.
        pairwise_rel_type: ``'mlp'`` returns the concatenated raw locations of
            both objects, (b, l, l, 12). ``'center'`` and ``'vertical_bottom'``
            return ``[dist, dz/dist, dist_2d/dist, dy/dist_2d, dx/dist_2d]``,
            the latter using bottom-shifted centres for the two vertical terms.
        spatial_dist_norm: Divide distances by the per-sample maximum distance.
        spatial_dim: 1 returns only the distance, 4 drops the distance channel,
            otherwise all 5 channels are returned.

    Returns:
        (b, l, l, d) tensor; entry ``[b, i, j]`` relates object ``i`` to ``j``.
    """
    if pairwise_rel_type == 'mlp':
        obj_locs = torch.cat([obj_centers, obj_whls], 2)
        num_objs = obj_locs.size(1)
        row_locs = obj_locs.unsqueeze(2).expand(-1, -1, num_objs, -1)  # [b, i, j] = locs[b, i]
        col_locs = obj_locs.unsqueeze(1).expand(-1, num_objs, -1, -1)  # [b, i, j] = locs[b, j]
        return torch.cat([row_locs, col_locs], dim=3)

    # offsets[b, i, j] = centre_i - centre_j
    offsets = obj_centers.unsqueeze(2) - obj_centers.unsqueeze(1)
    dists = torch.sqrt(torch.sum(offsets ** 2, 3) + eps)  # (b, l, l)
    if spatial_dist_norm:
        max_dists = torch.max(dists.view(dists.size(0), -1), dim=1)[0]
        norm_dists = dists / max_dists.view(-1, 1, 1)
    else:
        norm_dists = dists

    if spatial_dim == 1:
        return norm_dists.unsqueeze(3)

    dists_2d = torch.sqrt(torch.sum(offsets[..., :2] ** 2, 3) + eps)
    pairwise_locs = offsets
    if pairwise_rel_type == 'center':
        pairwise_locs = torch.stack(
            [
                norm_dists,
                offsets[..., 2] / dists,
                dists_2d / dists,
                offsets[..., 1] / dists_2d,
                offsets[..., 0] / dists_2d,
            ],
            dim=3
        )
    elif pairwise_rel_type == 'vertical_bottom':
        bottom_centers = torch.clone(obj_centers)
        bottom_centers[:, :, 2] -= obj_whls[:, :, 2]
        bottom_offsets = bottom_centers.unsqueeze(2) - bottom_centers.unsqueeze(1)
        bottom_dists = torch.sqrt(torch.sum(bottom_offsets ** 2, 3) + eps)  # (b, l, l)
        bottom_dists_2d = torch.sqrt(torch.sum(bottom_offsets[..., :2] ** 2, 3) + eps)
        pairwise_locs = torch.stack(
            [
                norm_dists,
                bottom_offsets[..., 2] / bottom_dists,
                bottom_dists_2d / bottom_dists,
                offsets[..., 1] / dists_2d,
                offsets[..., 0] / dists_2d,
            ],
            dim=3
        )

    if spatial_dim == 4:
        pairwise_locs = pairwise_locs[..., 1:]
    return pairwise_locs
|
82 |
+
|
83 |
+
def convert_pc_to_box(obj_pc):
    """Return the axis-aligned bounding box of a point cloud.

    Only the first three columns (x, y, z) of ``obj_pc`` are used.

    Returns:
        ``(center, box_size)``: two 3-element lists holding the box centre and
        its extent along x, y and z.
    """
    axes = range(3)
    lows = [np.min(obj_pc[:, axis]) for axis in axes]
    highs = [np.max(obj_pc[:, axis]) for axis in axes]
    center = [(lo + hi) / 2 for lo, hi in zip(lows, highs)]
    box_size = [hi - lo for lo, hi in zip(lows, highs)]
    return center, box_size
|
93 |
+
|
94 |
+
class LabelConverter(object):
    """Lookup tables between label vocabularies, loaded from a TSV file.

    The first row of the file is treated as a header. For every following row
    (0-based index = internal ``raw_id``) the columns read are:
    0 -> ScanNet raw id, 1 -> raw category name, 4 -> NYU40 id,
    7 -> NYU40 class name.
    """

    def __init__(self, file_path):
        # Fixed 18-way label set; anything else maps to 'others'.
        self.scannet_name_to_scannet_id = {'cabinet':0, 'bed':1, 'chair':2, 'sofa':3, 'table':4,
            'door':5, 'window':6,'bookshelf':7,'picture':8, 'counter':9, 'desk':10, 'curtain':11,
            'refrigerator':12, 'shower curtain':13, 'toilet':14, 'sink':15, 'bathtub':16, 'others':17}

        self.raw_name_to_id = {}
        self.nyu40id_to_id = {}
        self.nyu40_name_to_id = {}
        self.id_to_scannetid = {}
        self.scannet_raw_id_to_raw_name = {}
        self.raw_name_to_scannet_raw_id = {}

        with open(file_path, encoding='utf-8') as fd:
            rows = list(csv.reader(fd, delimiter="\t", quotechar='"'))

        # rows[0] is the header; data rows are numbered from 0.
        for raw_id, row in enumerate(rows[1:]):
            scannet_raw_id = int(row[0])
            raw_name = row[1]
            nyu40_id = int(row[4])
            nyu40_name = row[7]

            self.raw_name_to_id[raw_name] = raw_id
            self.scannet_raw_id_to_raw_name[scannet_raw_id] = raw_name
            self.raw_name_to_scannet_raw_id[raw_name] = scannet_raw_id
            # Several raw rows may share an NYU40 id/name: the last one wins.
            self.nyu40id_to_id[nyu40_id] = raw_id
            self.nyu40_name_to_id[nyu40_name] = raw_id

            if nyu40_name in self.scannet_name_to_scannet_id:
                scannet_id = self.scannet_name_to_scannet_id[nyu40_name]
            else:
                scannet_id = self.scannet_name_to_scannet_id['others']
            self.id_to_scannetid[raw_id] = scannet_id
|
123 |
+
|
124 |
+
def build_rotate_mat(split, rot_aug=True, rand_angle='axis'):
    """Sample a rotation about the z-axis for training-time augmentation.

    Args:
        split: Dataset split; a matrix is only produced for ``'train'``.
        rot_aug: Master switch for the augmentation.
        rand_angle: ``'random'`` draws an angle uniformly from [0, 2*pi);
            any other value picks one of 0, pi/2, pi, 3*pi/2.

    Returns:
        A (3, 3) float32 rotation matrix, or ``None`` when no rotation applies
        (non-train split, ``rot_aug`` false, or a sampled angle of 0).

    Note:
        One value is always drawn from NumPy's global RNG, even when the
        result is ``None``.
    """
    if rand_angle == 'random':
        theta = np.random.rand() * np.pi * 2
    else:
        axis_angles = (0, np.pi/2, np.pi, np.pi*3/2)
        theta = axis_angles[np.random.randint(len(axis_angles))]

    if theta is None or theta == 0 or split != 'train' or not rot_aug:
        return None

    cos_t = np.cos(theta)
    sin_t = np.sin(theta)
    return np.array(
        [
            [cos_t, -sin_t, 0],
            [sin_t, cos_t, 0],
            [0, 0, 1],
        ],
        dtype=np.float32,
    )
|
140 |
+
|
141 |
+
def obj_processing_post(obj_pcds, rot_aug=True):
    """Compute per-object boxes and normalise object point clouds.

    Args:
        obj_pcds: NumPy array indexed as ``[object, point, channel]`` whose
            first three channels are xyz. It is wrapped with
            ``torch.from_numpy`` and modified in place, so the caller's array
            is changed as well.
        rot_aug: Forwarded to ``build_rotate_mat``. The split passed there is
            hard-coded to ``'val'``.

    Returns:
        ``(obj_pcds, obj_locs, obj_boxes, rot_matrix)`` where ``obj_pcds`` is
        the centred, unit-scaled point tensor, ``obj_locs`` is
        ``[mean xyz, box size]``, ``obj_boxes`` is ``[box centre, box size]``
        and ``rot_matrix`` is the applied rotation (or ``None``).
    """
    obj_pcds = torch.from_numpy(obj_pcds)

    rot_matrix = build_rotate_mat('val', rot_aug)
    if rot_matrix is not None:
        rot_matrix = torch.from_numpy(rot_matrix.transpose())
        obj_pcds[:, :, :3] @= rot_matrix

    # View onto the coordinates: in-place edits below write through to obj_pcds.
    xyz = obj_pcds[:, :, :3]

    # Statistics are taken before centring / scaling.
    mean_center = xyz.mean(1)
    lower = xyz.min(1).values
    upper = xyz.max(1).values
    extent = upper - lower
    obj_locs = torch.cat([mean_center, extent], dim=1)
    obj_boxes = torch.cat([(lower + upper) / 2, extent], dim=1)

    # centering
    xyz.sub_(xyz.mean(1, keepdim=True))

    # normalization: scale so the farthest point lies on the unit sphere
    radius = (xyz**2).sum(2).sqrt().max(1).values
    radius.clamp_(min=1e-6)
    xyz.div_(radius[:, None, None])

    return obj_pcds, obj_locs, obj_boxes, rot_matrix
|
166 |
+
|
167 |
+
|
168 |
+
def pad_sequence(sequence_list, max_len=None, pad=0, return_mask=False):
    """Stack variable-length tensors into one batch padded along dim 0.

    Args:
        sequence_list: Tensors that agree on every dimension except the first.
        max_len: Target length; defaults to the longest sequence.
        pad: Fill value for the padded positions.
        return_mask: Also return a boolean (batch, max_len) mask that is True
            at padded positions.

    Returns:
        The padded tensor of shape (batch, max_len, ...) with the dtype and
        device of the first sequence, optionally followed by the mask.
    """
    lens = [seq.shape[0] for seq in sequence_list]
    if max_len is None:
        max_len = max(lens)

    first = sequence_list[0]
    dtype, device = first.dtype, first.device
    batch_shape = [len(sequence_list), max_len] + list(first.shape)[1:]

    padded_sequence = first.new_ones(batch_shape) * pad
    for row, (seq, length) in enumerate(zip(sequence_list, lens)):
        padded_sequence[row, :length] = seq
    # Multiplying by ``pad`` may have promoted the dtype; restore it.
    padded_sequence = padded_sequence.to(dtype)

    if not return_mask:
        return padded_sequence

    positions = torch.arange(max_len).to(device)[None, :]
    lengths = torch.LongTensor(lens).to(device)[:, None]
    mask = positions >= lengths  # True as masked.
    return padded_sequence, mask
|
requirements.txt
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
clip==0.2.0
|
2 |
+
einops==0.8.0
|
3 |
+
gradio==4.39.0
|
4 |
+
numpy==1.24.3
|
5 |
+
peft==0.12.0
|
6 |
+
timm==1.0.8
|
7 |
+
torch==2.3.1
|
8 |
+
transformers==4.40.2
|