zfzhang-thu committed on
Commit
9de012e
1 Parent(s): b5cf65e

non-LFS commit

Browse files
.gitattributes CHANGED
@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ *.pth filter=lfs diff=lfs merge=lfs -text
37
+ *.bin filter=lfs diff=lfs merge=lfs -text
38
+ *.glb filter=lfs diff=lfs merge=lfs -text
app.py ADDED
@@ -0,0 +1,152 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import os
3
+ import re
4
+
5
+ from leo.inference import inference
6
+
7
# Directory that holds one ``<scene>.glb`` mesh per selectable scene.
MESH_DIR = 'assets/mesh'
# Scene names offered in the dropdown. Only ``.glb`` files are considered,
# because the scene-change callback always rebuilds the path as
# ``<name>.glb``; any other file in the directory (e.g. hidden/OS files)
# would otherwise become a choice that points at a non-existent mesh.
MESH_NAMES = sorted(
    os.path.splitext(fname)[0]
    for fname in os.listdir(MESH_DIR)
    if fname.endswith('.glb')
)
# Number of action-step slots (text box + 3D viewer pairs) shown in the UI.
STEP_COUNTS = 6
10
+
11
def change_scene(dropdown_scene: str):
    """Return the path of the ``.glb`` mesh for the scene chosen in the dropdown.

    The returned path is ``MESH_DIR/<dropdown_scene>.glb``; it is used as the
    new value of the 3D scene viewer.
    """
    mesh_filename = '{}.glb'.format(dropdown_scene)
    return os.path.join(MESH_DIR, mesh_filename)
14
+
15
with gr.Blocks(title='LEO Demo') as demo:
    # The LEO wrappers below always run on this scan, independent of the
    # scene currently shown in the dropdown (the dropdown only swaps the
    # displayed mesh).
    SCAN_ID = 'scene0050_00'
    MASK_DIR = f'assets/mask/{SCAN_ID}'
    # Placeholder mesh shown in step slots that have no predicted object.
    EMPTY_MASK = f'{MASK_DIR}/{SCAN_ID}_obj_empty.glb'
    # Placeholder text shown in step slots beyond the end of the plan.
    PLAN_END_TEXT = "### PLANNING HAS ENDED, SEE ABOVE FOR DETAILS ###"

    def _mask_path(object_id):
        """Return the mask mesh path for a predicted object id.

        Accepts either a plain value or an object exposing ``.item()``
        (e.g. a 0-d tensor), so both inference modes can share this helper.
        """
        if hasattr(object_id, 'item'):
            object_id = object_id.item()
        return f'{MASK_DIR}/{SCAN_ID}_obj_{object_id}.glb'

    def _pad_to_step_count(items, filler):
        """Return ``items`` truncated/padded with ``filler`` to exactly STEP_COUNTS entries.

        The interfaces below declare exactly STEP_COUNTS output slots, so the
        callbacks must never return more or fewer values than that.
        """
        items = list(items)[:STEP_COUNTS]
        return items + [filler] * (STEP_COUNTS - len(items))

    gr.HTML(value="<h1 align='center'>Task-oriented Sequential Grounding in 3D Scenes </h1>")

    with gr.Row():
        with gr.Column(scale=5):
            dropdown_scene = gr.Dropdown(
                choices=MESH_NAMES,
                value=SCAN_ID,
                interactive=True,
                label='Select a 3D scene',
            )
            model_3d = gr.Model3D(
                value=os.path.join(MESH_DIR, f'{SCAN_ID}.glb'),
                clear_color=[0.0, 0.0, 0.0, 0.0],
                label='3D Scene',
                camera_position=(80, 100, 6),
                height=659,
            )
            gr.HTML(
                """<center><strong>
                👆 SCROLL and DRAG on the 3D Scene
                to zoom in/out and rotate. Press CTRL and DRAG to pan.
                </strong></center>
                """
            )

    # Swap the displayed mesh whenever another scene is selected.
    dropdown_scene.change(
        fn=change_scene,
        inputs=[dropdown_scene],
        outputs=[model_3d],
        queue=False
    )

    # LEO task-to-plan inference wrapper
    def leo_task_to_plan(task_description):
        """Run LEO in predict mode and return the predicted plan text.

        Args:
            task_description: Free-form task text entered by the user.

        Returns:
            The ``pred_plan_text`` field of the first inference result.
        """
        task_input = {
            "task_description": task_description,
            "scan_id": SCAN_ID
        }
        plan = inference(SCAN_ID, task_input, predict_mode=True)
        return plan[0]['pred_plan_text']

    # LEO ground inference wrapper
    def leo_plan_to_masks(task_description, *action_steps):
        """Ground user-provided action steps and return one mask mesh path per slot.

        Args:
            task_description: Free-form task text entered by the user.
            *action_steps: Text of each action step; blank entries are ignored.

        Returns:
            A list of exactly STEP_COUNTS ``.glb`` paths: the predicted object
            mask for each grounded step, followed by the empty placeholder mesh
            for unused slots.
        """
        # Skip empty, whitespace-only and missing (None) step fields.
        formatted_action_steps = [
            {"action": step, "target_id": "unknown", "label": "unknown"}
            for step in action_steps
            if step and step.strip()
        ]
        if not formatted_action_steps:
            # Nothing to ground: show only placeholders instead of calling the model.
            return [EMPTY_MASK] * STEP_COUNTS

        task_input = {
            "task_description": task_description,
            "action_steps": formatted_action_steps,
            "scan_id": SCAN_ID
        }
        masks = inference(SCAN_ID, task_input, predict_mode=False)
        return _pad_to_step_count([_mask_path(mask) for mask in masks], EMPTY_MASK)

    # LEO task-to-plan and ground inference wrapper
    def leo_task_to_plan_and_masks(task_description):
        """Predict a plan for the task and return interleaved step texts and mask paths.

        Args:
            task_description: Free-form task text entered by the user.

        Returns:
            A flat list ``[step_1, mask_1, ..., step_N, mask_N]`` with
            N == STEP_COUNTS, matching the order of the interface outputs.
        """
        task_input = {
            "task_description": task_description,
            "scan_id": SCAN_ID
        }
        plan = inference(SCAN_ID, task_input, predict_mode=True)
        plan_text = plan[0]['pred_plan_text']

        # The capturing group keeps the "N." markers in the split result;
        # [1:] drops whatever text precedes the first marker, leaving
        # alternating (marker, body) entries.
        parts = re.split(r'(\d+\.)', plan_text)[1:]
        steps = [
            marker + body.rstrip()
            for marker, body in zip(parts[::2], parts[1::2])
        ]
        if not steps and plan_text.strip():
            # No numbered steps found: show the raw plan rather than discarding it.
            steps = [plan_text.strip()]
        steps = _pad_to_step_count(steps, PLAN_END_TEXT)

        masks = plan[0]['predict_object_id']
        mask_paths = _pad_to_step_count([_mask_path(mask) for mask in masks], EMPTY_MASK)

        return [item for pair in zip(steps, mask_paths) for item in pair]

    with gr.Tab("LEO Task-to-Plan"):
        gr.Interface(
            fn=leo_task_to_plan,
            inputs=[gr.Textbox(label="Task Description")],
            outputs=["text"],
            examples=[
                ["Freshen up in the bathroom."]
            ],
            title="LEO Task-to-Plan: Input task, Output plan text"
        )

    with gr.Tab("LEO Plan-to-Masks"):
        gr.Interface(
            fn=leo_plan_to_masks,
            inputs=[gr.Textbox(label="Task Description")] + [
                gr.Textbox(label=f"Action Step {i+1}") for i in range(STEP_COUNTS)
            ],
            outputs=[
                gr.Model3D(
                    clear_color=[0.0, 0.0, 0.0, 0.0],
                    camera_position=(80, 100, 6),
                    label=f"3D Model for Step {i+1} (if the step exists)"
                ) for i in range(STEP_COUNTS)
            ],
            examples=[
                [
                    "Retrieve an item from the backpack.",
                    "1. Walk to the ottoman located near the brown leather armchair.",
                    "2. Choose the black backpack resting on this ottoman.",
                    "3. Open the backpack to find the needed item.",
                ] + [""] * (STEP_COUNTS - 3)
            ],
            title="LEO Plan-to-Masks: Input plan, Output 3D Masks for each step, Red denotes predicted target object"
        )

    with gr.Tab("LEO Task-to-Plan and Masks"):
        gr.Interface(
            fn=leo_task_to_plan_and_masks,
            inputs=[gr.Textbox(label="Task Description")],
            # Outputs alternate (step text, step mesh), matching the flat list
            # returned by leo_task_to_plan_and_masks.
            outputs=[
                item for sublist in zip(
                    [gr.Textbox(label=f"Action Step {i+1}") for i in range(STEP_COUNTS)],
                    [gr.Model3D(
                        clear_color=[0.0, 0.0, 0.0, 0.0],
                        camera_position=(80, 100, 6),
                        label=f"3D Model for Step {i+1} (if the step exists)"
                    ) for i in range(STEP_COUNTS)]
                ) for item in sublist
            ],
            examples=[
                ["Retrieve an item from the backpack."]
            ],
            title="LEO Task-to-Plan and Masks: Input task, Output plan text and 3D Masks for each step, Red denotes predicted target object",
            # NOTE: unused step slots are filled with the placeholder text and
            # the empty mesh rather than hidden; a client-side `js` snippet
            # that hid those slots was previously drafted here but disabled.
        )
151
+
152
if __name__ == "__main__":
    # Start the server only when run as a script, so importing this module
    # (e.g. from tooling or tests) does not launch the app as a side effect.
    # `allowed_paths` lets Gradio serve the mesh/mask files under `assets`.
    demo.queue().launch(share=True, allowed_paths=['assets'])
assets/meta/scannetv2-labels.combined.tsv ADDED
@@ -0,0 +1,608 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ id raw_category category count nyu40id eigen13id nyuClass nyu40class eigen13class ModelNet40 ModelNet10 ShapeNetCore55 synsetoffset wnsynsetid wnsynsetkey mpcat40 mpcat40index
2
+ 1 wall wall 8277 1 12 wall wall Wall n04546855 wall.n.01 wall 1
3
+ 2 chair chair 4646 5 4 chair chair Chair chair chair chair 3001627 n03001627 chair.n.01 chair 3
4
+ 22 books book 1678 23 2 book books Books n02870526 book.n.11 objects 39
5
+ 3 floor floor 1553 2 5 floor floor Floor n03365592 floor.n.01 floor 2
6
+ 5 door door 1483 8 12 door door Wall door n03221720 door.n.01 door 4
7
+ 1163 object object 1313 40 7 otherprop Objects objects 39
8
+ 16 window window 1209 9 13 window window Window n04587648 window.n.01 window 9
9
+ 4 table table 1170 7 10 table table Table table table table 4379243 n04379243 table.n.02 table 5
10
+ 56 trash can trash can 1090 39 6 garbage bin otherfurniture Furniture trash_bin 2747177 n02747177 ashcan.n.01 objects 39
11
+ 13 pillow pillow 937 18 7 pillow pillow Objects pillow 3938244 n03938244 pillow.n.01 cushion 8
12
+ 15 picture picture 862 11 8 picture picture Picture n03931044 picture.n.01 picture 6
13
+ 41 ceiling ceiling 806 22 3 ceiling ceiling Ceiling n02990373 ceiling.n.01 ceiling 17
14
+ 26 box box 775 29 7 box box Objects n02883344 box.n.01 objects 39
15
+ 161 doorframe doorframe 768 8 12 door door Wall door doorframe.n.01 door 4
16
+ 19 monitor monitor 765 40 7 monitor otherprop Objects monitor monitor tv or monitor 3211117 n03782190 monitor.n.04 objects 39
17
+ 7 cabinet cabinet 731 3 6 cabinet cabinet Furniture cabinet 2933112 n02933112 cabinet.n.01 cabinet 7
18
+ 9 desk desk 680 14 10 desk desk Table desk desk table 4379243 n03179701 desk.n.01 table 5
19
+ 8 shelf shelf 641 15 6 shelves shelves Furniture bookshelf bookshelf 2871439 n02871439 bookshelf.n.01 shelving 31
20
+ 10 office chair office chair 595 5 4 chair chair Chair chair chair chair 3001627 n04373704 swivel_chair.n.01 chair 3
21
+ 31 towel towel 570 27 7 towel towel Objects n04459362 towel.n.01 towel 20
22
+ 6 couch couch 502 6 9 sofa sofa Sofa sofa sofa sofa 4256520 n04256520 sofa.n.01 sofa 10
23
+ 14 sink sink 488 34 7 sink sink Objects sink n04223580 sink.n.01 sink 15
24
+ 48 backpack backpack 479 40 7 backpack otherprop Objects n02769748 backpack.n.01 objects 39
25
+ 28 lamp lamp 419 35 7 lamp lamp Objects lamp lamp 3636649 n03636649 lamp.n.02 lighting 28
26
+ 11 bed bed 370 4 1 bed bed Bed bed bed bed 2818832 n02818832 bed.n.01 bed 11
27
+ 18 bookshelf bookshelf 360 10 6 bookshelf bookshelf Furniture bookshelf bookshelf 2871439 n02871439 bookshelf.n.01 shelving 31
28
+ 71 mirror mirror 349 19 7 mirror mirror Objects n03773035 mirror.n.01 mirror 21
29
+ 21 curtain curtain 347 16 13 curtain curtain Window curtain n03151077 curtain.n.01 curtain 12
30
+ 40 plant plant 331 40 7 plant otherprop Objects plant n00017222 plant.n.02 plant 14
31
+ 52 whiteboard whiteboard 327 30 7 whiteboard whiteboard Objects n03211616 display_panel.n.01 board_panel 35
32
+ 96 radiator radiator 322 39 6 radiator otherfurniture Furniture n04041069 radiator.n.02 misc 40
33
+ 22 book book 318 23 2 book books Books n02870526 book.n.11 objects 39
34
+ 29 kitchen cabinet kitchen cabinet 310 3 6 cabinet cabinet Furniture n02933112 cabinet.n.01 cabinet 7
35
+ 49 toilet paper toilet paper 291 40 7 toilet paper otherprop Objects n15075141 toilet_tissue.n.01 objects 39
36
+ 29 kitchen cabinets kitchen cabinet 289 3 6 cabinet cabinet Furniture cabinet 2933112 n02933112 cabinet.n.01 cabinet 7
37
+ 23 armchair armchair 281 5 4 chair chair Chair chair chair chair 3001627 n02738535 armchair.n.01 chair 3
38
+ 63 shoes shoe 272 40 7 shoe otherprop Objects n04199027 shoe.n.01 clothes 38
39
+ 24 coffee table coffee table 258 7 10 coffee table table Table table table table 4379243 n03063968 coffee_table.n.01 table 5
40
+ 17 toilet toilet 256 33 7 toilet toilet Objects toilet toilet n04446276 toilet.n.01 toilet 18
41
+ 47 bag bag 252 37 7 bag bag Objects suitcase 2773838 n02773838 bag.n.06 objects 39
42
+ 32 clothes clothes 248 21 7 clothes clothes Objects n02728440 apparel.n.01 clothes 38
43
+ 46 keyboard keyboard 246 40 7 keyboard otherprop Objects keyboard computer keyboard 3085013 n03085013 computer_keyboard.n.01 objects 39
44
+ 65 bottle bottle 226 40 7 bottle otherprop Objects bottle bottle 2876657 n02876657 bottle.n.01 objects 39
45
+ 97 recycling bin recycling bin 225 39 6 garbage bin otherfurniture Furniture trash_bin 2747177 n02747177 ashcan.n.01 objects 39
46
+ 34 nightstand nightstand 224 32 6 night stand night stand Furniture night_stand night_stand n03015254 chest_of_drawers.n.01 chest_of_drawers 13
47
+ 38 stool stool 221 40 7 stool otherprop Objects stool n04326896 stool.n.01 stool 19
48
+ 33 tv tv 219 25 11 television television TV tv or monitor 3211117 n03211117 display.n.06 tv_monitor 22
49
+ 75 file cabinet file cabinet 217 3 6 cabinet cabinet Furniture cabinet 2933112 n02933112 cabinet.n.01 cabinet 7
50
+ 36 dresser dresser 213 17 6 dresser dresser Furniture dresser dresser n03015254 chest_of_drawers.n.01 chest_of_drawers 13
51
+ 64 computer tower computer tower 203 40 7 computer otherprop Objects n03082979 computer.n.01 objects 39
52
+ 32 clothing clothes 165 21 7 clothes clothes Objects n02728440 apparel.n.01 clothes 38
53
+ 101 telephone telephone 164 40 7 telephone otherprop Objects telephone 4401088 n04401088 telephone.n.01 objects 39
54
+ 130 cup cup 157 40 7 cup otherprop Objects cup cup or mug 3797390 n03797390 mug.n.04 objects 39
55
+ 27 refrigerator refrigerator 154 24 6 refridgerator refridgerator Furniture n04070727 refrigerator.n.01 appliances 37
56
+ 44 end table end table 147 7 10 table table Table table table table 4379243 n04379243 table.n.02 table 5
57
+ 131 jacket jacket 146 40 7 jacket otherprop Objects n03589791 jacket.n.01 clothes 38
58
+ 55 shower curtain shower curtain 144 28 7 shower curtain shower curtain Objects curtain n04209239 shower_curtain.n.01 curtain 12
59
+ 42 bathtub bathtub 144 36 7 bathtub bathtub Objects bathtub bathtub tub 2808440 n02808440 bathtub.n.01 bathtub 25
60
+ 59 microwave microwave 141 40 7 microwave otherprop Objects microwave 3761084 n03761084 microwave.n.02 appliances 37
61
+ 159 kitchen counter kitchen counter 140 12 6 counter counter Furniture table table table 4379243 n03116530 counter.n.01 counter 26
62
+ 74 sofa chair sofa chair 129 5 4 chair chair Chair chair chair chair 3001627 n03001627 chair.n.01 chair 3
63
+ 82 paper towel dispenser paper towel dispenser 129 40 7 paper towel dispenser otherprop Objects objects 39
64
+ 1164 bathroom vanity bathroom vanity 126 3 6 cabinet cabinet Furniture cabinet 2933112 n02933112 cabinet.n.01 table 5
65
+ 93 suitcase suitcase 118 40 7 luggage otherprop Objects n02773838 bag.n.06 objects 39
66
+ 77 laptop laptop 111 40 7 laptop otherprop Objects laptop laptop 3642806 n03642806 laptop.n.01 objects 39
67
+ 67 ottoman ottoman 111 39 6 ottoman otherfurniture Furniture stool n03380724 footstool.n.01 stool 19
68
+ 128 shower walls shower wall 109 1 12 wall wall Wall n04546855 wall.n.01 wall 1
69
+ 50 printer printer 106 40 7 printer otherprop Objects printer 4004475 n04004475 printer.n.03 appliances 37
70
+ 35 counter counter 104 12 6 counter counter Furniture table table table 4379243 n03116530 counter.n.01 counter 26
71
+ 69 board board 100 38 7 board otherstructure Objects board_panel 35
72
+ 100 soap dispenser soap dispenser 99 40 7 otherprop Objects n04254120 soap_dispenser.n.01 objects 39
73
+ 62 stove stove 95 38 7 stove otherstructure Objects stove 4330267 n04330267 stove.n.02 appliances 37
74
+ 105 light light 93 38 7 light otherstructure Objects n03665366 light.n.02 lighting 28
75
+ 1165 closet wall closet wall 90 1 12 wall wall Wall n04546855 wall.n.01 wall 1
76
+ 165 mini fridge mini fridge 87 24 6 refridgerator refridgerator Furniture n03273913 electric_refrigerator.n.01 appliances 37
77
+ 7 cabinets cabinet 79 3 6 cabinet cabinet Furniture cabinet 2933112 n02933112 cabinet.n.01 cabinet 7
78
+ 5 doors door 76 8 12 door door Wall door n03221720 door.n.01 door 4
79
+ 76 fan fan 75 40 7 fan otherprop Objects n03320046 fan.n.01 misc 40
80
+ 230 tissue box tissue box 73 40 7 tissue box otherprop Objects n02883344 box.n.01 objects 39
81
+ 54 blanket blanket 72 40 7 blanket otherprop Objects n02849154 blanket.n.01 objects 39
82
+ 125 bathroom stall bathroom stall 71 38 7 otherstructure Objects n02873839 booth.n.02 misc 40
83
+ 72 copier copier 70 40 7 otherprop Objects n03257586 duplicator.n.01 appliances 37
84
+ 68 bench bench 66 39 6 bench otherfurniture Furniture bench bench 2828884 n02828884 bench.n.01 seating 34
85
+ 145 bar bar 66 38 7 bar otherstructure Objects n02788689 bar.n.03 misc 40
86
+ 157 soap dish soap dish 65 40 7 soap dish otherprop Objects n04254009 soap_dish.n.01 objects 39
87
+ 1166 laundry hamper laundry hamper 65 40 7 laundry basket otherprop Objects objects 39
88
+ 132 storage bin storage bin 63 40 7 storage bin otherprop Objects objects 39
89
+ 1167 bathroom stall door bathroom stall door 62 8 12 door door Wall door n03221720 door.n.01 door 4
90
+ 232 light switch light switch 61 38 7 light switch otherstructure Objects n04372370 switch.n.01 misc 40
91
+ 134 coffee maker coffee maker 61 40 7 otherprop Objects n03063338 coffee_maker.n.01 appliances 37
92
+ 51 tv stand tv stand 61 39 6 tv stand otherfurniture Furniture tv_stand n03290653 entertainment_center.n.01 furniture 36
93
+ 250 decoration decoration 60 40 7 otherprop Objects n03169390 decoration.n.01 misc 40
94
+ 1168 ceiling light ceiling light 59 38 7 light otherstructure Objects n03665366 light.n.02 lighting 28
95
+ 342 range hood range hood 59 38 7 range hood otherstructure Objects range_hood n04053677 range_hood.n.01 misc 40
96
+ 89 blackboard blackboard 58 38 7 blackboard otherstructure Objects n02846511 blackboard.n.01 board_panel 35
97
+ 103 clock clock 58 40 7 clock otherprop Objects clock 3046257 n03046257 clock.n.01 objects 39
98
+ 99 wardrobe closet wardrobe 54 39 6 wardrobe otherfurniture Furniture wardrobe n04550184 wardrobe.n.01 furniture 36
99
+ 95 rail rail 53 38 7 railing otherstructure Objects n04047401 railing.n.01 railing 30
100
+ 154 bulletin board bulletin board 53 38 7 board otherstructure Objects n03211616 display_panel.n.01 board_panel 35
101
+ 140 mat mat 52 20 5 floor mat floor mat Floor n03727837 mat.n.01 floor 2
102
+ 1169 trash bin trash bin 52 39 6 garbage bin otherfurniture Furniture trash_bin 2747177 n02747177 ashcan.n.01 objects 39
103
+ 193 ledge ledge 51 38 7 otherstructure Objects n09337253 ledge.n.01 misc 40
104
+ 116 seat seat 49 39 6 furniture otherfurniture Furniture n04161981 seat.n.03 furniture 36
105
+ 202 mouse mouse 49 40 7 mouse otherprop Objects n03793489 mouse.n.04 objects 39
106
+ 73 basket basket 48 40 7 basket otherprop Objects basket 2801938 n02801938 basket.n.01 objects 39
107
+ 78 shower shower 48 38 7 otherstructure Objects n04208936 shower.n.01 shower 23
108
+ 1170 dumbbell dumbbell 48 40 7 otherprop Objects n03255030 dumbbell.n.01 objects 39
109
+ 79 paper paper 46 26 7 paper paper Objects n14974264 paper.n.01 objects 39
110
+ 80 person person 46 31 7 person person Objects person n05217688 person.n.02 misc 40
111
+ 141 windowsill windowsill 45 38 7 otherstructure Objects n04590263 windowsill.n.01 window 9
112
+ 57 closet closet 45 39 6 wardrobe otherfurniture Furniture wardrobe misc 40
113
+ 102 bucket bucket 45 40 7 bucket otherprop Objects n02909870 bucket.n.01 misc 40
114
+ 261 sign sign 44 40 7 sign otherprop Objects n04217882 signboard.n.01 objects 39
115
+ 118 speaker speaker 43 40 7 speaker otherprop Objects speaker 3691459 n03691459 loudspeaker.n.01 objects 39
116
+ 136 dishwasher dishwasher 43 38 7 dishwasher otherstructure Objects dishwasher 3207941 n03207941 dishwasher.n.01 appliances 37
117
+ 98 container container 43 40 7 container otherprop Objects n03094503 container.n.01 objects 39
118
+ 1171 stair rail stair rail 42 38 7 banister otherstructure Objects n02788148 bannister.n.02 railing 30
119
+ 170 shower curtain rod shower curtain rod 42 40 7 otherprop Objects curtain 12
120
+ 1172 tube tube 41 40 7 otherprop Objects misc 40
121
+ 1173 bathroom cabinet bathroom cabinet 39 3 6 cabinet cabinet Furniture cabinet 2933112 n02933112 cabinet.n.01 cabinet 7
122
+ 79 papers paper 39 26 7 paper paper Objects n14974264 paper.n.01 objects 39
123
+ 221 storage container storage container 39 40 7 container otherprop Objects objects 39
124
+ 570 paper bag paper bag 39 37 7 bag bag Objects n04122825 sack.n.01 objects 39
125
+ 138 paper towel roll paper towel roll 39 40 7 paper towel otherprop Objects n03887697 paper_towel.n.01 towel 20
126
+ 168 ball ball 39 40 7 ball otherprop Objects objects 39
127
+ 276 closet doors closet door 38 8 12 door door Wall door n03221720 door.n.01 door 4
128
+ 106 laundry basket laundry basket 37 40 7 laundry basket otherprop Objects basket 2801938 n03050864 clothes_hamper.n.01 objects 39
129
+ 214 cart cart 37 40 7 cart otherprop Objects n03484083 handcart.n.01 shelving 31
130
+ 276 closet door closet door 35 8 12 door door Wall door n03221720 door.n.01 door 4
131
+ 323 dish rack dish rack 35 40 7 dish rack otherprop Objects n03207630 dish_rack.n.01 objects 39
132
+ 58 stairs stairs 35 38 7 stairs otherstructure Objects n04298308 stairway.n.01 stairs 16
133
+ 86 blinds blinds 35 13 13 blinds blinds Window n02851099 blind.n.03 blinds 32
134
+ 2 stack of chairs chair 35 5 4 chair chair Chair chair chair chair 3001627 n03001627 chair.n.01 chair 3
135
+ 399 purse purse 34 40 7 purse otherprop Objects n02774152 bag.n.04 objects 39
136
+ 121 bicycle bicycle 33 40 7 bicycle otherprop Objects bicycle 2834778 n02834778 bicycle.n.01 objects 39
137
+ 185 tray tray 32 40 7 tray otherprop Objects n04476259 tray.n.01 objects 39
138
+ 300 plunger plunger 30 40 7 otherprop Objects n03970156 plunger.n.03 objects 39
139
+ 180 paper cutter paper cutter 30 40 7 paper cutter otherprop Objects n03886940 paper_cutter.n.01 objects 39
140
+ 163 toilet paper dispenser toilet paper dispenser 29 40 7 otherprop Objects objects 39
141
+ 26 boxes box 29 29 7 box box Objects n02883344 box.n.01 objects 39
142
+ 66 bin bin 28 40 7 bin otherprop Objects n02839910 bin.n.01 objects 39
143
+ 208 toilet seat cover dispenser toilet seat cover dispenser 28 40 7 otherprop Objects objects 39
144
+ 112 guitar guitar 28 40 7 guitar otherprop Objects guitar guitar 3467517 n03467517 guitar.n.01 objects 39
145
+ 540 mailboxes mailbox 28 29 7 box box Objects mailbox 3710193 n03710193 mailbox.n.01 misc 40
146
+ 395 handicap bar handicap bar 27 38 7 bar otherstructure Objects misc 40
147
+ 166 fire extinguisher fire extinguisher 27 40 7 fire extinguisher otherprop Objects n03345837 fire_extinguisher.n.01 misc 40
148
+ 122 ladder ladder 27 39 6 ladder otherfurniture Furniture stairs n03632277 ladder.n.01 stairs 16
149
+ 120 column column 26 38 7 column otherstructure Objects n03074380 column.n.06 column 24
150
+ 107 pipe pipe 25 40 7 pipe otherprop Objects n03944672 pipe.n.02 misc 40
151
+ 283 vacuum cleaner vacuum cleaner 25 40 7 otherprop Objects n04517823 vacuum.n.04 objects 39
152
+ 88 plate plate 24 40 7 plate otherprop Objects n03959485 plate.n.04 objects 39
153
+ 90 piano piano 24 39 6 piano otherfurniture Furniture piano piano 3928116 n03928116 piano.n.01 furniture 36
154
+ 177 water cooler water cooler 24 39 6 water cooler otherfurniture Furniture n04559166 water_cooler.n.01 misc 40
155
+ 1174 cd case cd case 24 40 7 otherprop Objects objects 39
156
+ 562 bowl bowl 24 40 7 bowl otherprop Objects bowl bowl 2880940 n02880940 bowl.n.03 objects 39
157
+ 1175 closet rod closet rod 24 40 7 otherprop Objects n04100174 rod.n.01 misc 40
158
+ 1156 bathroom counter bathroom counter 24 12 6 counter counter Furniture table table table 4379243 n03116530 counter.n.01 counter 26
159
+ 84 oven oven 23 38 7 oven otherstructure Objects n03862676 oven.n.01 appliances 37
160
+ 104 stand stand 23 39 6 stand otherfurniture Furniture table table table 4379243 n04301000 stand.n.04 table 5
161
+ 229 scale scale 23 40 7 scale otherprop Objects n04141975 scale.n.07 objects 39
162
+ 70 washing machine washing machine 23 39 6 washing machine otherfurniture Furniture washing_machine 4554684 n04554684 washer.n.03 appliances 37
163
+ 325 broom broom 22 40 7 broom otherprop Objects n02906734 broom.n.01 objects 39
164
+ 169 hat hat 22 40 7 hat otherprop Objects n03497657 hat.n.01 clothes 38
165
+ 128 shower wall shower wall 22 1 12 wall wall Wall n04208936 shower.n.01 wall 1
166
+ 331 guitar case guitar case 21 40 7 guitar case otherprop Objects objects 39
167
+ 87 rack rack 21 39 6 stand otherfurniture Furniture n04038440 rack.n.05 shelving 31
168
+ 488 water pitcher water pitcher 21 40 7 pitcher otherprop Objects n03950228 pitcher.n.02 objects 39
169
+ 776 laundry detergent laundry detergent 21 40 7 otherprop Objects objects 39
170
+ 370 hair dryer hair dryer 21 40 7 hair dryer otherprop Objects n03483316 hand_blower.n.01 objects 39
171
+ 191 pillar pillar 21 38 7 column otherstructure Objects n03073977 column.n.07 column 24
172
+ 748 divider divider 20 40 7 otherprop Objects wall 1
173
+ 242 power outlet power outlet 19 40 7 otherprop Objects misc 40
174
+ 45 dining table dining table 19 7 10 table table Table table table table 4379243 n04379243 table.n.02 table 5
175
+ 417 shower floor shower floor 19 2 5 floor floor Floor n04208936 shower.n.01 floor 2
176
+ 70 washing machines washing machine 19 39 6 washing machine otherfurniture Furniture washing_machine 4554684 n04554684 washer.n.03 appliances 37
177
+ 188 shower door shower door 19 8 12 door door Wall door n04208936 shower.n.01 door 4
178
+ 1176 coffee kettle coffee kettle 18 40 7 pot otherprop Objects n03612814 kettle.n.01 objects 39
179
+ 1177 wardrobe cabinet wardrobe 18 39 6 wardrobe otherfurniture Furniture wardrobe n04550184 wardrobe.n.01 furniture 36
180
+ 1178 structure structure 18 38 7 otherstructure Objects misc 40
181
+ 18 bookshelves bookshelf 17 10 6 bookshelf bookshelf Furniture bookshelf bookshelf 2871439 n02871439 bookshelf.n.01 shelving 31
182
+ 110 clothes dryer clothes dryer 17 39 6 otherfurniture Furniture n03251766 dryer.n.01 appliances 37
183
+ 148 toaster toaster 17 40 7 toaster otherprop Objects n04442312 toaster.n.02 appliances 37
184
+ 63 shoe shoe 17 40 7 shoe otherprop Objects n04199027 shoe.n.01 clothes 38
185
+ 155 ironing board ironing board 16 39 6 ironing board otherfurniture Furniture n03586090 ironing_board.n.01 objects 39
186
+ 572 alarm clock alarm clock 16 40 7 alarm clock otherprop Objects clock 3046257 n02694662 alarm_clock.n.01 objects 39
187
+ 1179 shower head shower head 15 38 7 otherstructure Objects shower 23
188
+ 28 lamp base lamp 15 35 7 lamp lamp Objects lamp lamp 3636649 n03636649 lamp.n.02 lighting 28
189
+ 392 water bottle water bottle 15 40 7 bottle otherprop Objects bottle bottle 2876657 n04557648 water_bottle.n.01 objects 39
190
+ 1180 keyboard piano keyboard piano 15 39 6 piano otherfurniture Furniture piano piano 3928116 n03928116 piano.n.01 furniture 36
191
+ 609 projector screen projector screen 15 38 7 projector screen otherstructure Objects misc 40
192
+ 1181 case of water bottles case of water bottles 15 40 7 otherprop Objects objects 39
193
+ 195 toaster oven toaster oven 14 40 7 toaster oven otherprop Objects n04442441 toaster_oven.n.01 appliances 37
194
+ 581 music stand music stand 14 39 6 music stand otherfurniture Furniture n03801760 music_stand.n.01 furniture 36
195
+ 58 staircase stairs 14 38 7 stairs otherstructure Objects n04298308 stairway.n.01 stairs 16
196
+ 1182 coat rack coat rack 14 40 7 otherprop Objects n03059103 coatrack.n.01 shelving 3
197
+ 1183 storage organizer storage organizer 14 40 7 otherprop Objects shelving 3
198
+ 139 machine machine 14 40 7 machine otherprop Objects n03699975 machine.n.01 appliances 37
199
+ 1184 folded chair folded chair 14 5 4 chair chair Chair chair chair chair 3001627 n03001627 chair.n.01 chair 3
200
+ 1185 fire alarm fire alarm 14 40 7 otherprop Objects n03343737 fire_alarm.n.02 misc 40
201
+ 156 fireplace fireplace 13 38 7 fireplace otherstructure Objects n03346455 fireplace.n.01 fireplace 27
202
+ 408 vent vent 13 40 7 otherprop Objects n04526241 vent.n.01 misc 40
203
+ 213 furniture furniture 13 39 6 furniture otherfurniture Furniture n03405725 furniture.n.01 furniture 36
204
+ 1186 power strip power strip 13 40 7 otherprop Objects objects 39
205
+ 1187 calendar calendar 13 40 7 otherprop Objects objects 39
206
+ 1188 poster poster 13 11 8 picture picture Picture n03931044 picture.n.01 picture 6
207
+ 115 toilet paper holder toilet paper holder 13 40 7 toilet paper holder otherprop Objects objects 39
208
+ 1189 potted plant potted plant 12 40 7 plant otherprop Objects plant n00017222 plant.n.02 plant 14
209
+ 304 stuffed animal stuffed animal 12 40 7 stuffed animal otherprop Objects n04399382 teddy.n.01 objects 39
210
+ 1190 luggage luggage 12 40 7 luggage otherprop Objects n02774630 baggage.n.01 objects 39
211
+ 21 curtains curtain 12 16 13 curtain curtain Window curtain n03151077 curtain.n.01 curtain 12
212
+ 312 headphones headphones 12 40 7 otherprop Objects n03261776 earphone.n.01 objects 39
213
+ 233 crate crate 12 39 6 crate otherfurniture Furniture n03127925 crate.n.01 objects 39
214
+ 286 candle candle 12 40 7 candle otherprop Objects lamp n02948072 candle.n.01 objects 39
215
+ 264 projector projector 12 40 7 projector otherprop Objects n04009552 projector.n.02 objects 39
216
+ 110 clothes dryers clothes dryer 12 39 6 otherfurniture Furniture n03251766 dryer.n.01 appliances 37
217
+ 1191 mattress mattress 12 4 1 bed bed Bed bed bed bed 2818832 n02818832 bed.n.01 bed 11
218
+ 356 dustpan dustpan 12 40 7 otherprop Objects n03259009 dustpan.n.02 objects 39
219
+ 25 drawer drawer 11 39 6 drawer otherfurniture Furniture n03233905 drawer.n.01 furniture 36
220
+ 750 rod rod 11 40 7 otherprop Objects pistol 3948459 n03427202 gat.n.01 misc 40
221
+ 269 globe globe 11 40 7 globe otherprop Objects objects 39
222
+ 307 footrest footrest 11 39 6 foot rest otherfurniture Furniture stool n03380724 footstool.n.01 stool 19
223
+ 410 piano bench piano bench 11 39 6 piano bench otherfurniture Furniture bench bench 2828884 n02828884 bench.n.01 seating 34
224
+ 730 breakfast bar breakfast bar 11 38 7 bar otherstructure Objects counter 26
225
+ 216 step stool step stool 11 40 7 step stool otherprop Objects stool n04315713 step_stool.n.01 stool 19
226
+ 1192 hand rail hand rail 11 38 7 railing otherstructure Objects railing 30
227
+ 119 vending machine vending machine 11 40 7 machine otherprop Objects n04525305 vending_machine.n.01 appliances 37
228
+ 682 ceiling fan ceiling fan 11 40 7 fan otherprop Objects n03320046 fan.n.01 misc 40
229
+ 434 swiffer swiffer 11 40 7 otherprop Objects objects 39
230
+ 126 foosball table foosball table 11 39 6 foosball table otherfurniture Furniture table table table 4379243 n04379243 table.n.02 table 5
231
+ 919 jar jar 11 40 7 jar otherprop Objects jar 3593526 n03593526 jar.n.01 objects 39
232
+ 85 footstool footstool 11 39 6 ottoman otherfurniture Furniture stool n03380724 footstool.n.01 stool 19
233
+ 1193 folded table folded table 10 7 10 table table Table table table table 4379243 n04379243 table.n.02 table 5
234
+ 108 round table round table 10 7 10 table table Table table table table 4379243 n04114554 round_table.n.02 table 5
235
+ 135 hamper hamper 10 40 7 basket otherprop Objects basket 2801938 n03482405 hamper.n.02 objects 39
236
+ 1194 poster tube poster tube 10 40 7 otherprop Objects objects 39
237
+ 432 case case 10 40 7 case otherprop Objects objects 39
238
+ 53 carpet carpet 10 40 7 rug otherprop Objects n04118021 rug.n.01 floor 2
239
+ 1195 thermostat thermostat 10 40 7 otherprop Objects n04422875 thermostat.n.01 misc 40
240
+ 111 coat coat 10 40 7 jacket otherprop Objects n03057021 coat.n.01 clothes 38
241
+ 305 water fountain water fountain 10 38 7 water fountain otherstructure Objects n03241335 drinking_fountain.n.01 misc 40
242
+ 1125 smoke detector smoke detector 10 40 7 otherprop Objects misc 40
243
+ 13 pillows pillow 9 18 7 pillow pillow Objects pillow 3938244 n03938244 pillow.n.01 cushion 8
244
+ 1196 flip flops flip flops 9 40 7 shoe otherprop Objects n04199027 shoe.n.01 clothes 38
245
+ 1197 cloth cloth 9 21 7 clothes clothes Objects n02728440 apparel.n.01 clothes 38
246
+ 1198 banner banner 9 40 7 otherprop Objects n02788021 banner.n.01 misc 40
247
+ 1199 clothes hanger clothes hanger 9 40 7 otherprop Objects n03057920 coat_hanger.n.01 objects 39
248
+ 1200 whiteboard eraser whiteboard eraser 9 40 7 otherprop Objects objects 39
249
+ 378 iron iron 9 40 7 otherprop Objects n03584829 iron.n.04 objects 39
250
+ 591 instrument case instrument case 9 40 7 case otherprop Objects objects 39
251
+ 49 toilet paper rolls toilet paper 9 40 7 toilet paper otherprop Objects n15075141 toilet_tissue.n.01 objects 39
252
+ 92 soap soap 9 40 7 soap otherprop Objects n04253437 soap.n.01 objects 39
253
+ 1098 block block 9 40 7 otherprop Objects misc 40
254
+ 291 wall hanging wall hanging 8 40 7 otherprop Objects n03491178 hanging.n.01 picture 6
255
+ 1063 kitchen island kitchen island 8 38 7 kitchen island otherstructure Objects n03620600 kitchen_island.n.01 counter 26
256
+ 107 pipes pipe 8 38 7 otherstructure Objects misc 40
257
+ 1135 toothbrush toothbrush 8 40 7 toothbrush otherprop Objects n04453156 toothbrush.n.01 objects 39
258
+ 189 shirt shirt 8 40 7 otherprop Objects n04197391 shirt.n.01 clothes 38
259
+ 245 cutting board cutting board 8 40 7 cutting board otherprop Objects n03025513 chopping_board.n.01 objects 39
260
+ 194 vase vase 8 40 7 vase otherprop Objects vase jar 3593526 n04522168 vase.n.01 objects 39
261
+ 1201 shower control valve shower control valve 8 38 7 otherstructure Objects n04208936 shower.n.01 shower 23
262
+ 386 exercise machine exercise machine 8 40 7 machine otherprop Objects gym_equipment 33
263
+ 1202 compost bin compost bin 8 39 6 garbage bin otherfurniture Furniture trash_bin 2747177 n02747177 ashcan.n.01 objects 39
264
+ 857 shorts shorts 8 40 7 shorts otherprop Objects clothes 38
265
+ 452 tire tire 8 40 7 otherprop Objects n04440749 tire.n.01 objects 39
266
+ 1203 teddy bear teddy bear 7 40 7 stuffed animal otherprop Objects n04399382 teddy.n.01 objects 39
267
+ 346 bathrobe bathrobe 7 40 7 otherprop Objects n02807616 bathrobe.n.01 clothes 38
268
+ 152 handrail handrail 7 38 7 railing otherstructure Objects n02788148 bannister.n.02 railing 30
269
+ 83 faucet faucet 7 40 7 faucet otherprop Objects faucet 3325088 n03325088 faucet.n.01 misc 40
270
+ 1204 pantry wall pantry wall 7 1 12 wall wall Wall n04546855 wall.n.01 wall 1
271
+ 726 thermos thermos 7 40 7 flask otherprop Objects bottle bottle 2876657 n04422727 thermos.n.01 objects 39
272
+ 61 rug rug 7 40 7 rug otherprop Objects n04118021 rug.n.01 floor 2
273
+ 39 couch cushions cushion 7 18 7 pillow pillow Objects n03151500 cushion.n.03 cushion 8
274
+ 1117 tripod tripod 7 39 6 stand otherfurniture Furniture n04485082 tripod.n.01 objects 39
275
+ 540 mailbox mailbox 7 29 7 box box Objects mailbox 3710193 n03710193 mailbox.n.01 misc 40
276
+ 1205 tupperware tupperware 7 40 7 otherprop Objects objects 39
277
+ 415 shoe rack shoe rack 7 40 7 shoe rack otherprop Objects shelving 31
278
+ 31 towels towel 6 27 7 towel towel Objects n04459362 towel.n.01 towel 20
279
+ 1206 beer bottles beer bottle 6 40 7 bottle otherprop Objects bottle bottle 2876657 n02876657 bottle.n.01 objects 39
280
+ 153 treadmill treadmill 6 39 6 treadmill otherfurniture Furniture n04477387 treadmill.n.01 gym_equipment 33
281
+ 1207 salt salt 6 40 7 otherprop Objects objects 39
282
+ 129 chest chest 6 39 6 chest otherfurniture Furniture dresser dresser chest_of_drawers 13
283
+ 220 dispenser dispenser 6 40 7 otherprop Objects n03210683 dispenser.n.01 objects 39
284
+ 1208 mirror doors mirror door 6 8 12 door door Wall door n03221720 door.n.01 door 4
285
+ 231 remote remote 6 40 7 otherprop Objects remote_control 4074963 n04074963 remote_control.n.01 objects 39
286
+ 1209 folded ladder folded ladder 6 39 6 ladder otherfurniture Furniture stairs n03632277 ladder.n.01 misc 40
287
+ 39 cushion cushion 6 18 7 pillow pillow Objects n03151500 cushion.n.03 cushion 8
288
+ 1210 carton carton 6 40 7 otherprop Objects objects 39
289
+ 117 step step 6 38 7 otherstructure Objects n04314914 step.n.04 misc 40
290
+ 822 drying rack drying rack 6 39 6 drying rack otherfurniture Furniture shelving 31
291
+ 238 slippers slipper 6 40 7 shoe otherprop Objects n04241394 slipper.n.01 clothes 38
292
+ 143 pool table pool table 6 39 6 pool table otherfurniture Furniture table table table 4379243 n03982430 pool_table.n.01 table 5
293
+ 1211 soda stream soda stream 6 40 7 otherprop Objects objects 39
294
+ 228 toilet brush toilet brush 6 40 7 toilet brush otherprop Objects objects 39
295
+ 494 loft bed loft bed 6 4 1 bed bed Bed bed bed bed 2818832 n02818832 bed.n.01 bed 11
296
+ 226 cooking pot cooking pot 6 40 7 pot otherprop Objects objects 39
297
+ 91 heater heater 6 39 6 heater otherfurniture Furniture n03508101 heater.n.01 misc 40
298
+ 1072 messenger bag messenger bag 6 37 7 bag bag Objects objects 39
299
+ 435 stapler stapler 6 40 7 stapler otherprop Objects n04303497 stapler.n.01 objects 39
300
+ 1165 closet walls closet wall 5 1 12 wall wall Wall n04546855 wall.n.01 wall 1
301
+ 345 scanner scanner 5 40 7 otherprop Objects appliances 37
302
+ 893 elliptical machine elliptical machine 5 40 7 machine otherprop Objects gym_equipment 33
303
+ 621 kettle kettle 5 40 7 pot otherprop Objects n03612814 kettle.n.01 objects 39
304
+ 1212 metronome metronome 5 40 7 otherprop Objects n03757604 metronome.n.01 objects 39
305
+ 297 dumbell dumbell 5 40 7 otherprop Objects objects 39
306
+ 1213 music book music book 5 23 2 book books Books n02870526 book.n.11 objects 39
307
+ 1214 rice cooker rice cooker 5 40 7 otherprop Objects objects 39
308
+ 1215 dart board dart board 5 38 7 board otherstructure Objects n03162940 dartboard.n.01 objects 39
309
+ 529 sewing machine sewing machine 5 40 7 sewing machine otherprop Objects n04179913 sewing_machine.n.01 objects 39
310
+ 1216 grab bar grab bar 5 38 7 railing otherstructure Objects railing 30
311
+ 1217 flowerpot flowerpot 5 40 7 vase otherprop Objects vase jar 3593526 n04522168 vase.n.01 objects 39
312
+ 1218 painting painting 5 11 8 picture picture Picture n03931044 picture.n.01 picture 6
313
+ 1219 railing railing 5 38 7 railing otherstructure Objects n04047401 railing.n.01 railing 30
314
+ 1220 stair stair 5 38 7 stairs otherstructure Objects stairs n04314914 step.n.04 stairs 16
315
+ 525 toolbox toolbox 5 39 6 chest otherfurniture Furniture n04452615 toolbox.n.01 objects 39
316
+ 204 nerf gun nerf gun 5 40 7 otherprop Objects objects 39
317
+ 693 binders binder 5 40 7 binder otherprop Objects objects 39
318
+ 179 desk lamp desk lamp 5 35 7 lamp lamp Objects lamp lamp 3636649 n03636649 lamp.n.02 lighting 28
319
+ 1221 quadcopter quadcopter 5 40 7 otherprop Objects objects 39
320
+ 1222 pitcher pitcher 5 40 7 pitcher otherprop Objects n03950228 pitcher.n.02 objects 39
321
+ 1223 hanging hanging 5 40 7 otherprop Objects misc 40
322
+ 1224 mail mail 5 40 7 otherprop Objects misc 40
323
+ 1225 closet ceiling closet ceiling 5 22 3 ceiling ceiling Ceiling n02990373 ceiling.n.01 ceiling 17
324
+ 1226 hoverboard hoverboard 5 40 7 otherprop Objects objects 39
325
+ 1227 beanbag chair beanbag chair 5 39 6 bean bag otherfurniture Furniture n02816656 beanbag.n.01 chair 3
326
+ 571 water heater water heater 5 40 7 water heater otherprop Objects n04560113 water_heater.n.01 misc 40
327
+ 1228 spray bottle spray bottle 5 40 7 bottle otherprop Objects bottle bottle 2876657 n02876657 bottle.n.01 objects 39
328
+ 556 rope rope 5 40 7 rope otherprop Objects n04108268 rope.n.01 objects 39
329
+ 280 plastic container plastic container 5 40 7 container otherprop Objects objects 39
330
+ 1229 soap bottle soap bottle 5 40 7 soap otherprop Objects objects 39
331
+ 1230 ikea bag ikea bag 4 37 7 bag bag Objects 2773838 n02773838 bag.n.06 objects 39
332
+ 1231 sleeping bag sleeping bag 4 40 7 otherprop Objects n04235860 sleeping_bag.n.01 objects 39
333
+ 1232 duffel bag duffel bag 4 37 7 bag bag Objects suitcase 2773838 n02773838 bag.n.06 objects 39
334
+ 746 frying pan frying pan 4 40 7 frying pan otherprop Objects n03400231 frying_pan.n.01 objects 39
335
+ 1233 oven mitt oven mitt 4 40 7 otherprop Objects objects 39
336
+ 1234 pot pot 4 40 7 pot otherprop Objects n04235860 sleeping_bag.n.01 objects 39
337
+ 144 hand dryer hand dryer 4 40 7 otherprop Objects objects 39
338
+ 282 dollhouse dollhouse 4 39 6 doll house otherfurniture Furniture n03219483 dollhouse.n.01 objects 39
339
+ 167 shampoo bottle shampoo bottle 4 40 7 bottle otherprop Objects bottle bottle 2876657 n02876657 bottle.n.01 objects 39
340
+ 1235 hair brush hair brush 4 40 7 otherprop Objects n02908217 brush.n.02 objects 39
341
+ 1236 tennis racket tennis racket 4 40 7 otherprop Objects n04409806 tennis_racket.n.01 objects 39
342
+ 1237 display case display case 4 40 7 case otherprop Objects objects 39
343
+ 234 ping pong table ping pong table 4 39 6 ping pong table otherfurniture Furniture table table table 4379243 n04379243 table.n.02 table 5
344
+ 563 boiler boiler 4 40 7 otherprop Objects misc 40
345
+ 1238 bag of coffee beans bag of coffee beans 4 37 7 bag bag Objects suitcase 2773838 n02773838 bag.n.06 objects 39
346
+ 1239 bananas banana 4 40 7 otherprop Objects n00021265 food.n.01 objects 39
347
+ 1240 carseat carseat 4 40 7 otherprop Objects misc 40
348
+ 366 helmet helmet 4 40 7 otherprop Objects helmet 3513137 n03513137 helmet.n.02 clothes 38
349
+ 816 umbrella umbrella 4 40 7 umbrella otherprop Objects n04507155 umbrella.n.01 objects 39
350
+ 1241 coffee box coffee box 4 40 7 otherprop Objects objects 39
351
+ 719 envelope envelope 4 40 7 envelope otherprop Objects n03291819 envelope.n.01 objects 39
352
+ 284 wet floor sign wet floor sign 4 40 7 sign otherprop Objects misc 40
353
+ 1242 clothing rack clothing rack 4 39 6 stand otherfurniture Furniture n04038440 rack.n.05 shelving 31
354
+ 247 controller controller 4 40 7 otherprop Objects n03096960 control.n.09 objects 39
355
+ 1243 bath walls bathroom wall 4 1 12 wall wall Wall n04546855 wall.n.01 wall 1
356
+ 1244 podium podium 4 39 6 otherfurniture Furniture n03159640 dais.n.01 furniture 36
357
+ 1245 storage box storage box 4 29 7 box box Objects n02883344 box.n.01 objects 39
358
+ 1246 dolly dolly 4 40 7 otherprop Objects misc 40
359
+ 1247 shampoo shampoo 3 40 7 otherprop Objects n04183516 shampoo.n.01 objects 39
360
+ 592 paper tray paper tray 3 40 7 paper tray otherprop Objects objects 39
361
+ 385 cabinet door cabinet door 3 8 12 door door Wall door door 4
362
+ 1248 changing station changing station 3 40 7 otherprop Objects misc 40
363
+ 1249 poster printer poster printer 3 40 7 printer otherprop Objects printer 4004475 n04004475 printer.n.03 appliances 37
364
+ 133 screen screen 3 40 7 otherprop Objects n03151077 curtain.n.01 curtain 12
365
+ 301 soap bar soap bar 3 38 7 bar otherstructure Objects objects 39
366
+ 1250 crutches crutches 3 40 7 otherprop Objects n03141823 crutch.n.01 objects 39
367
+ 379 studio light studio light 3 38 7 light otherstructure Objects lighting 28
368
+ 130 stack of cups cup 3 40 7 cup otherprop Objects cup cup or mug 3797390 n03797390 mug.n.04 objects 39
369
+ 1251 toilet flush button toilet flush button 3 40 7 otherprop Objects objects 39
370
+ 450 trunk trunk 3 40 7 otherprop Objects misc 40
371
+ 1252 grocery bag grocery bag 3 37 7 bag bag Objects suitcase 2773838 n03461288 grocery_bag.n.01 objects 39
372
+ 316 plastic bin plastic bin 3 40 7 bin otherprop Objects objects 39
373
+ 1253 pizza box pizza box 3 29 7 box box Objects objects 39
374
+ 385 cabinet doors cabinet door 3 3 6 cabinet cabinet Furniture cabinet 2933112 n02933112 cabinet.n.01 door 4
375
+ 1254 legs legs 3 31 7 person person Objects person n05217688 person.n.02 misc 40
376
+ 461 car car 3 40 7 car otherprop Objects car car 2958343 n02958343 car.n.01 misc 40
377
+ 1255 shaving cream shaving cream 3 40 7 otherprop Objects n04186051 shaving_cream.n.01 objects 39
378
+ 1256 luggage stand luggage stand 3 39 6 stand otherfurniture Furniture n04038440 rack.n.05 shelving 31
379
+ 599 shredder shredder 3 40 7 otherprop Objects n04210120 shredder.n.01 objects 39
380
+ 281 statue statue 3 40 7 sculpture otherprop Objects n04306847 statue.n.01 misc 40
381
+ 1257 urinal urinal 3 33 7 toilet toilet Objects toilet toilet n04515991 urinal.n.01 toilet 18
382
+ 1258 hose hose 3 40 7 otherprop Objects n03539875 hose.n.03 misc 40
383
+ 1259 bike pump bike pump 3 40 7 otherprop Objects objects 39
384
+ 319 coatrack coatrack 3 40 7 otherprop Objects n03059103 coatrack.n.01 shelving 31
385
+ 1260 bear bear 3 40 7 otherprop Objects objects 39
386
+ 28 wall lamp lamp 3 35 7 lamp lamp Objects lamp lamp 3636649 n03636649 lamp.n.02 lighting 28
387
+ 1261 humidifier humidifier 3 40 7 otherprop Objects objects 39
388
+ 546 toothpaste toothpaste 3 40 7 toothpaste otherprop Objects objects 39
389
+ 1262 mouthwash bottle mouthwash bottle 3 40 7 bottle otherprop Objects bottle bottle 2876657 n02876657 bottle.n.01 objects 39
390
+ 1263 poster cutter poster cutter 3 40 7 otherprop Objects objects 39
391
+ 1264 golf bag golf bag 3 37 7 bag bag Objects suitcase 2773838 n03445617 golf_bag.n.01 objects 39
392
+ 1265 food container food container 3 40 7 container otherprop Objects n03094503 container.n.01 objects 39
393
+ 1266 camera camera 3 40 7 otherprop Objects objects 39
394
+ 28 table lamp lamp 3 35 7 lamp lamp Objects lamp lamp 3636649 n04380533 table_lamp.n.01 lighting 28
395
+ 1267 yoga mat yoga mat 3 20 5 floor mat floor mat Floor n03727837 mat.n.01 floor 2
396
+ 1268 card card 3 40 7 otherprop Objects objects 39
397
+ 1269 mug mug 3 40 7 cup otherprop Objects cup cup or mug 3797390 n03797390 mug.n.04 objects 39
398
+ 188 shower doors shower door 3 38 7 otherstructure Objects n04208936 shower.n.01 door 4
399
+ 689 cardboard cardboard 3 40 7 otherprop Objects objects 39
400
+ 1270 rack stand rack stand 3 39 6 stand otherfurniture Furniture n04038440 rack.n.05 shelving 31
401
+ 1271 boxes of paper boxes of paper 3 29 7 box box Objects n02883344 box.n.01 objects 39
402
+ 1272 flag flag 3 40 7 otherprop Objects misc 40
403
+ 354 futon futon 3 39 6 mattress otherfurniture Furniture n03408444 futon.n.01 sofa 10
404
+ 339 magazine magazine 3 40 7 magazine otherprop Objects n06595351 magazine.n.01 objects 39
405
+ 1009 exit sign exit sign 3 40 7 exit sign otherprop Objects misc 40
406
+ 1273 rolled poster rolled poster 3 40 7 otherprop Objects objects 39
407
+ 1274 wheel wheel 3 40 7 otherprop Objects objects 39
408
+ 15 pictures picture 3 11 8 picture picture Picture n03931044 picture.n.01 picture 6
409
+ 1275 blackboard eraser blackboard eraser 3 40 7 eraser otherprop Objects n03294833 eraser.n.01 objects 39
410
+ 361 organizer organizer 3 40 7 otherprop Objects n03918737 personal_digital_assistant.n.01 objects 39
411
+ 1276 doll doll 3 40 7 toy otherprop Objects n03219135 doll.n.01 objects 39
412
+ 326 book rack book rack 3 39 6 bookrack otherfurniture Furniture objects 39
413
+ 1277 laundry bag laundry bag 3 40 7 laundry basket otherprop Objects basket 2801938 n03050864 clothes_hamper.n.01 objects 39
414
+ 1278 sponge sponge 3 40 7 otherprop Objects n01906749 sponge.n.04 objects 39
415
+ 116 seating seat 3 39 6 furniture otherfurniture Furniture n04161981 seat.n.03 furniture 36
416
+ 1184 folded chairs folded chair 2 5 4 chair chair Chair chair chair chair 3001627 n03001627 chair.n.01 chair 3
417
+ 1279 lotion bottle lotion bottle 2 40 7 bottle otherprop Objects bottle bottle 2876657 n02876657 bottle.n.01 objects 39
418
+ 212 can can 2 40 7 can otherprop Objects can 2946921 n02946921 can.n.01 objects 39
419
+ 1280 lunch box lunch box 2 40 7 otherprop Objects objects 39
420
+ 1281 food display food display 2 40 7 otherprop Objects misc 40
421
+ 794 storage shelf storage shelf 2 40 7 otherprop Objects shelving 31
422
+ 1282 sliding wood door sliding wood door 2 40 7 otherprop Objects door 4
423
+ 955 pants pants 2 40 7 otherprop Objects n04489008 trouser.n.01 clothes 38
424
+ 387 wood wood 2 40 7 otherprop Objects misc 40
425
+ 69 boards board 2 38 7 board otherstructure Objects board_panel 35
426
+ 65 bottles bottle 2 40 7 bottle otherprop Objects bottle bottle 2876657 n02876657 bottle.n.01 objects 39
427
+ 523 washcloth washcloth 2 40 7 otherprop Objects n04554523 washcloth.n.01 towel 20
428
+ 389 workbench workbench 2 39 6 bench otherfurniture Furniture bench table 4379243 n04600486 workbench.n.01 table 5
429
+ 29 open kitchen cabinet kitchen cabinet 2 3 6 cabinet cabinet Furniture n02933112 cabinet.n.01 cabinet 7
430
+ 1283 organizer shelf organizer shelf 2 15 6 shelves shelves Furniture bookshelf bookshelf 2871439 n02871439 bookshelf.n.01 shelving 31
431
+ 146 frame frame 2 38 7 otherstructure Objects misc 40
432
+ 130 cups cup 2 40 7 cup otherprop Objects cup cup or mug 3797390 n03797390 mug.n.04 objects 39
433
+ 372 exercise ball exercise ball 2 40 7 ball otherprop Objects n04285146 sports_equipment.n.01 gym_equipment 33
434
+ 289 easel easel 2 39 6 stand otherfurniture Furniture n03262809 easel.n.01 furniture 36
435
+ 440 garbage bag garbage bag 2 37 7 bag bag Objects suitcase 2773838 n02773838 bag.n.06 objects 39
436
+ 321 roomba roomba 2 40 7 otherprop Objects objects 39
437
+ 976 garage door garage door 2 38 7 garage door otherstructure Objects door door 4
438
+ 1256 luggage rack luggage stand 2 39 6 stand otherfurniture Furniture n04038440 shelving 31
439
+ 1284 bike lock bike lock 2 40 7 otherprop Objects objects 39
440
+ 1285 briefcase briefcase 2 40 7 otherprop Objects n02900705 briefcase.n.01 objects 39
441
+ 357 hand towel hand towel 2 27 7 towel towel Objects n03490006 hand_towel.n.01 towel 20
442
+ 1286 bath products bath product 2 40 7 otherprop Objects objects 39
443
+ 1287 star star 2 40 7 otherprop Objects n09444783 star.n.03 misc 40
444
+ 365 map map 2 40 7 map otherprop Objects n03720163 map.n.01 misc 40
445
+ 1288 coffee bean bag coffee bean bag 2 37 7 bag bag Objects suitcase 2773838 n02773838 bag.n.06 objects 39
446
+ 81 headboard headboard 2 39 6 headboard otherfurniture Furniture n03502200 headboard.n.01 bed 11
447
+ 1289 ipad ipad 2 40 7 otherprop Objects objects 39
448
+ 1290 display rack display rack 2 39 6 stand otherfurniture Furniture n04038440 rack.n.05 shelving 31
449
+ 948 traffic cone traffic cone 2 40 7 cone otherprop Objects cone objects 39
450
+ 174 toiletry toiletry 2 40 7 otherprop Objects n04447443 toiletry.n.01 objects 39
451
+ 1028 canopy canopy 2 40 7 otherprop Objects misc 40
452
+ 1291 massage chair massage chair 2 5 4 chair chair Chair chair chair chair 3001627 n03001627 chair.n.01 chair 3
453
+ 1292 paper organizer paper organizer 2 40 7 otherprop Objects objects 39
454
+ 1005 barricade barricade 2 40 7 otherprop Objects misc 40
455
+ 235 platform platform 2 38 7 otherstructure Objects misc 40
456
+ 1293 cap cap 2 40 7 hat otherprop Objects n03497657 hat.n.01 clothes 38
457
+ 1294 dumbbell plates dumbbell plates 2 40 7 otherprop Objects objects 39
458
+ 1295 elevator elevator 2 38 7 otherstructure Objects misc 40
459
+ 1296 cooking pan cooking pan 2 40 7 pan otherprop Objects n03880531 pan.n.01 objects 39
460
+ 1297 trash bag trash bag 2 37 7 bag bag Objects objects 39
461
+ 1298 santa santa 2 40 7 otherprop Objects misc 40
462
+ 1299 jewelry box jewelry box 2 29 7 box box Objects n02883344 box.n.01 objects 39
463
+ 1300 boat boat 2 40 7 otherprop Objects misc 40
464
+ 1301 sock sock 2 21 7 clothes clothes Objects n04254777 sock.n.01 clothes 38
465
+ 1051 kinect kinect 2 40 7 kinect otherprop Objects objects 39
466
+ 566 crib crib 2 39 6 crib otherfurniture Furniture furniture 36
467
+ 1302 plastic storage bin plastic storage bin 2 40 7 container otherprop Objects n03094503 container.n.01 objects 39
468
+ 1062 cooler cooler 2 24 6 refridgerator refridgerator Furniture n03102654 cooler.n.01 appliances 37
469
+ 1303 kitchen apron kitchen apron 2 21 7 clothes clothes Objects n02728440 apparel.n.01 clothes 38
470
+ 1304 dishwashing soap bottle dishwashing soap bottle 2 40 7 bottle otherprop Objects bottle bottle 2876657 n02876657 bottle.n.01 objects 39
471
+ 1305 xbox controller xbox controller 2 40 7 otherprop Objects objects 39
472
+ 1306 banana holder banana holder 2 40 7 otherprop Objects objects 39
473
+ 298 ping pong paddle ping pong paddle 2 40 7 otherprop Objects table 5
474
+ 1307 airplane airplane 2 40 7 otherprop Objects misc 40
475
+ 1308 conditioner bottle conditioner bottle 2 40 7 bottle otherprop Objects bottle bottle 2876657 n02876657 bottle.n.01 objects 39
476
+ 1309 tea kettle tea kettle 2 40 7 tea kettle otherprop Objects n04397768 teakettle.n.01 objects 39
477
+ 43 bedframe bedframe 2 39 6 otherfurniture Furniture n02822579 bedstead.n.01 bed 11
478
+ 1310 wood beam wood beam 2 38 7 otherstructure Objects beam 29
479
+ 593 toilet paper package toilet paper package 2 40 7 otherprop Objects objects 39
480
+ 1311 wall mounted coat rack wall mounted coat rack 2 40 7 otherprop Objects n03059103 coatrack.n.01 shelving 31
481
+ 1312 film light film light 2 40 7 otherprop Objects lighting 28
482
+ 749 ceiling lamp ceiling lamp 1 35 7 lamp lamp Objects lamp lamp 3636649 n03636649 lamp.n.02 lighting 28
483
+ 623 chain chain 1 40 7 otherprop Objects chair 3
484
+ 1313 sofa sofa 1 6 9 sofa sofa Sofa sofa sofa sofa 4256520 n04256520 sofa.n.01 sofa 10
485
+ 99 closet wardrobe wardrobe 1 39 6 wardrobe otherfurniture Furniture wardrobe n04550184 wardrobe.n.01 furniture 36
486
+ 265 sweater sweater 1 40 7 otherprop Objects n04370048 sweater.n.01 clothes 38
487
+ 1314 kitchen mixer kitchen mixer 1 40 7 otherprop Objects appliances 37
488
+ 99 wardrobe wardrobe 1 39 6 wardrobe otherfurniture Furniture wardrobe n04550184 wardrobe.n.01 furniture 36
489
+ 1315 water softener water softener 1 40 7 otherprop Objects misc 40
490
+ 448 banister banister 1 38 7 banister otherstructure Objects n02788148 bannister.n.02 railing 30
491
+ 257 trolley trolley 1 40 7 trolley otherprop Objects n04335435 streetcar.n.01 misc 40
492
+ 1316 pantry shelf pantry shelf 1 15 6 shelves shelves Furniture bookshelf bookshelf 2871439 n02871439 bookshelf.n.01 shelving 31
493
+ 786 sofa bed sofa bed 1 4 1 bed bed Bed bed bed bed 2818832 n02818832 bed.n.01 bed 11
494
+ 801 loofa loofa 1 40 7 otherprop Objects objects 39
495
+ 972 shower faucet handle shower faucet handle 1 40 7 handle otherprop Objects shower 23
496
+ 1317 toy piano toy piano 1 40 7 toy otherprop Objects n03964744 plaything.n.01 objects 39
497
+ 1318 fish fish 1 40 7 otherprop Objects n02512053 fish.n.01 objects 39
498
+ 75 file cabinets file cabinet 1 3 6 cabinet cabinet Furniture cabinet 2933112 n03337140 file.n.03 cabinet 7
499
+ 657 cat litter box cat litter box 1 29 7 box box Objects objects 39
500
+ 561 electric panel electric panel 1 40 7 otherprop Objects misc 40
501
+ 93 suitcases suitcase 1 40 7 luggage otherprop Objects n02774630 baggage.n.01 objects 39
502
+ 513 curtain rod curtain rod 1 38 7 curtain rod otherstructure Objects curtain 12
503
+ 411 bunk bed bunk bed 1 39 6 bunk bed otherfurniture Furniture bed bed bed 2818832 n02920259 bunk_bed.n.01 bed 11
504
+ 1122 chandelier chandelier 1 38 7 chandelier otherstructure Objects n03005285 chandelier.n.01 lighting 28
505
+ 922 tape tape 1 40 7 tape otherprop Objects objects 39
506
+ 88 plates plate 1 40 7 otherprop Objects n03959485 plate.n.04 objects 39
507
+ 518 alarm alarm 1 40 7 alarm otherprop Objects clock 3046257 n02694662 alarm_clock.n.01 objects 39
508
+ 814 fire hose fire hose 1 40 7 otherprop Objects n03346004 fire_hose.n.01 misc 40
509
+ 1319 toy dinosaur toy dinosaur 1 40 7 toy otherprop Objects n03964744 plaything.n.01 objects 39
510
+ 1320 cone cone 1 40 7 otherprop Objects objects 39
511
+ 649 glass doors glass door 1 8 12 door door Wall door n03221720 door.n.01 door 4
512
+ 607 hatrack hatrack 1 40 7 otherprop Objects n03059103 coatrack.n.01 shelving 31
513
+ 819 subwoofer subwoofer 1 40 7 speaker otherprop Objects speaker 3691459 n04349401 subwoofer.n.01 objects 39
514
+ 1321 fire sprinkler fire sprinkler 1 40 7 otherprop Objects misc 40
515
+ 1322 trash cabinet trash cabinet 1 3 6 cabinet cabinet Furniture cabinet 2933112 n02933112 cabinet.n.01 cabinet 7
516
+ 1204 pantry walls pantry wall 1 1 12 wall wall Wall n04546855 wall.n.01 wall 1
517
+ 227 photo photo 1 40 7 photo otherprop Objects n03925226 photograph.n.01 picture 6
518
+ 817 barrier barrier 1 40 7 otherprop Objects n02796623 barrier.n.01 misc 40
519
+ 130 stacks of cups cup 1 40 7 otherprop Objects n03147509 cup.n.01 objects 39
520
+ 712 beachball beachball 1 40 7 ball otherprop Objects n02814224 beach_ball.n.01 objects 39
521
+ 1323 folded boxes folded boxes 1 40 7 otherprop Objects objects 39
522
+ 1324 contact lens solution bottle contact lens solution bottle 1 40 7 bottle otherprop Objects bottle bottle 2876657 n02876657 bottle.n.01 objects 39
523
+ 673 covered box covered box 1 29 7 box box Objects objects 39
524
+ 459 folder folder 1 40 7 folder otherprop Objects n03376279 folder.n.02 objects 39
525
+ 643 mail trays mail tray 1 40 7 mail tray otherprop Objects objects 39
526
+ 238 slipper slipper 1 40 7 otherprop Objects n04241394 slipper.n.01 clothes 38
527
+ 765 magazine rack magazine rack 1 39 6 stand otherfurniture Furniture n03704549 magazine_rack.n.01 shelving 31
528
+ 1008 sticker sticker 1 40 7 sticker otherprop Objects n07272545 gummed_label.n.01 objects 39
529
+ 225 lotion lotion 1 40 7 otherprop Objects n03690938 lotion.n.01 objects 39
530
+ 1083 buddha buddha 1 40 7 otherprop Objects objects 39
531
+ 813 file organizer file organizer 1 40 7 otherprop Objects objects 39
532
+ 138 paper towel rolls paper towel roll 1 40 7 paper towel otherprop Objects n03887697 paper_towel.n.01 towel 20
533
+ 1145 night lamp night lamp 1 35 7 lamp lamp Objects lamp lamp 3636649 n03636649 lamp.n.02 lighting 28
534
+ 796 fuse box fuse box 1 40 7 otherprop Objects misc 40
535
+ 1325 knife block knife block 1 40 7 otherprop Objects objects 39
536
+ 363 furnace furnace 1 39 6 furnace otherfurniture Furniture n03404449 furnace.n.01
537
+ 1174 cd cases cd case 1 40 7 otherprop Objects objects 39
538
+ 38 stools stool 1 40 7 stool otherprop Objects stool n04326896 stool.n.01 stool 19
539
+ 1326 hand sanitzer dispenser hand sanitzer dispenser 1 40 7 otherprop Objects n04254120 soap_dispenser.n.01 objects 39
540
+ 997 teapot teapot 1 40 7 tea pot otherprop Objects n04398044 teapot.n.01 objects 39
541
+ 1327 pen holder pen holder 1 40 7 otherprop Objects objects 39
542
+ 1328 tray rack tray rack 1 40 7 otherprop Objects objects 39
543
+ 1329 wig wig 1 40 7 otherprop Objects n04584207 wig.n.01 objects 39
544
+ 182 switch switch 1 40 7 otherprop Objects n04372370 switch.n.01 misc 40
545
+ 280 plastic containers plastic container 1 40 7 container otherprop Objects n03094503 container.n.01 objects 39
546
+ 1330 night light night light 1 40 7 otherprop Objects lighting 28
547
+ 1331 notepad notepad 1 40 7 otherprop Objects objects 39
548
+ 1332 mail bin mail bin 1 40 7 otherprop Objects misc 40
549
+ 1333 elevator button elevator button 1 40 7 otherprop Objects misc 40
550
+ 939 gaming wheel gaming wheel 1 40 7 otherprop Objects objects 39
551
+ 1334 drum set drum set 1 40 7 otherprop Objects objects 39
552
+ 480 cosmetic bag cosmetic bag 1 37 7 bag bag Objects objects 39
553
+ 907 coffee mug coffee mug 1 40 7 vessel otherprop Objects cup or mug 3797390 n03063599 coffee_mug.n.01 objects 39
554
+ 1335 closet shelf closet shelf 1 15 6 shelves shelves Furniture bookshelf bookshelf 2871439 n02871439 bookshelf.n.01 shelving 31
555
+ 1336 baby mobile baby mobile 1 40 7 otherprop Objects objects 39
556
+ 829 diaper bin diaper bin 1 40 7 bin otherprop Objects objects 39
557
+ 947 door wall door wall 1 1 12 wall wall Wall wall 1
558
+ 1116 stepstool stepstool 1 40 7 step stool otherprop Objects objects 39
559
+ 599 paper shredder shredder 1 40 7 otherprop Objects n04210120 shredder.n.01 objects 39
560
+ 733 dress rack dress rack 1 40 7 otherprop Objects n03238762 dress_rack.n.01 misc 40
561
+ 123 cover cover 1 40 7 blanket otherprop Objects objects 39
562
+ 506 shopping bag shopping bag 1 37 7 bag bag Objects n04204081 shopping_bag.n.01 objects 39
563
+ 569 sliding door sliding door 1 8 12 door door Wall door n04239074 sliding_door.n.01 door 4
564
+ 1337 exercise bike exercise bike 1 40 7 machine otherprop Objects n04210120 shredder.n.01 gym_equipment 33
565
+ 1338 recliner chair recliner chair 1 5 4 chair chair Chair chair chair chair 3001627 n03238762 dress_rack.n.01 chair 3
566
+ 1314 kitchenaid mixer kitchen mixer 1 40 7 otherprop Objects appliances 37
567
+ 1339 soda can soda can 1 40 7 can otherprop Objects can 2946921 n02946921 can.n.01 objects 39
568
+ 1340 stovetop stovetop 1 38 7 stove otherstructure Objects stove 4330267 n04330267 stove.n.02 appliances 37
569
+ 851 stepladder stepladder 1 39 6 ladder otherfurniture Furniture stairs n04315599 step_ladder.n.01 stairs 16
570
+ 142 tap tap 1 40 7 faucet otherprop Objects faucet 3325088 n04559451 water_faucet.n.01 objects 39
571
+ 436 cable cable 1 40 7 cables otherprop Objects objects 39
572
+ 1341 baby changing station baby changing station 1 39 6 otherfurniture Furniture furniture 36
573
+ 1342 costume costume 1 21 7 clothes clothes Objects n02728440 apparel.n.01 clothes 38
574
+ 885 rocking chair rocking chair 1 5 4 chair chair Chair chair chair chair 3001627 n04099969 rocking_chair.n.01 chair 3
575
+ 693 binder binder 1 40 7 binder otherprop Objects objects 39
576
+ 815 media center media center 1 3 6 cabinet cabinet Furniture cabinet 2933112 n02933112 cabinet.n.01 cabinet 7
577
+ 401 towel rack towel rack 1 40 7 otherprop Objects n04459773 towel_rack.n.01 misc 40
578
+ 1343 medal medal 1 40 7 otherprop Objects objects 39
579
+ 1184 stack of folded chairs folded chair 1 5 4 chair chair Chair chair chair chair 3001627 n03001627 chair.n.01 chair 3
580
+ 1344 telescope telescope 1 40 7 otherprop Objects n04403638 telescope.n.01 objects 39
581
+ 1345 closet doorframe closet doorframe 1 8 12 door door Wall door door 4
582
+ 160 glass glass 1 38 7 glass otherstructure Objects n03438257 glass.n.02 misc 40
583
+ 1126 baseball cap baseball cap 1 40 7 otherprop Objects cap 2954340 n02799323 baseball_cap.n.01 clothes 38
584
+ 1346 battery disposal jar battery disposal jar 1 40 7 jar otherprop Objects jar 3593526 n03593526 jar.n.01 objects 39
585
+ 332 mop mop 1 40 7 otherprop Objects n04367480 swab.n.02 objects 39
586
+ 397 tank tank 1 40 7 otherprop Objects objects 39
587
+ 643 mail tray mail tray 1 40 7 mail tray otherprop Objects objects 39
588
+ 551 centerpiece centerpiece 1 40 7 centerpiece otherprop Objects n02994419 centerpiece.n.02 objects 39
589
+ 1163 stick stick 1 40 7 stick otherprop Objects objects 39
590
+ 1347 closet floor closet floor 1 2 5 floor floor Floor n03365592 floor.n.01 floor 2
591
+ 1348 dryer sheets dryer sheets 1 40 7 otherprop Objects objects 39
592
+ 803 bycicle bycicle 1 40 7 otherprop Objects misc 40
593
+ 484 flower stand flower stand 1 39 6 stand otherfurniture Furniture furniture 36
594
+ 1349 air mattress air mattress 1 4 1 bed bed Bed bed bed bed 2818832 n02690809 air_mattress.n.01 bed 11
595
+ 1350 clip clip 1 40 7 otherprop Objects objects 39
596
+ 222 side table side table 1 7 10 table table Table table table table 4379243 n04379243 table.n.02 table 5
597
+ 1253 pizza boxes pizza box 1 29 7 box box Objects n02883344 box.n.01 objects 39
598
+ 1351 display display 1 39 7 otherfurniture Furniture n03211117 display.n.06 misc 40
599
+ 1352 postcard postcard 1 40 7 otherprop Objects objects 39
600
+ 828 display sign display sign 1 40 7 sign otherprop Objects misc 40
601
+ 1353 paper towel paper towel 1 40 7 paper towel otherprop Objects n03887697 paper_towel.n.01 towel 20
602
+ 612 boots boot 1 40 7 shoe otherprop Objects n04199027 shoe.n.01 clothes 38
603
+ 1354 tennis racket bag tennis racket bag 1 40 7 otherprop Objects objects 39
604
+ 1355 air hockey table air hockey table 1 7 10 table table Table table table table 4379243 n04379243 table.n.02 table 5
605
+ 1301 socks sock 1 21 7 clothes clothes Objects n04254777 sock.n.01 clothes 38
606
+ 1356 food bag food bag 1 37 7 bag bag Objects objects 39
607
+ 1199 clothes hangers clothes hanger 1 40 7 otherprop Objects n03057920 coat_hanger.n.01 misc 40
608
+ 1357 starbucks cup starbucks cup 1 40 7 cup otherprop Objects cup cup or mug 3797390 n03797390 mug.n.04 objects 39
assets/meta/scannetv2_raw_categories.json ADDED
@@ -0,0 +1,609 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ "wall",
3
+ "chair",
4
+ "books",
5
+ "floor",
6
+ "door",
7
+ "object",
8
+ "window",
9
+ "table",
10
+ "trash can",
11
+ "pillow",
12
+ "picture",
13
+ "ceiling",
14
+ "box",
15
+ "doorframe",
16
+ "monitor",
17
+ "cabinet",
18
+ "desk",
19
+ "shelf",
20
+ "office chair",
21
+ "towel",
22
+ "couch",
23
+ "sink",
24
+ "backpack",
25
+ "lamp",
26
+ "bed",
27
+ "bookshelf",
28
+ "mirror",
29
+ "curtain",
30
+ "plant",
31
+ "whiteboard",
32
+ "radiator",
33
+ "book",
34
+ "kitchen cabinet",
35
+ "toilet paper",
36
+ "kitchen cabinets",
37
+ "armchair",
38
+ "shoes",
39
+ "coffee table",
40
+ "toilet",
41
+ "bag",
42
+ "clothes",
43
+ "keyboard",
44
+ "bottle",
45
+ "recycling bin",
46
+ "nightstand",
47
+ "stool",
48
+ "tv",
49
+ "file cabinet",
50
+ "dresser",
51
+ "computer tower",
52
+ "clothing",
53
+ "telephone",
54
+ "cup",
55
+ "refrigerator",
56
+ "end table",
57
+ "jacket",
58
+ "shower curtain",
59
+ "bathtub",
60
+ "microwave",
61
+ "kitchen counter",
62
+ "sofa chair",
63
+ "paper towel dispenser",
64
+ "bathroom vanity",
65
+ "suitcase",
66
+ "laptop",
67
+ "ottoman",
68
+ "shower walls",
69
+ "printer",
70
+ "counter",
71
+ "board",
72
+ "soap dispenser",
73
+ "stove",
74
+ "light",
75
+ "closet wall",
76
+ "mini fridge",
77
+ "cabinets",
78
+ "doors",
79
+ "fan",
80
+ "tissue box",
81
+ "blanket",
82
+ "bathroom stall",
83
+ "copier",
84
+ "bench",
85
+ "bar",
86
+ "soap dish",
87
+ "laundry hamper",
88
+ "storage bin",
89
+ "bathroom stall door",
90
+ "light switch",
91
+ "coffee maker",
92
+ "tv stand",
93
+ "decoration",
94
+ "ceiling light",
95
+ "range hood",
96
+ "blackboard",
97
+ "clock",
98
+ "wardrobe closet",
99
+ "rail",
100
+ "bulletin board",
101
+ "mat",
102
+ "trash bin",
103
+ "ledge",
104
+ "seat",
105
+ "mouse",
106
+ "basket",
107
+ "shower",
108
+ "dumbbell",
109
+ "paper",
110
+ "person",
111
+ "windowsill",
112
+ "closet",
113
+ "bucket",
114
+ "sign",
115
+ "speaker",
116
+ "dishwasher",
117
+ "container",
118
+ "stair rail",
119
+ "shower curtain rod",
120
+ "tube",
121
+ "bathroom cabinet",
122
+ "papers",
123
+ "storage container",
124
+ "paper bag",
125
+ "paper towel roll",
126
+ "ball",
127
+ "closet doors",
128
+ "laundry basket",
129
+ "cart",
130
+ "closet door",
131
+ "dish rack",
132
+ "stairs",
133
+ "blinds",
134
+ "stack of chairs",
135
+ "purse",
136
+ "bicycle",
137
+ "tray",
138
+ "plunger",
139
+ "paper cutter",
140
+ "toilet paper dispenser",
141
+ "boxes",
142
+ "bin",
143
+ "toilet seat cover dispenser",
144
+ "guitar",
145
+ "mailboxes",
146
+ "handicap bar",
147
+ "fire extinguisher",
148
+ "ladder",
149
+ "column",
150
+ "pipe",
151
+ "vacuum cleaner",
152
+ "plate",
153
+ "piano",
154
+ "water cooler",
155
+ "cd case",
156
+ "bowl",
157
+ "closet rod",
158
+ "bathroom counter",
159
+ "oven",
160
+ "stand",
161
+ "scale",
162
+ "washing machine",
163
+ "broom",
164
+ "hat",
165
+ "shower wall",
166
+ "guitar case",
167
+ "rack",
168
+ "water pitcher",
169
+ "laundry detergent",
170
+ "hair dryer",
171
+ "pillar",
172
+ "divider",
173
+ "power outlet",
174
+ "dining table",
175
+ "shower floor",
176
+ "washing machines",
177
+ "shower door",
178
+ "coffee kettle",
179
+ "wardrobe cabinet",
180
+ "structure",
181
+ "bookshelves",
182
+ "clothes dryer",
183
+ "toaster",
184
+ "shoe",
185
+ "ironing board",
186
+ "alarm clock",
187
+ "shower head",
188
+ "lamp base",
189
+ "water bottle",
190
+ "keyboard piano",
191
+ "projector screen",
192
+ "case of water bottles",
193
+ "toaster oven",
194
+ "music stand",
195
+ "staircase",
196
+ "coat rack",
197
+ "storage organizer",
198
+ "machine",
199
+ "folded chair",
200
+ "fire alarm",
201
+ "fireplace",
202
+ "vent",
203
+ "furniture",
204
+ "power strip",
205
+ "calendar",
206
+ "poster",
207
+ "toilet paper holder",
208
+ "potted plant",
209
+ "stuffed animal",
210
+ "luggage",
211
+ "curtains",
212
+ "headphones",
213
+ "crate",
214
+ "candle",
215
+ "projector",
216
+ "clothes dryers",
217
+ "mattress",
218
+ "dustpan",
219
+ "drawer",
220
+ "rod",
221
+ "globe",
222
+ "footrest",
223
+ "piano bench",
224
+ "breakfast bar",
225
+ "step stool",
226
+ "hand rail",
227
+ "vending machine",
228
+ "ceiling fan",
229
+ "swiffer",
230
+ "foosball table",
231
+ "jar",
232
+ "footstool",
233
+ "folded table",
234
+ "round table",
235
+ "hamper",
236
+ "poster tube",
237
+ "case",
238
+ "carpet",
239
+ "thermostat",
240
+ "coat",
241
+ "water fountain",
242
+ "smoke detector",
243
+ "pillows",
244
+ "flip flops",
245
+ "cloth",
246
+ "banner",
247
+ "clothes hanger",
248
+ "whiteboard eraser",
249
+ "iron",
250
+ "instrument case",
251
+ "toilet paper rolls",
252
+ "soap",
253
+ "block",
254
+ "wall hanging",
255
+ "kitchen island",
256
+ "pipes",
257
+ "toothbrush",
258
+ "shirt",
259
+ "cutting board",
260
+ "vase",
261
+ "shower control valve",
262
+ "exercise machine",
263
+ "compost bin",
264
+ "shorts",
265
+ "tire",
266
+ "teddy bear",
267
+ "bathrobe",
268
+ "handrail",
269
+ "faucet",
270
+ "pantry wall",
271
+ "thermos",
272
+ "rug",
273
+ "couch cushions",
274
+ "tripod",
275
+ "mailbox",
276
+ "tupperware",
277
+ "shoe rack",
278
+ "towels",
279
+ "beer bottles",
280
+ "treadmill",
281
+ "salt",
282
+ "chest",
283
+ "dispenser",
284
+ "mirror doors",
285
+ "remote",
286
+ "folded ladder",
287
+ "cushion",
288
+ "carton",
289
+ "step",
290
+ "drying rack",
291
+ "slippers",
292
+ "pool table",
293
+ "soda stream",
294
+ "toilet brush",
295
+ "loft bed",
296
+ "cooking pot",
297
+ "heater",
298
+ "messenger bag",
299
+ "stapler",
300
+ "closet walls",
301
+ "scanner",
302
+ "elliptical machine",
303
+ "kettle",
304
+ "metronome",
305
+ "dumbell",
306
+ "music book",
307
+ "rice cooker",
308
+ "dart board",
309
+ "sewing machine",
310
+ "grab bar",
311
+ "flowerpot",
312
+ "painting",
313
+ "railing",
314
+ "stair",
315
+ "toolbox",
316
+ "nerf gun",
317
+ "binders",
318
+ "desk lamp",
319
+ "quadcopter",
320
+ "pitcher",
321
+ "hanging",
322
+ "mail",
323
+ "closet ceiling",
324
+ "hoverboard",
325
+ "beanbag chair",
326
+ "water heater",
327
+ "spray bottle",
328
+ "rope",
329
+ "plastic container",
330
+ "soap bottle",
331
+ "ikea bag",
332
+ "sleeping bag",
333
+ "duffel bag",
334
+ "frying pan",
335
+ "oven mitt",
336
+ "pot",
337
+ "hand dryer",
338
+ "dollhouse",
339
+ "shampoo bottle",
340
+ "hair brush",
341
+ "tennis racket",
342
+ "display case",
343
+ "ping pong table",
344
+ "boiler",
345
+ "bag of coffee beans",
346
+ "bananas",
347
+ "carseat",
348
+ "helmet",
349
+ "umbrella",
350
+ "coffee box",
351
+ "envelope",
352
+ "wet floor sign",
353
+ "clothing rack",
354
+ "controller",
355
+ "bath walls",
356
+ "podium",
357
+ "storage box",
358
+ "dolly",
359
+ "shampoo",
360
+ "paper tray",
361
+ "cabinet door",
362
+ "changing station",
363
+ "poster printer",
364
+ "screen",
365
+ "soap bar",
366
+ "crutches",
367
+ "studio light",
368
+ "stack of cups",
369
+ "toilet flush button",
370
+ "trunk",
371
+ "grocery bag",
372
+ "plastic bin",
373
+ "pizza box",
374
+ "cabinet doors",
375
+ "legs",
376
+ "car",
377
+ "shaving cream",
378
+ "luggage stand",
379
+ "shredder",
380
+ "statue",
381
+ "urinal",
382
+ "hose",
383
+ "bike pump",
384
+ "coatrack",
385
+ "bear",
386
+ "wall lamp",
387
+ "humidifier",
388
+ "toothpaste",
389
+ "mouthwash bottle",
390
+ "poster cutter",
391
+ "golf bag",
392
+ "food container",
393
+ "camera",
394
+ "table lamp",
395
+ "yoga mat",
396
+ "card",
397
+ "mug",
398
+ "shower doors",
399
+ "cardboard",
400
+ "rack stand",
401
+ "boxes of paper",
402
+ "flag",
403
+ "futon",
404
+ "magazine",
405
+ "exit sign",
406
+ "rolled poster",
407
+ "wheel",
408
+ "pictures",
409
+ "blackboard eraser",
410
+ "organizer",
411
+ "doll",
412
+ "book rack",
413
+ "laundry bag",
414
+ "sponge",
415
+ "seating",
416
+ "folded chairs",
417
+ "lotion bottle",
418
+ "can",
419
+ "lunch box",
420
+ "food display",
421
+ "storage shelf",
422
+ "sliding wood door",
423
+ "pants",
424
+ "wood",
425
+ "boards",
426
+ "bottles",
427
+ "washcloth",
428
+ "workbench",
429
+ "open kitchen cabinet",
430
+ "organizer shelf",
431
+ "frame",
432
+ "cups",
433
+ "exercise ball",
434
+ "easel",
435
+ "garbage bag",
436
+ "roomba",
437
+ "garage door",
438
+ "luggage rack",
439
+ "bike lock",
440
+ "briefcase",
441
+ "hand towel",
442
+ "bath products",
443
+ "star",
444
+ "map",
445
+ "coffee bean bag",
446
+ "headboard",
447
+ "ipad",
448
+ "display rack",
449
+ "traffic cone",
450
+ "toiletry",
451
+ "canopy",
452
+ "massage chair",
453
+ "paper organizer",
454
+ "barricade",
455
+ "platform",
456
+ "cap",
457
+ "dumbbell plates",
458
+ "elevator",
459
+ "cooking pan",
460
+ "trash bag",
461
+ "santa",
462
+ "jewelry box",
463
+ "boat",
464
+ "sock",
465
+ "kinect",
466
+ "crib",
467
+ "plastic storage bin",
468
+ "cooler",
469
+ "kitchen apron",
470
+ "dishwashing soap bottle",
471
+ "xbox controller",
472
+ "banana holder",
473
+ "ping pong paddle",
474
+ "airplane",
475
+ "conditioner bottle",
476
+ "tea kettle",
477
+ "bedframe",
478
+ "wood beam",
479
+ "toilet paper package",
480
+ "wall mounted coat rack",
481
+ "film light",
482
+ "ceiling lamp",
483
+ "chain",
484
+ "sofa",
485
+ "closet wardrobe",
486
+ "sweater",
487
+ "kitchen mixer",
488
+ "wardrobe",
489
+ "water softener",
490
+ "banister",
491
+ "trolley",
492
+ "pantry shelf",
493
+ "sofa bed",
494
+ "loofa",
495
+ "shower faucet handle",
496
+ "toy piano",
497
+ "fish",
498
+ "file cabinets",
499
+ "cat litter box",
500
+ "electric panel",
501
+ "suitcases",
502
+ "curtain rod",
503
+ "bunk bed",
504
+ "chandelier",
505
+ "tape",
506
+ "plates",
507
+ "alarm",
508
+ "fire hose",
509
+ "toy dinosaur",
510
+ "cone",
511
+ "glass doors",
512
+ "hatrack",
513
+ "subwoofer",
514
+ "fire sprinkler",
515
+ "trash cabinet",
516
+ "pantry walls",
517
+ "photo",
518
+ "barrier",
519
+ "stacks of cups",
520
+ "beachball",
521
+ "folded boxes",
522
+ "contact lens solution bottle",
523
+ "covered box",
524
+ "folder",
525
+ "mail trays",
526
+ "slipper",
527
+ "magazine rack",
528
+ "sticker",
529
+ "lotion",
530
+ "buddha",
531
+ "file organizer",
532
+ "paper towel rolls",
533
+ "night lamp",
534
+ "fuse box",
535
+ "knife block",
536
+ "furnace",
537
+ "cd cases",
538
+ "stools",
539
+ "hand sanitzer dispenser",
540
+ "teapot",
541
+ "pen holder",
542
+ "tray rack",
543
+ "wig",
544
+ "switch",
545
+ "plastic containers",
546
+ "night light",
547
+ "notepad",
548
+ "mail bin",
549
+ "elevator button",
550
+ "gaming wheel",
551
+ "drum set",
552
+ "cosmetic bag",
553
+ "coffee mug",
554
+ "closet shelf",
555
+ "baby mobile",
556
+ "diaper bin",
557
+ "door wall",
558
+ "stepstool",
559
+ "paper shredder",
560
+ "dress rack",
561
+ "cover",
562
+ "shopping bag",
563
+ "sliding door",
564
+ "exercise bike",
565
+ "recliner chair",
566
+ "kitchenaid mixer",
567
+ "soda can",
568
+ "stovetop",
569
+ "stepladder",
570
+ "tap",
571
+ "cable",
572
+ "baby changing station",
573
+ "costume",
574
+ "rocking chair",
575
+ "binder",
576
+ "media center",
577
+ "towel rack",
578
+ "medal",
579
+ "stack of folded chairs",
580
+ "telescope",
581
+ "closet doorframe",
582
+ "glass",
583
+ "baseball cap",
584
+ "battery disposal jar",
585
+ "mop",
586
+ "tank",
587
+ "mail tray",
588
+ "centerpiece",
589
+ "stick",
590
+ "closet floor",
591
+ "dryer sheets",
592
+ "bycicle",
593
+ "flower stand",
594
+ "air mattress",
595
+ "clip",
596
+ "side table",
597
+ "pizza boxes",
598
+ "display",
599
+ "postcard",
600
+ "display sign",
601
+ "paper towel",
602
+ "boots",
603
+ "tennis racket bag",
604
+ "air hockey table",
605
+ "socks",
606
+ "food bag",
607
+ "clothes hangers",
608
+ "starbucks cup"
609
+ ]
leo/grounding_head.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from torch import nn
3
+ from leo.utils import get_mlp_head
4
+
5
+
6
class SequentialGroundHead(nn.Module):
    """Score every object in a scene against one grounding embedding.

    The head is an MLP built by ``get_mlp_head`` that consumes the
    concatenation of an object embedding and a text/grounding embedding
    (hence ``hidden_size * 2`` inputs) and emits a single logit per object.
    """

    def __init__(self, hidden_size=4096):
        super().__init__()
        # Grounding MLP: [object ; text] features -> one logit.
        self.og3d_head = get_mlp_head(hidden_size * 2, hidden_size // 2, 1, dropout=0.1)

    def forward(self, obj_embeds, grd_embdes, obj_masks=None):
        """Return one grounding logit per object.

        Args:
            obj_embeds: rank-3 tensor; dim 1 indexes objects, dim 2 features.
            grd_embdes: rank-3 grounding embedding, tiled along dim 1 once
                per object (presumably shaped (batch, 1, dim) -- TODO confirm
                against the caller).
            obj_masks: optional boolean mask; positions that are False are
                filled with ``-inf`` in the returned logits.

        Returns:
            Logits with the trailing singleton dimension squeezed away.
        """
        num_objects = obj_embeds.shape[1]
        tiled_text = grd_embdes.repeat(1, num_objects, 1)
        fused = torch.cat((obj_embeds, tiled_text), dim=2)
        logits = self.og3d_head(fused).squeeze(2)
        if obj_masks is None:
            return logits
        # In-place fill, exactly as before: padded objects can never win argmax.
        return logits.masked_fill_(obj_masks.logical_not(), -float('inf'))
leo/img_encoder.py ADDED
@@ -0,0 +1,161 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import timm
2
+ import torch
3
+ import numpy as np
4
+ import torch.nn as nn
5
+ from einops import rearrange
6
+
7
+
8
def disabled_train(self, mode=True):
    """No-op replacement for ``nn.Module.train``.

    Installed over a module's ``train`` attribute so that later train/eval
    switches leave the module's mode untouched. ``mode`` is accepted and
    ignored; the first argument is handed back unchanged.
    """
    return self
13
+
14
+
15
def simple_conv_and_linear_weights_init(m):
    """Initialise a conv / transposed-conv / linear layer in place.

    Intended for ``nn.Module.apply``. Convolution weights are drawn from
    ``U(-b, b)`` with ``b = sqrt(6 / (fan_in + fan_out))`` and biases are
    zeroed; exact ``nn.Linear`` instances are delegated to
    ``simple_linear_weights_init``. Any other module type is left untouched.
    The type test is exact (``type(m)``), so subclasses are skipped.
    """
    layer_type = type(m)
    if layer_type == nn.Linear:
        simple_linear_weights_init(m)
        return

    conv_types = (
        nn.Conv1d,
        nn.Conv2d,
        nn.Conv3d,
        nn.ConvTranspose1d,
        nn.ConvTranspose2d,
        nn.ConvTranspose3d,
    )
    if layer_type not in conv_types:
        return

    shape = list(m.weight.data.size())
    # Only dims 1..3 (fan-in) and 2..3 (kernel part of fan-out) are counted.
    fan_in = np.prod(shape[1:4])
    fan_out = np.prod(shape[2:4]) * shape[0]
    limit = np.sqrt(6.0 / (fan_in + fan_out))
    m.weight.data.uniform_(-limit, limit)
    if m.bias is not None:
        m.bias.data.fill_(0)
33
+
34
def simple_linear_weights_init(m):
    """Initialise an exact ``nn.Linear`` layer in place; ignore anything else.

    Weights are drawn from ``U(-b, b)`` with
    ``b = sqrt(6 / (in_features + out_features))`` and the bias, when the
    layer has one, is set to zero.
    """
    if type(m) != nn.Linear:
        return
    dims = list(m.weight.data.size())
    fan_out, fan_in = dims[0], dims[1]
    limit = np.sqrt(6.0 / (fan_in + fan_out))
    m.weight.data.uniform_(-limit, limit)
    if m.bias is not None:
        m.bias.data.fill_(0)
43
+
44
+
45
class Backbone2DWrapper(nn.Module):
    """Wrap a 2D backbone that exposes ``forward_features``.

    ``tag`` identifies the backbone family and decides both the advertised
    ``out_channels`` and the layout fix-ups applied to the features. With
    ``freeze=True`` all parameters stop requiring gradients, the wrapper is
    put in eval mode, and the forward pass runs under ``torch.no_grad``.
    """

    def __init__(self, model, tag, freeze=True):
        super().__init__()
        self.model = model
        self.tag = tag
        self.freeze = freeze

        # First matching family wins; the order reproduces the lookup priority.
        channels_by_family = (
            ('convnext', 1024),
            ('swin', 1024),
            ('vit', 768),
            ('resnet', 2048),
        )
        for family, channels in channels_by_family:
            if family in tag:
                self.out_channels = channels
                break
        else:
            raise NotImplementedError

        if freeze:
            for weight in self.parameters():
                weight.requires_grad = False
            self.eval()
            # Shadow nn.Module.train on this instance so later mode switches
            # do not take the frozen backbone out of eval mode.
            self.train = disabled_train

    def forward_normal(self, x, flat_output=False):
        """Run the backbone and return features as ``b c h w``.

        With ``flat_output=True`` the spatial grid is flattened and the result
        is ``b (h w) c`` instead.
        """
        tag = self.tag
        feat = self.model.forward_features(x)
        if 'swin' in tag:
            # Swin features arrive channels-last.
            feat = rearrange(feat, 'b h w c -> b c h w')
        is_clip_vit = 'vit_base_32_timm_laion2b' in tag or 'vit_base_32_timm_openai' in tag
        if is_clip_vit:
            # TODO: [CLS] is prepended to the patches.
            feat = rearrange(feat[:, 1:], 'b (h w) c -> b c h w', h=7)
        if not flat_output:
            return feat
        return rearrange(feat, 'b c h w -> b (h w) c')

    @torch.no_grad()
    def forward_frozen(self, x, flat_output=False):
        """Same as ``forward_normal`` but without building an autograd graph."""
        return self.forward_normal(x, flat_output)

    def forward(self, x, flat_output=False):
        """Dispatch to the frozen or the differentiable forward path."""
        run = self.forward_frozen if self.freeze else self.forward_normal
        return run(x, flat_output)
89
+
90
def convnext_base_laion2b(pretrained=False, freeze=True, **kwargs):
    """Build the timm ``convnext_base.clip_laion2b`` backbone, wrapped.

    Args:
        pretrained: forwarded to ``timm.create_model``.
        freeze: forwarded to ``Backbone2DWrapper``.
        **kwargs: only ``reset_clip_s2b2`` is read; when truthy, every
            state-dict entry under ``stages.3.blocks.2`` whose name contains
            ``weight`` or ``bias`` is re-drawn from a standard normal.

    Returns:
        A ``Backbone2DWrapper`` tagged ``'convnext_base_laion2b'``.
    """
    backbone = timm.create_model('convnext_base.clip_laion2b', pretrained=pretrained)

    if kwargs.get('reset_clip_s2b2'):
        state = backbone.state_dict()
        for name, tensor in state.items():
            in_target_block = 'stages.3.blocks.2' in name
            if in_target_block and ('weight' in name or 'bias' in name):
                tensor.normal_()
        backbone.load_state_dict(state, strict=True)

    return Backbone2DWrapper(backbone, 'convnext_base_laion2b', freeze=freeze)
103
+
104
+
105
class GridFeatureExtractor2D(nn.Module):
    """2D image encoder: a named backbone followed by optional pooling.

    The backbone factory is looked up among this module's globals under the
    name ``f"{backbone_name}_{backbone_pretrain_dataset}"``.

    Pooling modes:
        * ``'avg'``: global average pool, ``out_channels`` equals the
          backbone's.
        * ``'conv'``: two 1x1 convolutions down to 32 channels, flattened;
          ``out_channels`` is hard-coded for a 7x7 feature grid.
        * ``'attn'`` / ``'attention'``: per-channel spatial attention pooling.
        * falsy: no pooling, the flattened token grid is returned.
    """

    def __init__(self, backbone_name='convnext_base', backbone_pretrain_dataset='laion2b', use_pretrain=True, freeze=True, pooling='avg'):
        super().__init__()

        init_func_name = '_'.join([backbone_name, backbone_pretrain_dataset])
        init_func = globals().get(init_func_name)
        if init_func and callable(init_func):
            self.backbone = init_func(pretrained=use_pretrain, freeze=freeze)
        else:
            raise NotImplementedError(f"Backbone2D does not support {init_func_name}")

        self.pooling = pooling
        if self.pooling:
            if self.pooling == 'avg':
                self.pooling_layers = nn.Sequential(
                    nn.AdaptiveAvgPool2d(output_size=(1, 1)),
                    nn.Flatten()
                )
                self.out_channels = self.backbone.out_channels
            elif self.pooling == 'conv':
                self.pooling_layers = nn.Sequential(
                    nn.Conv2d(self.backbone.out_channels, 64, 1),
                    nn.ReLU(inplace=True),
                    nn.Conv2d(64, 32, 1),
                    nn.Flatten()
                )
                self.pooling_layers.apply(simple_conv_and_linear_weights_init)
                self.out_channels = 32 * 7 * 7  # hardcode for 224x224
            elif self.pooling in ['attn', 'attention']:
                self.visual_attention = nn.Sequential(
                    nn.Conv2d(self.backbone.out_channels, self.backbone.out_channels, 1),
                    nn.ReLU(inplace=True),
                    nn.Conv2d(self.backbone.out_channels, self.backbone.out_channels, 1),
                )
                self.visual_attention.apply(simple_conv_and_linear_weights_init)
                self.pooling_layers = self._attention_pooling
                self.out_channels = self.backbone.out_channels
            else:
                raise NotImplementedError(f"Backbone2D does not support {self.pooling} pooling")
        else:
            self.out_channels = self.backbone.out_channels

    def _attention_pooling(self, x):
        """Pool a ``(B, C, H, W)`` map to ``(B, C)`` with learned attention.

        ``self.visual_attention`` produces one logit per channel and spatial
        position; the logits are softmax-normalised over the spatial axis and
        used to take a weighted sum of ``x``.

        The previous implementation computed these weights and then multiplied
        ``x`` by itself, returning a sum of squares that ignored the attention
        branch entirely. NOTE(review): weights trained with the old behaviour
        would need re-checking -- confirm no 'attn' checkpoint depends on it.
        """
        # flatten(2) copies only when needed, so channels-last-derived
        # (non-contiguous) feature maps are accepted as well.
        attn = self.visual_attention(x).flatten(2).softmax(dim=-1)
        return torch.einsum('bcn,bcn->bc', x.flatten(2), attn)

    def forward(self, x):
        """Encode an image batch.

        Returns the pooled feature with an extra length-1 dimension inserted
        at position 1 when pooling is enabled, otherwise the backbone's
        flattened (``flat_output=True``) output.
        """
        if self.pooling:
            x = self.backbone(x, flat_output=False)
            x = self.pooling_layers(x).unsqueeze(1)
            return x
        else:
            return self.backbone(x, flat_output=True)
leo/inference.py ADDED
@@ -0,0 +1,234 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import os
3
+ import torch
4
+ import numpy as np
5
+ from leo.model import SequentialGrounder
6
+ from leo.utils import LabelConverter, convert_pc_to_box, obj_processing_post, pad_sequence
7
+ from torch.utils.data import default_collate
8
+
9
+
10
# Asset and checkpoint locations are resolved against the current working
# directory, so the process must be started from the directory that contains
# `assets/` and `checkpoint/`.
ASSET_DIR = os.path.join(os.getcwd(), 'assets')
CKPT_DIR = os.path.join(os.getcwd(), 'checkpoint/leo')

# Category vocabulary: list position <-> raw category name. The file is read
# inside a `with` block so the handle is closed right after parsing.
with open(os.path.join(ASSET_DIR, "meta/scannetv2_raw_categories.json"), 'r', encoding="utf-8") as _categories_file:
    int2cat = json.load(_categories_file)
cat2int = {w: i for i, w in enumerate(int2cat)}
label_converter = LabelConverter(os.path.join(ASSET_DIR, "meta/scannetv2-labels.combined.tsv"))


# Prompt fragments assembled by get_prompt(). The trailing spaces inside the
# role prompt are part of the prompt text.
role_prompt = (
    "You are an AI visual assistant situated in a 3D scene. "
    "You can perceive (1) an ego-view image (accessible when necessary) and (2) the objects (including yourself) in the scene (always accessible). "
    "You should properly respond to the USER's instruction according to the given visual information. "
)
#role_prompt = " "
egoview_prompt = "Ego-view image:"
objects_prompt = "Objects (including you) in the scene:"
task_prompt = "USER: {instruction} ASSISTANT:"
24
+
25
def get_prompt(instruction):
    """Return the four prompt segments for one user instruction.

    The first three segments are the fixed module-level prompts; the last is
    ``task_prompt`` with ``instruction`` substituted in.
    """
    user_turn = task_prompt.format(instruction=instruction)
    segments = {}
    segments['prompt_before_obj'] = role_prompt
    segments['prompt_middle_1'] = egoview_prompt
    segments['prompt_middle_2'] = objects_prompt
    segments['prompt_after_obj'] = user_turn
    return segments
32
+
33
def get_lang(task_item):
    """Build the language part of a model input from a task description.

    Args:
        task_item: mapping with a ``'task_description'`` string and,
            optionally, ``'action_steps'``: a sequence of mappings that each
            carry an ``'action'`` string. Other fields of the task or of its
            steps are not read here.

    Returns:
        The prompt dict from ``get_prompt``. When ``'action_steps'`` is
        present, ``'output_gt'`` is added: every action text followed by
        `` <s>``, joined with single spaces.
    """
    data_dict = get_prompt(task_item['task_description'])

    # The previous version also concatenated all action texts into a local
    # `sentence` that was never used; that dead work has been removed.
    if 'action_steps' in task_item:
        action_steps = task_item['action_steps']
        data_dict['output_gt'] = ' '.join(action['action'] + ' <s>' for action in action_steps)

    return data_dict
52
+
53
+
54
def load_data(scan_id):
    """Load one scan from ``ASSET_DIR/inputs/<scan_id>/`` and build model inputs.

    Reads ``<scan_id>_pcd.pth``, ``<scan_id>_inst.pth`` and ``obj_feats.pth``
    with ``torch.load``, cuts the point cloud into per-instance objects, drops
    wall/floor/ceiling, samples 1024 points per remaining object and runs
    ``obj_processing_post`` on them.

    Returns:
        dict with keys ``scan_id``, ``obj_fts``, ``obj_locs``, ``obj_labels``,
        ``obj_ids``, ``obj_feats``, ``img_fts``, ``img_masks``,
        ``anchor_locs``, ``anchor_orientation`` and ``obj_masks``.

    NOTE(review): the point sampling uses ``np.random.choice`` without a fixed
    seed, so repeated calls can pick different points.
    """
    one_scan = {}
    # load scan
    pcd_data = torch.load(os.path.join(ASSET_DIR, f'inputs/{scan_id}', f'{scan_id}_pcd.pth'))
    inst_to_label = torch.load(os.path.join(ASSET_DIR, f'inputs/{scan_id}', f'{scan_id}_inst.pth'))
    points, colors, instance_labels = pcd_data[0], pcd_data[1], pcd_data[-1]
    # presumably colours are stored in 0..255 and this rescales them to
    # -1..1 -- TODO confirm against the preprocessing script
    colors = colors / 127.5 - 1
    pcds = np.concatenate([points, colors], 1)
    one_scan['pcds'] = pcds
    one_scan['instance_labels'] = instance_labels
    one_scan['inst_to_label'] = inst_to_label
    # convert to gt object
    obj_pcds = []
    inst_ids = []
    inst_labels = []
    # starts all-True; points of every non-structural instance are cleared below
    bg_indices = np.full((points.shape[0], ), 1, dtype=np.bool_)
    for inst_id in inst_to_label.keys():
        # instances whose label is not in the category vocabulary are skipped
        if inst_to_label[inst_id] in cat2int.keys():
            mask = instance_labels == inst_id
            if np.sum(mask) == 0:
                # no points carry this instance id
                continue
            obj_pcds.append(pcds[mask])
            inst_ids.append(inst_id)
            inst_labels.append(cat2int[inst_to_label[inst_id]])
            if inst_to_label[inst_id] not in ['wall', 'floor', 'ceiling']:
                bg_indices[mask] = False
    one_scan['obj_pcds'] = obj_pcds
    one_scan['inst_labels'] = inst_labels
    one_scan['inst_ids'] = inst_ids
    # NOTE: 'bg_pcds', 'obj_loc' and 'obj_box' are stored in one_scan but not
    # read again in this function, and one_scan itself is not returned.
    one_scan['bg_pcds'] = pcds[bg_indices]
    # calculate box for matching
    obj_center = []
    obj_box_size = []
    for obj_pcd in obj_pcds:
        _c, _b = convert_pc_to_box(obj_pcd)
        obj_center.append(_c)
        obj_box_size.append(_b)
    one_scan['obj_loc'] = obj_center
    one_scan['obj_box'] = obj_box_size
    # load point feat
    feat_pth = os.path.join(ASSET_DIR, f'inputs/{scan_id}', 'obj_feats.pth')
    one_scan['obj_feats'] = torch.load(feat_pth).to('cpu')
    # convert to pq3d input
    obj_labels = one_scan['inst_labels'] # N
    obj_pcds = one_scan['obj_pcds']
    obj_ids = one_scan['inst_ids']
    # object filter
    excluded_labels = ['wall', 'floor', 'ceiling']
    def keep_obj(i, obj_label):
        # `i` is accepted but unused; only the category name decides
        category = int2cat[obj_label]
        # filter out background
        if category in excluded_labels:
            return False
        # filter out objects not mentioned in the sentence
        # (no such filter is implemented here; every other object is kept)
        return True
    selected_obj_idxs = [i for i, obj_label in enumerate(obj_labels) if keep_obj(i, obj_label)]
    # crop objects to max_obj_len and reorganize ids ? # TODO
    obj_labels = [obj_labels[i] for i in selected_obj_idxs]
    obj_pcds = [obj_pcds[i] for i in selected_obj_idxs]
    # subsample points: exactly 1024 per object, sampling with replacement
    # only when the object has fewer than 1024 points
    obj_pcds = np.array([obj_pcd[np.random.choice(len(obj_pcd), size=1024,
                                                  replace=len(obj_pcd) < 1024)] for obj_pcd in obj_pcds])
    # rot_matrix is returned by the helper but not used afterwards
    obj_fts, obj_locs, obj_boxes, rot_matrix = obj_processing_post(obj_pcds, rot_aug=False)
    data_dict = {
        "scan_id": scan_id,
        "obj_fts": obj_fts.float(),
        "obj_locs": obj_locs.float(),
        "obj_labels": torch.LongTensor(obj_labels),
        "obj_boxes": obj_boxes,
        "obj_pad_masks": torch.ones((len(obj_locs)), dtype=torch.bool), # used for padding in collate
        "obj_ids": torch.LongTensor([obj_ids[i] for i in selected_obj_idxs])
    }
    # convert point feature
    # NOTE(review): assumes the rows of obj_feats line up with the filtered
    # objects above -- confirm against the feature-extraction script
    data_dict['obj_feats'] = one_scan['obj_feats'].squeeze(0)

    # keep only the keys listed here; of the keys set above this drops 'obj_boxes'
    useful_keys = ['tgt_object_id', 'scan_id', 'obj_labels', 'data_idx',
                   'obj_fts', 'obj_locs', 'obj_pad_masks', 'obj_ids',
                   'source', 'prompt_before_obj', 'prompt_middle_1',
                   'prompt_middle_2', 'prompt_after_obj', 'output_gt', 'obj_feats']
    for k in list(data_dict.keys()):
        if k not in useful_keys:
            del data_dict[k]
    # add new keys because of leo: an all-zero image with its mask set to
    # False, a zero anchor location and an orientation of (0, 0, 0, 1)
    data_dict['img_fts'] = torch.zeros(3, 224, 224)
    data_dict['img_masks'] = torch.LongTensor([0]).bool()
    data_dict['anchor_locs'] = torch.zeros(3)
    data_dict['anchor_orientation'] = torch.zeros(4)
    data_dict['anchor_orientation'][-1] = 1 # xyzw
    # convert to leo format: the pad mask is exposed under the name 'obj_masks'
    data_dict['obj_masks'] = data_dict['obj_pad_masks']
    del data_dict['obj_pad_masks']

    return data_dict
147
+
148
def form_batch(data_dict):
    """Turn a single sample dict into a batch of size one.

    The per-object entries are removed from ``data_dict`` (the argument is
    mutated) and padded with ``pad_sequence``; everything left over goes
    through ``default_collate``. Keys produced by the collate step overwrite
    equally named padded keys, though the two sets are disjoint here because
    the padded keys were popped first.
    """
    samples = [data_dict]
    per_object_keys = ('obj_fts', 'obj_locs', 'obj_masks', 'obj_labels', 'obj_ids')

    # Variable-length entries: pop from every sample, then pad with zeros.
    collated = {}
    for key in per_object_keys:
        collated[key] = pad_sequence([sample.pop(key) for sample in samples], pad=0)

    # Remaining fixed-size entries use the stock collate function.
    collated.update(default_collate(samples))
    return collated
166
+
167
+
168
def inference(scan_id, task, predict_mode=False):
    """Run the sequential grounder on one scan and one task.

    A fresh ``SequentialGrounder`` is built and the checkpoint
    ``CKPT_DIR/pytorch_model.bin`` is loaded (non-strictly) on every call.

    Args:
        scan_id: scan name understood by ``load_data``.
        task: task mapping understood by ``get_lang``.
        predict_mode: forwarded to ``SequentialGrounder``; also selects the
            return format.

    Returns:
        * ``predict_mode`` falsy: a list with one entry per row of the model's
          ``'ground_logits'``, each the element of ``obj_ids[0]`` at that
          row's argmax (the indexed tensor element, not a Python int).
        * otherwise: a one-element list holding a dict with
          ``'predict_object_id'`` (list of ints) and ``'pred_plan_text'``.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    # NOTE: the original author recorded that forcing device='cpu' works for
    # predict_mode=False and for a local Gradio preview.

    data_dict = load_data(scan_id)
    data_dict.update(get_lang(task))
    data_dict = form_batch(data_dict)

    for key, value in data_dict.items():
        if isinstance(value, torch.Tensor):
            data_dict[key] = value.to(device)

    model = SequentialGrounder(predict_mode)
    state_dict = torch.load(os.path.join(CKPT_DIR, 'pytorch_model.bin'), map_location='cpu')
    model.load_state_dict(state_dict, strict=False)
    model.to(device)
    # Inference only: switch dropout off and skip autograd bookkeeping.
    # NOTE(review): assumes SequentialGrounder does not rely on training mode
    # inside forward -- confirm against leo/model.py.
    model.eval()

    with torch.no_grad():
        data_dict = model(data_dict)

    obj_ids = data_dict['obj_ids']
    ground_logits = data_dict['ground_logits']

    if not predict_mode:
        # One grounded object per row of logits.
        return [obj_ids[0][torch.argmax(step_logits).item()] for step_logits in ground_logits]

    # Predict mode: the model may have produced no grounding logits at all.
    if ground_logits is None:
        og_pred = []
        grd_batch_ind_list = []
    else:
        og_pred = torch.argmax(ground_logits, dim=1)
        grd_batch_ind_list = data_dict['grd_batch_ind_list']

    batch_size = 1  # form_batch() always produces a single-sample batch
    response_pred = []
    for i in range(batch_size):
        # Keep the predictions that belong to sample i, in generation order.
        predict_sequence = [
            og_pred[j].item()
            for j in range(len(og_pred))
            if grd_batch_ind_list[j] == i
        ]
        response_pred.append({
            'predict_object_id': [obj_ids[i][o].item() for o in predict_sequence],
            'pred_plan_text': data_dict['output_txt'][i],
        })

    return response_pred
216
+
217
if __name__ == '__main__':
    # Manual smoke run: one two-step task on a bundled scene, in predict mode.
    # The result is not printed or stored.
    find_step = {
        "target_id": "1",
        "label": "chair",
        "action": "Find the chair."
    }
    move_step = {
        "target_id": "2",
        "label": "table",
        "action": "Move the chair to the table."
    }
    demo_task = {
        "task_description": "Find the chair and move it to the table.",
        "action_steps": [find_step, move_step],
        "scan_id": "scene0050_00"
    }
    inference("scene0050_00", demo_task, predict_mode=True)
234
+
leo/model.py ADDED
@@ -0,0 +1,477 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import contextlib
2
+ import math
3
+
4
+ import clip
5
+ import torch
6
+ import torch.nn as nn
7
+ import torch.nn.functional as F
8
+ from einops import rearrange
9
+ from peft import LoraConfig, get_peft_model
10
+ from transformers import AutoModelForCausalLM, AutoTokenizer, LlamaForCausalLM, LlamaTokenizer
11
+ from leo.img_encoder import GridFeatureExtractor2D
12
+ from leo.pcd_encoder import OSE3D
13
+ from leo.grounding_head import SequentialGroundHead
14
+ from leo.utils import get_mlp_head
15
+
16
+
17
def maybe_autocast(model, dtype='bf16', enabled=True):
    """Return a context manager for mixed-precision execution of ``model``.

    Args:
        model: object exposing a ``device`` attribute.
        dtype: ``'bf16'`` selects ``torch.bfloat16`` and ``'fp16'`` selects
            ``torch.float16``; any other value falls back to
            ``torch.float32``.
        enabled: forwarded to the autocast context.

    Returns:
        ``contextlib.nullcontext()`` when ``model.device`` is the CPU device,
        otherwise ``torch.cuda.amp.autocast`` configured with the resolved
        dtype.
    """
    dtype_by_name = {
        'bf16': torch.bfloat16,
        # The old branch read `dtype == torch.float16` (a comparison), so the
        # string 'fp16' was passed to autocast unchanged.
        'fp16': torch.float16,
    }
    dtype = dtype_by_name.get(dtype, torch.float32) if isinstance(dtype, str) else torch.float32

    # Autocast is only requested off-CPU.
    if model.device != torch.device('cpu'):
        return torch.cuda.amp.autocast(dtype=dtype, enabled=enabled)
    return contextlib.nullcontext()
33
+
34
def disabled_train(self, mode=True):
    """Stand-in for ``nn.Module.train`` that changes nothing.

    Assigned to a frozen module's ``train`` attribute so that train/eval
    switches no longer affect it. ``mode`` is ignored and the first argument
    is returned as-is.
    """
    return self
39
+
40
+
41
+ class SequentialGrounder(torch.nn.Module):
42
+ def __init__(self,predict_mode=False):
43
+ super().__init__()
44
+ cfg = {
45
+ "model": {
46
+ "llm": {
47
+ "name": "Vicuna7B",
48
+ "cfg_path": "/scratch/generalvision/vicuna-7b",
49
+ "truncation_side": "right",
50
+ "max_context_len": 256,
51
+ "max_out_len": 256,
52
+ "lora": {
53
+ "flag": True,
54
+ "rank": 16,
55
+ "alpha": 16,
56
+ "dropout": 0.0,
57
+ "target_modules": ['q_proj', 'k_proj', 'v_proj', 'o_proj', 'gate_proj', 'up_proj', 'down_proj'],
58
+ },
59
+ },
60
+ "clip_txt_guidance": {
61
+ "flag": False,
62
+ "clip_out_dim": 1024,
63
+ },
64
+ },
65
+ }
66
+
67
+ self.predict_mode = predict_mode
68
+
69
+ # LLM
70
+ llm_name = 'Vicuna7B'
71
+ llm_cfg_path = '/scratch/generalvision/vicuna-7b'
72
+ llm_truncation_side = 'right'
73
+ if 'vicuna' in llm_name.lower():
74
+ self.llm_tokenizer = LlamaTokenizer.from_pretrained(llm_cfg_path, truncation_side=llm_truncation_side)
75
+ self.llm_tokenizer.add_special_tokens({'pad_token': '[PAD]'})
76
+ self.llm_model = LlamaForCausalLM.from_pretrained(llm_cfg_path, torch_dtype=torch.float32) # not-half mode torch_dtype=torch.float16
77
+ self.llm_model.resize_token_embeddings(len(self.llm_tokenizer))
78
+ else:
79
+ self.llm_tokenizer = AutoTokenizer.from_pretrained(llm_cfg_path, truncation_side=llm_truncation_side)
80
+ self.llm_model = AutoModelForCausalLM.from_pretrained(llm_cfg_path, torch_dtype=torch.float16)
81
+
82
+ for param in self.llm_model.parameters():
83
+ param.requires_grad = False
84
+ self.llm_model.eval()
85
+ self.llm_model.train = disabled_train
86
+
87
+ # 2D vision
88
+ self.img_encoder = GridFeatureExtractor2D()
89
+ self.img_proj = nn.Linear(
90
+ self.img_encoder.out_channels, self.llm_model.config.hidden_size
91
+ )
92
+
93
+ # 3D vision
94
+ self.pcd_encoder = OSE3D()
95
+ self.pcd_proj = nn.Linear(256, self.llm_model.config.hidden_size)
96
+
97
+ # type embedding
98
+ # self.img_type_embed = nn.Parameter(torch.zeros(self.llm_model.config.hidden_size), requires_grad=True)
99
+ # self.pcd_type_embed = nn.Parameter(torch.zeros(self.llm_model.config.hidden_size), requires_grad=True)
100
+
101
+ # LoRA
102
+ if cfg['model']['llm']['lora']['flag']:
103
+ lora_config = LoraConfig(
104
+ r=cfg['model']['llm']['lora']['rank'],
105
+ lora_alpha=cfg['model']['llm']['lora']['alpha'],
106
+ target_modules=cfg['model']['llm']['lora']['target_modules'],
107
+ lora_dropout=cfg['model']['llm']['lora']['dropout'],
108
+ bias='none',
109
+ modules_to_save=[],
110
+ )
111
+ self.llm_model = get_peft_model(self.llm_model, peft_config=lora_config)
112
+
113
+ self.max_context_len = 256
114
+ self.max_out_len = 256
115
+
116
+ # additional text x multi-modal tokens fusion
117
+ self.clip_txt_guidance = cfg['model']['clip_txt_guidance']['flag']
118
+ if self.clip_txt_guidance:
119
+ self.clip_model = clip.load('RN50')[0]
120
+ for param in self.clip_model.parameters():
121
+ param.requires_grad = False
122
+ self.clip_model.eval()
123
+ self.clip_model.train = disabled_train
124
+ self.clip_proj = nn.Linear(cfg['clip_txt_guidance']['clip_out_dim'], self.llm_model.config.hidden_size)
125
+
126
+ # grounding head
127
+ self.ground_head = SequentialGroundHead()
128
+ self.obj_cls_head = get_mlp_head(4096, 768, 607, 0.3)
129
+ self.pre_grounding = True
130
+
131
+ @property
132
+ def device(self):
133
+ return list(self.parameters())[0].device
134
+
135
    def build_right_justified_sequence(self, data_dict):
        """
        Concat six sequences: `prompt_before_obj`, `prompt_middle_1`, `img_tokens`, `prompt_middle_2`, `obj_tokens`, `prompt_after_obj`.
        Return right justified sequence for causal LM: <pad>, <role/situation>, <img>, <objs>, <instruction>.

        Reads from `data_dict`: `prompt_before_obj`, `prompt_middle_1`, `prompt_middle_2`,
        `prompt_after_obj`, `img_tokens`, `img_masks`, `obj_tokens`, `obj_masks`.

        Returns:
            inputs_embeds: (B, l1 + l2 + l3, hidden_dim) embeddings.
            attention_mask: (B, l1 + l2 + l3), created with the dtype of `obj_masks`.
            (l1, l2, l3): lengths of the pre / middle / post chunks.

        Side effect: sets `padding_side` and `truncation_side` on `self.llm_tokenizer`.
        """
        device = self.device
        bs = len(data_dict['prompt_before_obj'])

        self.llm_tokenizer.padding_side = 'left'
        text_input_tokens_pre = self.llm_tokenizer(
            data_dict['prompt_before_obj'],
            return_tensors='pt',
            padding='longest'
        ).to(device)  # [PAD, BOS, tokens], (B, T1)

        text_input_tokens_mid1 = self.llm_tokenizer(
            data_dict['prompt_middle_1'],
            return_tensors='pt',
            padding='longest'
        ).to(device)

        img_tokens = data_dict['img_tokens'].to(device)
        img_masks = data_dict['img_masks'].to(device)
        # broadcast the per-sample image flag to one mask entry per image token
        img_masks = img_masks.reshape(-1, 1).repeat(1, img_tokens.size(1))

        text_input_tokens_mid2 = self.llm_tokenizer(
            data_dict['prompt_middle_2'],
            return_tensors='pt',
            padding='longest'
        ).to(device)

        obj_tokens = data_dict['obj_tokens'].to(device)
        obj_masks = data_dict['obj_masks'].to(device)

        # additional clip fusion
        if self.clip_txt_guidance:
            with torch.no_grad():
                clip_fts = self.clip_model.encode_text(
                    clip.tokenize(data_dict['prompt_after_obj'], truncate=True).to(device)
                )
            clip_fts = self.clip_proj(clip_fts)
            # B, N, C
            # element-wise scaling of every token by the projected text feature
            img_tokens = torch.einsum('bnc,bc->bnc', img_tokens, clip_fts)
            obj_tokens = torch.einsum('bnc,bc->bnc', obj_tokens, clip_fts)

        self.llm_tokenizer.padding_side = 'right'  # no need to be 'left', as padding tokens will be shifted
        self.llm_tokenizer.truncation_side = 'left'  # truncate history
        text_input_tokens_post = self.llm_tokenizer(
            data_dict['prompt_after_obj'],
            return_tensors='pt',
            padding='longest',
            truncation=True,
            max_length=self.max_context_len,
        ).to(device)  # [BOS, tokens, PAD], (B, T3)

        assert text_input_tokens_mid1.attention_mask.all() and text_input_tokens_mid2.attention_mask.all(), \
            "prompt_middle should be the same and thus no padding"

        # remove bos, make "tokenize subseq and concat" equivalent to "tokenize the whole seq"
        text_input_tokens_mid1.input_ids = text_input_tokens_mid1.input_ids[:, 1:]
        text_input_tokens_mid1.attention_mask = text_input_tokens_mid1.attention_mask[:, 1:]
        text_input_tokens_mid2.input_ids = text_input_tokens_mid2.input_ids[:, 1:]
        text_input_tokens_mid2.attention_mask = text_input_tokens_mid2.attention_mask[:, 1:]
        text_input_tokens_post.input_ids = text_input_tokens_post.input_ids[:, 1:]
        text_input_tokens_post.attention_mask = text_input_tokens_post.attention_mask[:, 1:]
        for i in range(bs):
            if not img_masks[i].any():
                # no image input, also mask the text prompt for image tokens
                text_input_tokens_mid1.attention_mask[i].fill_(0)

        inputs_embeds_pre = self.llm_model.get_input_embeddings()(text_input_tokens_pre.input_ids)
        inputs_embeds_mid1 = self.llm_model.get_input_embeddings()(text_input_tokens_mid1.input_ids)
        inputs_embeds_mid2 = self.llm_model.get_input_embeddings()(text_input_tokens_mid2.input_ids)
        inputs_embeds_post = self.llm_model.get_input_embeddings()(text_input_tokens_post.input_ids)

        # since img_tokens, prompt_mid, obj_tokens are fixed length without padding, we concat them first
        inputs_embeds_mid = torch.cat([inputs_embeds_mid1, img_tokens, inputs_embeds_mid2, obj_tokens], dim=1)
        attn_mask_mid = torch.cat(
            [text_input_tokens_mid1.attention_mask, img_masks, text_input_tokens_mid2.attention_mask, obj_masks],
            dim=1,
        )

        # number of trailing pad positions in each (BOS-stripped) post prompt
        post_pad_length = torch.logical_not(text_input_tokens_post.attention_mask).sum(-1)

        bs, l1, hidden_dim = inputs_embeds_pre.shape
        _, l2, _ = inputs_embeds_mid.shape
        _, l3, _ = inputs_embeds_post.shape

        inputs_embeds = torch.zeros(bs, l1+l2+l3, hidden_dim).type(inputs_embeds_pre.dtype).to(device)
        attention_mask = torch.zeros(bs, l1+l2+l3).type(obj_masks.dtype).to(device)

        # assign by chunks
        # per-row layout: [post padding | pre (l1) | mid (l2) | valid post tokens]
        for i in range(bs):
            post_pad_len = post_pad_length[i]

            if post_pad_len > 0:
                # move the right-hand padding of the post prompt to the very front
                inputs_embeds[i, :post_pad_len] = inputs_embeds_post[i, -post_pad_len:]
                attention_mask[i, :post_pad_len] = 0
                inputs_embeds[i, post_pad_len+l1+l2:] = inputs_embeds_post[i, :-post_pad_len]
                attention_mask[i, post_pad_len+l1+l2:] = 1
            else:
                # no padding
                inputs_embeds[i, -l3:] = inputs_embeds_post[i]
                attention_mask[i, -l3:] = 1

            inputs_embeds[i, post_pad_len: post_pad_len+l1] = inputs_embeds_pre[i]
            attention_mask[i, post_pad_len: post_pad_len+l1] = text_input_tokens_pre.attention_mask[i]

            inputs_embeds[i, post_pad_len+l1: post_pad_len+l1+l2] = inputs_embeds_mid[i]
            attention_mask[i, post_pad_len+l1: post_pad_len+l1+l2] = attn_mask_mid[i]

        return inputs_embeds, attention_mask, (l1, l2, l3)
247
+
248
+ def forward(self, data_dict):
249
+ if self.predict_mode:
250
+ return self.generate(data_dict=data_dict)
251
+ """
252
+ data_dict requires keys:
253
+ # input
254
+ prompt_before_obj: list of str, (B,)
255
+ prompt_middle_1: list of str, (B,)
256
+ prompt_middle_2: list of str, (B,)
257
+ prompt_after_obj: list of str, (B,)
258
+ obj_fts: (B, N, P, 6), xyz + rgb
259
+ obj_masks: (B, N), 1 valid and 0 masked
260
+ obj_locs: (B, N, 6), xyz + whd
261
+ anchor_locs: (B, 3)
262
+ anchor_orientation: (B, C)
263
+ img_fts: (B, 3, H, W), rgb
264
+ img_masks: (B, 1), 1 valid and 0 masked
265
+ # output
266
+ output_gt: list of str, (B,)
267
+ """
268
+ device = self.device
269
+ bs = len(data_dict['prompt_after_obj'])
270
+ data_dict['bs'] = bs
271
+ if 'obj_tokens' not in data_dict:
272
+ # obtain obj tokens
273
+ data_dict = self.pcd_encoder(data_dict)
274
+ # TO CHANGE FOR DEBUG
275
+ #self.llm_model.float()
276
+ #data_dict['obj_tokens'] = torch.zeros((data_dict['obj_locs'].shape[0], data_dict['obj_locs'].shape[1], 256)).to(device=device)
277
+
278
+ data_dict['obj_tokens'] = self.pcd_proj(data_dict['obj_tokens'].to(device))
279
+ # data_dict['obj_tokens'] = data_dict['obj_tokens'] + self.pcd_type_embed
280
+
281
+ data_dict['img_tokens'] = self.img_proj(self.img_encoder(data_dict['img_fts']))
282
+ # data_dict['img_tokens'] = data_dict['img_tokens'] + self.img_type_embed
283
+
284
+ # build input embdes and record prompt position
285
+ inputs_embeds, attention_mask, input_length = self.build_right_justified_sequence(data_dict=data_dict)
286
+ obj_token_length = data_dict['obj_masks'].shape[1]
287
+ # (B, T1+O+T2, D), (B, T1+O+T2)
288
+
289
+ self.llm_tokenizer.padding_side = 'right'
290
+ self.llm_tokenizer.truncation_side = 'right'
291
+ text_output_tokens = self.llm_tokenizer(
292
+ [t + self.llm_tokenizer.eos_token for t in data_dict['output_gt']],
293
+ return_tensors='pt',
294
+ padding='longest',
295
+ truncation=True,
296
+ max_length=self.max_out_len,
297
+ ).to(device)
298
+ # record position for special token [SOS]
299
+ grd_token_id = self.llm_tokenizer.convert_tokens_to_ids(['<s>'])[0]
300
+ out_input_ids_remove_first_sos = text_output_tokens.input_ids.clone()
301
+ out_input_ids_remove_first_sos[:, 0] = -100
302
+ grd_ind_0, grd_ind_1 = (out_input_ids_remove_first_sos == grd_token_id).nonzero(as_tuple=True)
303
+
304
+
305
+ text_output_embeds = self.llm_model.get_input_embeddings()(text_output_tokens.input_ids) # (B, T3, D)
306
+ inputs_embeds = torch.cat([inputs_embeds, text_output_embeds], dim=1) # (B, T1+O+T2+T3, D)
307
+ attention_mask = torch.cat([attention_mask, text_output_tokens.attention_mask], dim=1) # (B, T1+O+T2+T3)
308
+
309
+ # construct targets
310
+ targets = torch.zeros_like(attention_mask).long().fill_(-100) # (B, T1+O+T2+T3)
311
+
312
+ # only apply loss to answer tokens
313
+ targets_idx = text_output_tokens.attention_mask.bool()
314
+ targets[:, -targets_idx.shape[1]:][targets_idx] = text_output_tokens.input_ids[targets_idx]
315
+
316
+ # do not predict bos token, regard it as condition instead
317
+ targets[:, -targets_idx.shape[1]] = -100
318
+
319
+ with maybe_autocast(self):
320
+ outputs = self.llm_model(
321
+ inputs_embeds=inputs_embeds.float(), # not-half mode
322
+ attention_mask=attention_mask,
323
+ return_dict=True,
324
+ output_hidden_states=True,
325
+ )
326
+
327
+ logits = outputs.logits.float()
328
+ last_hidden_state = outputs.hidden_states[-1]
329
+
330
+ # different from the loss inside `llm_model.forward`, here we take mean of each sequence instead of sum
331
+ shift_logits = logits[..., :-1, :].contiguous()
332
+ shift_labels = targets[..., 1:].contiguous()
333
+ num_tokens_for_loss = (shift_labels >= 0).int().sum(1) # (B,)
334
+
335
+ shift_logits = rearrange(shift_logits, 'b t v -> (b t) v')
336
+ shift_labels = rearrange(shift_labels, 'b t -> (b t)')
337
+
338
+ shift_labels = shift_labels.to(shift_logits.device)
339
+
340
+ # record for llm loss
341
+ data_dict['llm_logits'] = shift_logits
342
+ data_dict['llm_labels'] = shift_labels
343
+ data_dict['num_tokens_for_loss'] = num_tokens_for_loss
344
+
345
+ # record for grounding loss
346
+ grd_list = []
347
+ obj_list = []
348
+ mask_list = []
349
+ for step in range(len(grd_ind_0)):
350
+ batch_ind = grd_ind_0[step]
351
+ grd_token_ind = grd_ind_1[step]
352
+ if self.pre_grounding:
353
+ output_obj_tokens = data_dict['obj_tokens'][batch_ind]
354
+ else:
355
+ output_obj_tokens = last_hidden_state[batch_ind, input_length[0] + input_length[1] - obj_token_length : input_length[0] + input_length[1], :]
356
+ output_grd_tokens = last_hidden_state[batch_ind, sum(input_length) + grd_token_ind:sum(input_length) + grd_token_ind + 1, :]
357
+ grd_list.append(output_grd_tokens)
358
+ obj_list.append(output_obj_tokens)
359
+ mask_list.append(data_dict['obj_masks'][batch_ind])
360
+ output_obj = torch.stack(obj_list).float()
361
+ output_grd = torch.stack(grd_list).float()
362
+ data_dict['ground_logits'] = self.ground_head(output_obj, output_grd, torch.stack(mask_list))
363
+ # data_dict['ground_label'] = torch.concat(data_dict['tgt_object_id'], dim=0)
364
+
365
+ # record for cls loss
366
+ #obj_cls_post_embeds = last_hidden_state[:, input_length[0] + input_length[1] - obj_token_length : input_length[0] + input_length[1], :].float()
367
+ obj_cls_post_embeds = data_dict['obj_tokens'].float()
368
+ data_dict['obj_cls_post_logits'] = self.obj_cls_head(obj_cls_post_embeds)
369
+ return data_dict
370
+
371
    @torch.no_grad()
    def generate(
        self,
        data_dict,
        use_nucleus_sampling=False,
        num_beams=5,
        max_length=256,
        min_length=1,
        top_p=0.9,
        repetition_penalty=6.0,
        length_penalty=1,
        num_captions=1,
        temperature=1,
    ):
        """
        data_dict requires the same keys as forward() except output_gt

        Generates text with `self.llm_model.generate` and scores objects for every
        grounding token found in the generated ids.

        Writes to data_dict: `bs`, `obj_tokens`, `img_tokens`, `ground_logits`
        (None when no grounding token was generated), `grd_batch_ind_list`
        (batch index of each grounding token) and `output_txt` (decoded, stripped strings).

        The keyword arguments are forwarded to `self.llm_model.generate`
        (`use_nucleus_sampling` as `do_sample`, `num_captions` as `num_return_sequences`).
        """
        device = self.device
        bs = len(data_dict['prompt_after_obj'])
        data_dict['bs'] = bs
        if 'obj_tokens' not in data_dict:
            # obtain obj tokens
            data_dict = self.pcd_encoder(data_dict)
        # TO CHANGE FOR DEBUG
        #self.llm_model.float()
        #data_dict['obj_tokens'] = torch.zeros((data_dict['obj_locs'].shape[0], data_dict['obj_locs'].shape[1], 256)).to(device=device)

        data_dict['obj_tokens'] = self.pcd_proj(data_dict['obj_tokens'].to(device))
        # data_dict['obj_tokens'] = data_dict['obj_tokens'] + self.pcd_type_embed

        data_dict['img_tokens'] = self.img_proj(self.img_encoder(data_dict['img_fts']))
        # data_dict['img_tokens'] = data_dict['img_tokens'] + self.img_type_embed

        inputs_embeds, attention_mask, input_length = self.build_right_justified_sequence(data_dict=data_dict)
        obj_token_length = data_dict['obj_masks'].shape[1]

        # give bos token as condition
        bos_tokens = self.llm_tokenizer(
            [self.llm_tokenizer.bos_token] * bs,
            return_tensors='pt',
        ).to(device)
        bos_tokens_ids = bos_tokens.input_ids[:, 0:1]  # (B, 1)
        bos_tokens_attn = bos_tokens.attention_mask[:, 0:1]  # (B, 1)

        # prepare a `bos_token`
        bos_embeds = self.llm_model.get_input_embeddings()(bos_tokens_ids)  # (B, 1, D)
        inputs_embeds = torch.cat([inputs_embeds, bos_embeds], dim=1)  # (B, T1+O+T2+1, D)
        attention_mask = torch.cat([attention_mask, bos_tokens_attn], dim=1)  # (B, T1+O+T2+1)

        with maybe_autocast(self):
            outputs = self.llm_model.generate(
                inputs_embeds=inputs_embeds,
                attention_mask=attention_mask,
                do_sample=use_nucleus_sampling,
                top_p=top_p,
                temperature=temperature,
                num_beams=num_beams,
                max_length=max_length,
                min_length=min_length,
                repetition_penalty=repetition_penalty,
                length_penalty=length_penalty,
                num_return_sequences=num_captions,
                return_dict_in_generate=True,
                output_hidden_states=True,
                output_scores=True
            )
        # note output_ids_idx - 1 = step idx, because we do not predict [BOS]
        # NOTE(review): `beam_indices` is read unconditionally; presumably it only
        # exists on beam-search outputs - confirm before calling with num_beams=1.
        beam_indices = outputs.beam_indices  # bs x step, beam indices range (bsxbeam)
        scores = outputs.scores  # step x (bs x beam) x vocab
        hidden_states = outputs.hidden_states  # step x layer x (bs x beam) x token_num x hidden_dim
        outputs = outputs.sequences  # bs x output_ids
        # unknown tokens are rewritten to EOS before searching/decoding
        outputs[outputs == self.llm_tokenizer.unk_token_id] = self.llm_tokenizer.eos_token_id
        # data_dict['output_tokens'] = outputs # unable to gather variable-length tensors

        # record for grounding
        grd_token_id = self.llm_tokenizer.convert_tokens_to_ids(['<s>'])[0]
        # position 0 is blanked so the leading token is never treated as a grounding token
        out_input_ids_remove_first_sos = outputs.clone()
        out_input_ids_remove_first_sos[:, 0] = -100
        grd_ind_0, grd_ind_1 = (out_input_ids_remove_first_sos == grd_token_id).nonzero(as_tuple=True)

        grd_list = []
        grd_batch_ind_list = []
        obj_list = []
        mask_list = []
        if len(grd_ind_0) > 0:
            for step in range(len(grd_ind_0)):
                batch_ind = grd_ind_0[step]
                grd_token_ind = grd_ind_1[step]
                #output_obj_tokens = last_hidden_state[batch_ind, input_length[0] + input_length[1] - obj_token_length : input_length[0] + input_length[1], :]
                output_obj_tokens = data_dict['obj_tokens'][batch_ind]
                # last layer, beam row chosen via beam_indices, last token position of that step
                output_grd_tokens = hidden_states[grd_token_ind-1][-1][beam_indices[batch_ind, grd_token_ind-1]][-1].unsqueeze(0)  # grd_token_ind - 1 because first token is sos
                grd_list.append(output_grd_tokens)
                grd_batch_ind_list.append(batch_ind)
                obj_list.append(output_obj_tokens)
                mask_list.append(data_dict['obj_masks'][batch_ind])
            output_obj = torch.stack(obj_list).float()
            output_grd = torch.stack(grd_list).float()
            data_dict['ground_logits'] = self.ground_head(output_obj, output_grd, torch.stack(mask_list))
        else:
            # nothing to ground in any generated sequence
            data_dict['ground_logits'] = None
        # data_dict['ground_label'] = torch.concat(data_dict['tgt_object_id'], dim=0)
        data_dict['grd_batch_ind_list'] = grd_batch_ind_list

        output_txt = self.llm_tokenizer.batch_decode(outputs, skip_special_tokens=True)
        output_txt = [txt.strip() for txt in output_txt]
        data_dict['output_txt'] = output_txt
        return data_dict
leo/pcd_encoder.py ADDED
@@ -0,0 +1,406 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import einops
3
+ import numpy as np
4
+ import torch.nn.functional as F
5
+ from torch import Tensor, nn
6
+ from typing import Optional
7
+ from leo.utils import get_activation_fn, layer_repeat, calc_pairwise_locs
8
+
9
+
10
def disabled_train(self, mode=True):
    """
    Drop-in replacement for `model.train` on frozen modules.

    `mode` is accepted and ignored; the first argument is returned untouched,
    so the train/eval mode does not change anymore.
    """
    return self
15
+
16
+
17
class TransformerEncoderLayer(nn.Module):
    """Self-attention + feed-forward encoder layer with optional pre-normalisation.

    `forward` returns both the transformed sequence and the attention weights
    produced by `nn.MultiheadAttention`.
    """

    def __init__(self, d_model, nhead, dim_feedforward=2048, batch_first=True, dropout=0.1, activation="relu", prenorm=False):
        """
        Args:
            d_model: feature width of the sequence.
            nhead: number of attention heads.
            dim_feedforward: hidden width of the feed-forward block.
            batch_first: forwarded to `nn.MultiheadAttention`.
            dropout: dropout probability used in attention, feed-forward and residual paths.
            activation: name resolved through `get_activation_fn`.
            prenorm: apply the layer norms before (True) or after (False) each sub-block.
        """
        super().__init__()
        self.self_attn = nn.MultiheadAttention(
            d_model, nhead, dropout=dropout, batch_first=batch_first
        )
        # Implementation of Feedforward modules
        self.linear1 = nn.Linear(d_model, dim_feedforward)
        self.dropout = nn.Dropout(dropout)
        self.linear2 = nn.Linear(dim_feedforward, d_model)

        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)
        self.dropout1 = nn.Dropout(dropout)
        self.dropout2 = nn.Dropout(dropout)

        self.activation = get_activation_fn(activation)
        self.prenorm = prenorm

    def forward(
        self, tgt, tgt_mask: Optional[Tensor] = None,
        tgt_key_padding_mask: Optional[Tensor] = None,
    ):
        """
        Args:
            tgt: input sequence, used as query, key and value.
            tgt_mask: passed to the attention module as `attn_mask`.
            tgt_key_padding_mask: passed to the attention module as `key_padding_mask`.

        Returns:
            (tgt, self_attn_matrices): the layer output and the attention weights.
        """
        tgt2 = tgt
        if self.prenorm:
            tgt2 = self.norm1(tgt2)
        tgt2, self_attn_matrices = self.self_attn(
            query=tgt2, key=tgt2, value=tgt2, attn_mask=tgt_mask,
            key_padding_mask=tgt_key_padding_mask
        )
        tgt = tgt + self.dropout1(tgt2)
        if not self.prenorm:
            tgt = self.norm1(tgt)
        if self.prenorm:
            # NOTE(review): this overwrites `tgt`, so with prenorm=True the feed-forward
            # residual below is added to the normalised tensor rather than to the
            # un-normalised one - confirm this is intended.
            tgt = self.norm2(tgt)
        tgt2 = self.linear2(self.dropout(self.activation(self.linear1(tgt))))
        tgt = tgt + self.dropout2(tgt2)
        if not self.prenorm:
            tgt = self.norm2(tgt)
        return tgt, self_attn_matrices
57
+
58
+
59
class MultiHeadAttentionSpatial(nn.Module):
    """Multi-head attention whose scores are fused with pairwise spatial features.

    `spatial_attn_fusion` selects how the spatial term is combined with the
    dot-product scores:
        'mul'  - ReLU of a linear map of `pairwise_locs`, added in log space before the softmax
        'bias' - linear map of `pairwise_locs`, added to the scores before the softmax
        'add'  - average of two separate softmaxes (scores and spatial term)
        'ctx'  - `pairwise_locs` projected to d_model and dotted with the queries
        'cond' - sigmoid of query-conditioned weights applied to `pairwise_locs`, added in log space
    """

    def __init__(
        self, d_model, n_head, dropout=0.1, spatial_multihead=True, spatial_dim=5,
        spatial_attn_fusion='mul',
    ):
        """
        Args:
            d_model: feature width; must be divisible by `n_head`.
            n_head: number of attention heads.
            dropout: dropout applied to the output projection.
            spatial_multihead: one spatial term per head (True) or a single shared one (False).
            spatial_dim: size of the last axis of `pairwise_locs`.
            spatial_attn_fusion: one of 'mul', 'bias', 'add', 'ctx', 'cond'.

        Raises:
            NotImplementedError: for any other `spatial_attn_fusion` value.
        """
        super().__init__()
        assert d_model % n_head == 0, 'd_model: %d, n_head: %d' % (d_model, n_head)

        self.n_head = n_head
        self.d_model = d_model
        self.d_per_head = d_model // n_head
        self.spatial_multihead = spatial_multihead
        self.spatial_dim = spatial_dim
        self.spatial_attn_fusion = spatial_attn_fusion

        self.w_qs = nn.Linear(d_model, d_model)
        self.w_ks = nn.Linear(d_model, d_model)
        self.w_vs = nn.Linear(d_model, d_model)

        self.fc = nn.Linear(d_model, d_model)
        self.dropout = nn.Dropout(p=dropout)
        self.layer_norm = nn.LayerNorm(d_model)

        self.spatial_n_head = n_head if spatial_multihead else 1
        if self.spatial_attn_fusion in ['mul', 'bias', 'add']:
            self.pairwise_loc_fc = nn.Linear(spatial_dim, self.spatial_n_head)
        elif self.spatial_attn_fusion == 'ctx':
            self.pairwise_loc_fc = nn.Linear(spatial_dim, d_model)
        elif self.spatial_attn_fusion == 'cond':
            # per head: `spatial_dim` weights plus one bias, predicted from the query
            self.lang_cond_fc = nn.Linear(d_model, self.spatial_n_head * (spatial_dim + 1))
        else:
            raise NotImplementedError('unsupported spatial_attn_fusion %s' % (self.spatial_attn_fusion))

    def forward(self, q, k, v, pairwise_locs, key_padding_mask=None, txt_embeds=None):
        """
        Args:
            q: (b, l, d_model) queries.
            k, v: (b, t, d_model) keys and values.
            pairwise_locs: (b, l, t, spatial_dim) pairwise spatial features.
            key_padding_mask: optional (b, t) mask; True marks keys to ignore.
            txt_embeds: accepted but not used by this implementation.

        Returns:
            (output, fused_attn): output is (b, l, d_model) after residual + layer norm,
            fused_attn is (head, b, l, t).

        Raises:
            AssertionError: if the fused attention contains NaN.
        """
        residual = q
        q = einops.rearrange(self.w_qs(q), 'b l (head k) -> head b l k', head=self.n_head)
        k = einops.rearrange(self.w_ks(k), 'b t (head k) -> head b t k', head=self.n_head)
        v = einops.rearrange(self.w_vs(v), 'b t (head v) -> head b t v', head=self.n_head)
        attn = torch.einsum('hblk,hbtk->hblt', q, k) / np.sqrt(q.shape[-1])

        if self.spatial_attn_fusion in ['mul', 'bias', 'add']:
            loc_attn = self.pairwise_loc_fc(pairwise_locs)
            loc_attn = einops.rearrange(loc_attn, 'b l t h -> h b l t')
            if self.spatial_attn_fusion == 'mul':
                loc_attn = F.relu(loc_attn)
            if not self.spatial_multihead:
                loc_attn = einops.repeat(loc_attn, 'h b l t -> (h nh) b l t', nh=self.n_head)
        elif self.spatial_attn_fusion == 'ctx':
            loc_attn = self.pairwise_loc_fc(pairwise_locs)
            loc_attn = einops.rearrange(loc_attn, 'b l t (h k) -> h b l t k', h=self.n_head)
            loc_attn = torch.einsum('hblk,hbltk->hblt', q, loc_attn) / np.sqrt(q.shape[-1])
        elif self.spatial_attn_fusion == 'cond':
            spatial_weights = self.lang_cond_fc(residual)
            spatial_weights = einops.rearrange(spatial_weights, 'b l (h d) -> h b l d', h=self.spatial_n_head,
                                               d=self.spatial_dim + 1)
            if self.spatial_n_head == 1:
                spatial_weights = einops.repeat(spatial_weights, '1 b l d -> h b l d', h=self.n_head)
            # first channel is the bias, the remaining `spatial_dim` channels are the weights
            spatial_bias = spatial_weights[..., :1]
            spatial_weights = spatial_weights[..., 1:]
            loc_attn = torch.einsum('hbld,bltd->hblt', spatial_weights, pairwise_locs) + spatial_bias
            loc_attn = torch.sigmoid(loc_attn)

        if key_padding_mask is not None:
            mask = einops.repeat(key_padding_mask, 'b t -> h b l t', h=self.n_head, l=q.size(2))
            attn = attn.masked_fill(mask, -np.inf)
            if self.spatial_attn_fusion in ['mul', 'cond']:
                # these modes go through log(clamp(.)) below, so 0 is the neutral "masked" value
                loc_attn = loc_attn.masked_fill(mask, 0)
            else:
                loc_attn = loc_attn.masked_fill(mask, -np.inf)

        if self.spatial_attn_fusion == 'add':
            fused_attn = (torch.softmax(attn, 3) + torch.softmax(loc_attn, 3)) / 2
        else:
            if self.spatial_attn_fusion in ['mul', 'cond']:
                fused_attn = torch.log(torch.clamp(loc_attn, min=1e-6)) + attn
            else:
                fused_attn = loc_attn + attn
            fused_attn = torch.softmax(fused_attn, 3)

        # The previous check `torch.sum(torch.isnan(fused_attn) == 0)` counted the
        # non-NaN entries, so it passed whenever a single finite value existed.
        # A softmax row is NaN when every key of that row is masked to -inf.
        assert not torch.isnan(fused_attn).any(), \
            'fused_attn contains NaN (are all keys of a sample masked?)'

        output = torch.einsum('hblt,hbtv->hblv', fused_attn, v)
        output = einops.rearrange(output, 'head b l v -> b l (head v)')
        output = self.dropout(self.fc(output))
        output = self.layer_norm(output + residual)
        return output, fused_attn
145
+
146
+
147
class TransformerSpatialEncoderLayer(TransformerEncoderLayer):
    """`TransformerEncoderLayer` whose self-attention is replaced by `MultiHeadAttentionSpatial`.

    The layer always runs in post-norm order; the `prenorm` flag of the parent is left
    at its default and is not consulted in `forward`.
    """

    def __init__(
        self, d_model, nhead, dim_feedforward=2048, dropout=0.1, activation="relu",
        spatial_multihead=True, spatial_dim=5, spatial_attn_fusion='mul'
    ):
        """
        Args:
            d_model, nhead, dim_feedforward, dropout, activation: forwarded to the parent layer.
            spatial_multihead, spatial_dim, spatial_attn_fusion: forwarded to
                `MultiHeadAttentionSpatial`.
        """
        super().__init__(
            d_model, nhead, dim_feedforward=dim_feedforward, dropout=dropout, activation=activation
        )
        # drop the nn.MultiheadAttention created by the parent before registering the spatial one
        del self.self_attn
        self.self_attn = MultiHeadAttentionSpatial(
            d_model, nhead, dropout=dropout,
            spatial_multihead=spatial_multihead,
            spatial_dim=spatial_dim,
            spatial_attn_fusion=spatial_attn_fusion,
        )

    def forward(
        self, tgt, tgt_pairwise_locs,
        tgt_mask: Optional[Tensor] = None,
        tgt_key_padding_mask: Optional[Tensor] = None,
    ):
        """
        Args:
            tgt: input sequence, used as query, key and value.
            tgt_pairwise_locs: pairwise spatial features handed to the attention module.
            tgt_mask: accepted for signature compatibility; not used here.
            tgt_key_padding_mask: passed to the attention module as `key_padding_mask`.

        Returns:
            (tgt, self_attn_matrices): the layer output and the fused attention weights.
        """
        tgt2 = tgt
        tgt2, self_attn_matrices = self.self_attn(
            tgt2, tgt2, tgt2, tgt_pairwise_locs,
            key_padding_mask=tgt_key_padding_mask
        )
        tgt = tgt + self.dropout1(tgt2)
        tgt = self.norm1(tgt)
        tgt2 = self.linear2(self.dropout(self.activation(self.linear1(tgt))))
        tgt = tgt + self.dropout2(tgt2)
        tgt = self.norm2(tgt)
        return tgt, self_attn_matrices
179
+
180
+
181
+ def _init_weights_bert(module, std=0.02):
182
+ """
183
+ Huggingface transformer weight initialization,
184
+ most commonly for bert initialization
185
+ """
186
+ if isinstance(module, nn.Linear):
187
+ # Slightly different from the TF version which uses truncated_normal for initialization
188
+ # cf https://github.com/pytorch/pytorch/pull/5617
189
+ module.weight.data.normal_(mean=0.0, std=std)
190
+ if module.bias is not None:
191
+ module.bias.data.zero_()
192
+ elif isinstance(module, nn.Embedding):
193
+ module.weight.data.normal_(mean=0.0, std=std)
194
+ if module.padding_idx is not None:
195
+ module.weight.data[module.padding_idx].zero_()
196
+ elif isinstance(module, nn.LayerNorm):
197
+ module.bias.data.zero_()
198
+ module.weight.data.fill_(1.0)
199
+
200
+
201
def generate_fourier_features(pos, num_bands=10, max_freq=15, concat_pos=True, sine_only=False):
    """
    Fourier-encode positions.

    Input:  pos, (B, N, C)
    Output: (B, N, C') where the encoding holds sin (and, unless `sine_only`,
    cos) of pi * pos * f for `num_bands` frequencies f spaced linearly in
    [1, max_freq]; the raw positions are prepended when `concat_pos` is set.
    """
    n_batch = pos.shape[0]

    # linearly spaced frequencies, starting at 1.0
    freqs = torch.linspace(start=1.0, end=max_freq, steps=num_bands, device=pos.device)

    # (B, N, C, num_bands) -> (B, N, C * num_bands)
    scaled = pos.unsqueeze(-1).repeat(1, 1, 1, num_bands) * freqs
    flat_width = np.prod(scaled.shape[2:])
    scaled = torch.reshape(scaled, [n_batch, -1, flat_width])

    phase = np.pi * scaled
    if sine_only:
        # (B, N, C * num_bands)
        encoded = torch.sin(phase)
    else:
        # (B, N, 2 * C * num_bands)
        encoded = torch.cat([torch.sin(phase), torch.cos(phase)], dim=-1)

    if not concat_pos:
        return encoded
    # Prepend the raw input positions (adds C channels to the encoding).
    return torch.cat([pos, encoded.expand(n_batch, -1, -1)], dim=-1)
230
+
231
+
232
class OSE3D(nn.Module):
    # Open-vocabulary, Spatial-attention, Embodied-token, 3D-agent
    # Turns per-object features and boxes into object tokens of width `hidden_dim`
    # by running them through a stack of (spatial) transformer encoder layers.
    def __init__(self, use_spatial_attn=True, use_embodied_token=False, hidden_dim=256, fourier_size=84, spatial_encoder={
        "num_attention_heads": 8,
        "dim_feedforward": 2048,
        "dropout": 0.1,
        "activation": "gelu",
        "spatial_dim": 5,
        "spatial_multihead": True,
        "spatial_attn_fusion": "cond",
        "num_layers": 3,
        "pairwise_rel_type": "center",
        "spatial_dist_norm": True,
        "obj_loc_encoding": "same_all",
        "dim_loc": 6,
    }):
        """
        Args:
            use_spatial_attn: build `TransformerSpatialEncoderLayer`s (True) or plain
                `TransformerEncoderLayer`s (False).
            use_embodied_token: prepend a learned anchor token built from
                `anchor_locs` / `anchor_orientation`.
            hidden_dim: width of the produced object tokens.
            fourier_size: input width of the orientation encoder.
            spatial_encoder: dict of encoder settings; it is only read, never modified here.
        """
        super().__init__()
        self.use_spatial_attn = use_spatial_attn  # spatial attention
        self.use_embodied_token = use_embodied_token  # embodied token

        # pcd backbone
        # self.obj_encoder = PointcloudBackbone(backbone)
        # the backbone is disabled: forward() expects 768-wide features in data_dict['obj_feats']
        self.obj_proj = nn.Linear(768, hidden_dim)

        # embodied token
        if self.use_embodied_token:
            self.anchor_feat = nn.Parameter(torch.zeros(1, 1, hidden_dim))
            self.anchor_size = nn.Parameter(torch.ones(1, 1, 3))
            self.orient_encoder = nn.Linear(fourier_size, hidden_dim)
        # type 0 = object, type 1 = anchor; used by forward() regardless of use_embodied_token
        self.obj_type_embed = nn.Embedding(2, hidden_dim)

        # spatial encoder
        if self.use_spatial_attn:
            spatial_encoder_layer = TransformerSpatialEncoderLayer(
                d_model=hidden_dim,
                nhead=spatial_encoder['num_attention_heads'],
                dim_feedforward=spatial_encoder['dim_feedforward'],
                dropout=spatial_encoder['dropout'],
                activation=spatial_encoder['activation'],
                spatial_dim=spatial_encoder['spatial_dim'],
                spatial_multihead=spatial_encoder['spatial_multihead'],
                spatial_attn_fusion=spatial_encoder['spatial_attn_fusion'],
            )
        else:
            spatial_encoder_layer = TransformerEncoderLayer(
                d_model=hidden_dim,
                nhead=spatial_encoder['num_attention_heads'],
                dim_feedforward=spatial_encoder['dim_feedforward'],
                dropout=spatial_encoder['dropout'],
                activation=spatial_encoder['activation'],
            )

        self.spatial_encoder = layer_repeat(
            spatial_encoder_layer,
            spatial_encoder['num_layers'],
        )
        self.pairwise_rel_type = spatial_encoder['pairwise_rel_type']
        self.spatial_dist_norm = spatial_encoder['spatial_dist_norm']
        self.spatial_dim = spatial_encoder['spatial_dim']
        self.obj_loc_encoding = spatial_encoder['obj_loc_encoding']

        # location encoding
        # NOTE(review): any other `obj_loc_encoding` value leaves `num_loc_layers`
        # unassigned and fails at `layer_repeat` below.
        if self.obj_loc_encoding in ['same_0', 'same_all']:
            num_loc_layers = 1
        elif self.obj_loc_encoding == 'diff_all':
            num_loc_layers = spatial_encoder['num_layers']

        loc_layer = nn.Sequential(
            nn.Linear(spatial_encoder['dim_loc'], hidden_dim),
            nn.LayerNorm(hidden_dim),
        )
        self.loc_layers = layer_repeat(loc_layer, num_loc_layers)


        # only initialize spatial encoder and loc layers
        self.spatial_encoder.apply(_init_weights_bert)
        self.loc_layers.apply(_init_weights_bert)

        if self.use_embodied_token:
            nn.init.normal_(self.anchor_feat, std=0.02)

    @property
    def device(self):
        # device of the first parameter yielded by self.parameters()
        return list(self.parameters())[0].device

    def forward(self, data_dict):
        """
        data_dict requires keys:
            obj_feats: (B, N, 768), per-object features fed to `obj_proj`
            obj_masks: (B, N), 1 valid and 0 masked
            obj_locs: (B, N, 6), xyz + whd
            anchor_locs: (B, 3), only read when `use_embodied_token` is set
            anchor_orientation: (B, C), only read when `use_embodied_token` is set

        Writes `obj_tokens` (encoder output) and `obj_masks` (1 valid, with one extra
        leading entry for the anchor when `use_embodied_token` is set) and returns data_dict.
        """

        # obj_feats = self.obj_encoder(data_dict['obj_fts'])
        obj_feats = data_dict['obj_feats']
        obj_feats = self.obj_proj(obj_feats)
        # assumes obj_masks is a bool tensor so that `~` is a logical not - TODO confirm
        obj_masks = ~data_dict['obj_masks']  # flipped due to different convention of TransformerEncoder

        B, N = obj_feats.shape[:2]
        device = obj_feats.device

        obj_type_ids = torch.zeros((B, N), dtype=torch.long, device=device)
        obj_type_embeds = self.obj_type_embed(obj_type_ids)

        if self.use_embodied_token:
            # anchor feature
            anchor_orient = data_dict['anchor_orientation'].unsqueeze(1)
            anchor_orient_feat = self.orient_encoder(generate_fourier_features(anchor_orient))
            anchor_feat = self.anchor_feat + anchor_orient_feat
            # the anchor is never masked out
            anchor_mask = torch.zeros((B, 1), dtype=bool, device=device)

            # anchor loc (3) + size (3)
            anchor_loc = torch.cat(
                [data_dict['anchor_locs'].unsqueeze(1), self.anchor_size.expand(B, -1, -1).to(device)], dim=-1
            )

            # anchor type
            anchor_type_id = torch.ones((B, 1), dtype=torch.long, device=device)
            anchor_type_embed = self.obj_type_embed(anchor_type_id)

            # fuse anchor and objs
            all_obj_feats = torch.cat([anchor_feat, obj_feats], dim=1)
            all_obj_masks = torch.cat((anchor_mask, obj_masks), dim=1)

            all_obj_locs = torch.cat([anchor_loc, data_dict['obj_locs']], dim=1)
            all_obj_type_embeds = torch.cat((anchor_type_embed, obj_type_embeds), dim=1)

        else:
            all_obj_feats = obj_feats
            all_obj_masks = obj_masks

            all_obj_locs = data_dict['obj_locs']
            all_obj_type_embeds = obj_type_embeds

        all_obj_feats = all_obj_feats + all_obj_type_embeds

        # call spatial encoder
        if self.use_spatial_attn:
            # first three channels are the centres, the remaining ones the sizes
            pairwise_locs = calc_pairwise_locs(
                all_obj_locs[:, :, :3],
                all_obj_locs[:, :, 3:],
                pairwise_rel_type=self.pairwise_rel_type,
                spatial_dist_norm=self.spatial_dist_norm,
                spatial_dim=self.spatial_dim,
            )

        for i, pc_layer in enumerate(self.spatial_encoder):
            # 'diff_all': one location layer per encoder layer; otherwise a single shared one
            if self.obj_loc_encoding == 'diff_all':
                query_pos = self.loc_layers[i](all_obj_locs)
            else:
                query_pos = self.loc_layers[0](all_obj_locs)
            # 'same_0' adds the location encoding before the first layer only
            if not (self.obj_loc_encoding == 'same_0' and i > 0):
                all_obj_feats = all_obj_feats + query_pos

            if self.use_spatial_attn:
                all_obj_feats, _ = pc_layer(
                    all_obj_feats, pairwise_locs,
                    tgt_key_padding_mask=all_obj_masks
                )
            else:
                all_obj_feats, _ = pc_layer(
                    all_obj_feats,
                    tgt_key_padding_mask=all_obj_masks
                )

        data_dict['obj_tokens'] = all_obj_feats
        # flip back to the "1 valid" convention used by the rest of the model
        data_dict['obj_masks'] = ~all_obj_masks

        # ###feat_pth = os.path.join(ASSET_DIR, f'inputs/{scan_id}', f'{scan_id}_img_gt.pth')
        # data_dict['obj_tokens'] = torch.load('assets/inputs/scene0350_00/obj_tokens.pth')
        # data_dict['obj_masks'] = torch.load('assets/inputs/scene0350_00/obj_masks.pth')

        return data_dict
leo/utils.py ADDED
@@ -0,0 +1,187 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import csv
2
+ import copy
3
+ import torch
4
+ import einops
5
+ import numpy as np
6
+ from torch import nn
7
+ import torch.nn.functional as F
8
+
9
+
10
+
11
def get_activation_fn(activation_type):
    """Return the ``torch.nn.functional`` activation named by *activation_type*.

    Args:
        activation_type: one of ``"relu"``, ``"gelu"`` or ``"glu"``.

    Returns:
        The matching callable looked up on ``torch.nn.functional``.

    Raises:
        RuntimeError: if *activation_type* is not one of the supported names.
    """
    supported = ("relu", "gelu", "glu")
    if activation_type not in supported:
        # The message lists every accepted name (the previous text omitted "glu").
        raise RuntimeError(
            f"activation function currently support {'/'.join(supported)}, not {activation_type}"
        )
    return getattr(F, activation_type)
15
+
16
def get_mlp_head(input_size, hidden_size, output_size, dropout=0):
    """Build a two-layer MLP head.

    The returned ``nn.Sequential`` is, in order: Linear -> ReLU -> LayerNorm
    -> Dropout -> Linear. Sub-modules are registered positionally, so their
    state-dict keys are the indices ``0`` .. ``4``.

    Args:
        input_size: feature size fed to the first linear layer.
        hidden_size: width of the hidden representation.
        output_size: feature size produced by the last linear layer.
        dropout: dropout probability applied after the layer norm.

    Returns:
        The assembled ``nn.Sequential`` module.
    """
    # Both linear layers are created in the original order so that parameter
    # initialisation draws from the RNG in the same sequence.
    hidden_proj = nn.Linear(input_size, hidden_size)
    activation = nn.ReLU()
    norm = nn.LayerNorm(hidden_size, eps=1e-12)
    drop = nn.Dropout(dropout)
    output_proj = nn.Linear(hidden_size, output_size)
    return nn.Sequential(hidden_proj, activation, norm, drop, output_proj)
24
+
25
def layer_repeat(module, N, share_layer=False):
    """Stack *module* into an ``nn.ModuleList``.

    Args:
        module: the layer to repeat.
        N: requested number of layers.
        share_layer: when true, every entry is the very same *module* object
            (shared parameters); otherwise the list holds ``N - 1`` deep
            copies followed by *module* itself as the last entry.

    Returns:
        An ``nn.ModuleList`` built as described above.
    """
    if share_layer:
        return nn.ModuleList([module for _ in range(N)])

    # Independent copies come first; the original instance is kept last.
    layers = [copy.deepcopy(module) for _ in range(N - 1)]
    layers.append(module)
    return nn.ModuleList(layers)
30
+
31
+
32
def calc_pairwise_locs(obj_centers, obj_whls, eps=1e-10, pairwise_rel_type='center', spatial_dist_norm=True,
                       spatial_dim=5):
    """Compute pairwise spatial relation features between objects.

    Args:
        obj_centers: tensor laid out as ``(b, l, d)``; components 0, 1, 2 of
            the last axis are read as x, y, z.
        obj_whls: tensor laid out as ``(b, l, d)``; component 2 of the last
            axis is subtracted from the centre z for ``'vertical_bottom'``.
        eps: constant added under every square root.
        pairwise_rel_type: ``'mlp'``, ``'center'`` or ``'vertical_bottom'``.
        spatial_dist_norm: if true, divide the pairwise distances by the
            per-batch-item maximum distance.
        spatial_dim: ``1`` returns only the (normalised) distance, ``4`` drops
            the leading distance channel, any other value keeps all channels.

    Returns:
        * ``'mlp'``: ``(b, l, l, 2 * D)`` concatenation of the two objects'
          ``[center, whl]`` vectors, where ``D`` is the size of that vector.
        * ``spatial_dim == 1``: ``(b, l, l, 1)`` distances.
        * ``'center'`` / ``'vertical_bottom'``: the stacked channels
          ``[dist, dz / dist, dist_2d / dist, dy / dist_2d, dx / dist_2d]``
          (the vertical terms use bottom-shifted centres for
          ``'vertical_bottom'``), minus the first channel if
          ``spatial_dim == 4``.
        * any other relation type: the raw centre offsets are passed through.
    """
    if pairwise_rel_type == 'mlp':
        boxes = torch.cat([obj_centers, obj_whls], 2)
        num_objs = boxes.size(1)
        as_rows = einops.repeat(boxes, 'b l d -> b l x d', x=num_objs)
        as_cols = einops.repeat(boxes, 'b l d -> b x l d', x=num_objs)
        return torch.cat([as_rows, as_cols], dim=3)

    def _offsets(points):
        # Difference between every ordered pair of points: (b, l, l, d).
        return einops.repeat(points, 'b l d -> b l 1 d') \
            - einops.repeat(points, 'b l d -> b 1 l d')

    def _length(vectors):
        # Euclidean length over the last axis, kept away from zero by eps.
        return torch.sqrt(torch.sum(vectors ** 2, 3) + eps)

    offsets = _offsets(obj_centers)
    dists = _length(offsets)  # (b, l, l)
    if spatial_dist_norm:
        per_item_max = torch.max(dists.view(dists.size(0), -1), dim=1)[0]
        scaled_dists = dists / einops.repeat(per_item_max, 'b -> b 1 1')
    else:
        scaled_dists = dists

    if spatial_dim == 1:
        return scaled_dists.unsqueeze(3)

    dists_2d = _length(offsets[..., :2])
    if pairwise_rel_type == 'center':
        features = torch.stack(
            [
                scaled_dists,
                offsets[..., 2] / dists,
                dists_2d / dists,
                offsets[..., 1] / dists_2d,
                offsets[..., 0] / dists_2d,
            ],
            dim=3,
        )
    elif pairwise_rel_type == 'vertical_bottom':
        # Vertical terms are measured between shifted centres; the horizontal
        # direction terms still come from the unshifted centres.
        bottoms = torch.clone(obj_centers)
        bottoms[:, :, 2] -= obj_whls[:, :, 2]
        bottom_offsets = _offsets(bottoms)
        bottom_dists = _length(bottom_offsets)  # (b, l, l)
        bottom_dists_2d = _length(bottom_offsets[..., :2])
        features = torch.stack(
            [
                scaled_dists,
                bottom_offsets[..., 2] / bottom_dists,
                bottom_dists_2d / bottom_dists,
                offsets[..., 1] / dists_2d,
                offsets[..., 0] / dists_2d,
            ],
            dim=3,
        )
    else:
        # Unrecognised relation type: the raw offsets fall through unchanged.
        features = offsets

    if spatial_dim == 4:
        features = features[..., 1:]
    return features
82
+
83
def convert_pc_to_box(obj_pc):
    """Return the axis-aligned bounding box of a point cloud.

    Args:
        obj_pc: 2-D array indexed as ``obj_pc[:, k]``; columns 0, 1 and 2 are
            read as x, y and z. Any further columns are ignored.

    Returns:
        ``(center, box_size)``: two 3-element lists holding the box midpoint
        and the box extent along x, y and z.
    """
    center = []
    box_size = []
    for axis in range(3):
        coords = obj_pc[:, axis]
        lo = np.min(coords)
        hi = np.max(coords)
        center.append((lo + hi) / 2)
        box_size.append(hi - lo)
    return center, box_size
93
+
94
class LabelConverter(object):
    """Lookup tables between label vocabularies, loaded from a TSV file.

    The file is read with a tab delimiter; its first row is treated as a
    header and skipped. For each remaining row (numbered from 0, called the
    "raw id" below) the converter reads column 0 as the ScanNet raw id,
    column 1 as the raw category name, column 4 as the NYU40 id and column 7
    as the NYU40 class name.
    """

    def __init__(self, file_path):
        """Populate every mapping from the TSV file at *file_path*."""
        self.raw_name_to_id = {}
        self.nyu40id_to_id = {}
        self.nyu40_name_to_id = {}
        self.scannet_name_to_scannet_id = {'cabinet':0, 'bed':1, 'chair':2, 'sofa':3, 'table':4,
            'door':5, 'window':6,'bookshelf':7,'picture':8, 'counter':9, 'desk':10, 'curtain':11,
            'refrigerator':12, 'shower curtain':13, 'toilet':14, 'sink':15, 'bathtub':16, 'others':17}
        self.id_to_scannetid = {}
        self.scannet_raw_id_to_raw_name = {}
        self.raw_name_to_scannet_raw_id = {}

        with open(file_path, encoding='utf-8') as fd:
            rows = list(csv.reader(fd, delimiter="\t", quotechar='"'))

        fallback_id = self.scannet_name_to_scannet_id['others']
        # rows[0] is the header; data rows are numbered from 0.
        for raw_id, row in enumerate(rows[1:]):
            scannet_raw_id = int(row[0])
            raw_name = row[1]
            nyu40_id = int(row[4])
            nyu40_name = row[7]

            self.raw_name_to_id[raw_name] = raw_id
            self.scannet_raw_id_to_raw_name[scannet_raw_id] = raw_name
            self.raw_name_to_scannet_raw_id[raw_name] = scannet_raw_id
            # Several rows may share an NYU40 id/name; the last row wins.
            self.nyu40id_to_id[nyu40_id] = raw_id
            self.nyu40_name_to_id[nyu40_name] = raw_id
            # NYU40 names outside the ScanNet class table map to 'others'.
            self.id_to_scannetid[raw_id] = self.scannet_name_to_scannet_id.get(nyu40_name, fallback_id)
123
+
124
def build_rotate_mat(split, rot_aug=True, rand_angle='axis'):
    """Sample a rotation about the z axis for data augmentation.

    An angle is always drawn from NumPy's global RNG, even when no matrix is
    returned: uniformly in ``[0, 2*pi)`` when ``rand_angle == 'random'``,
    otherwise one of the four multiples of ``pi / 2``.

    Args:
        split: dataset split name; a matrix is only produced for ``'train'``.
        rot_aug: disables the rotation when falsy.
        rand_angle: ``'random'`` for a continuous angle, anything else for an
            axis-aligned angle.

    Returns:
        A ``(3, 3)`` float32 rotation matrix, or ``None`` when the drawn angle
        is zero, the split is not ``'train'`` or *rot_aug* is falsy.
    """
    if rand_angle == 'random':
        theta = np.random.rand() * np.pi * 2
    else:
        axis_angles = [0, np.pi/2, np.pi, np.pi*3/2]
        theta = axis_angles[np.random.randint(len(axis_angles))]

    wants_rotation = (theta is not None) and (theta != 0) and (split == 'train') and rot_aug
    if not wants_rotation:
        return None

    cos_t = np.cos(theta)
    sin_t = np.sin(theta)
    return np.array(
        [
            [cos_t, -sin_t, 0],
            [sin_t, cos_t, 0],
            [0, 0, 1],
        ],
        dtype=np.float32,
    )
140
+
141
def obj_processing_post(obj_pcds, rot_aug=True):
    """Derive per-object locations/boxes and normalise object point clouds.

    Args:
        obj_pcds: NumPy array indexed as ``[:, :, :3]`` for xyz; statistics
            are reduced over axis 1.
            # assumes layout (num_objs, num_points, channels) - TODO confirm
        rot_aug: forwarded to ``build_rotate_mat`` (called with split
            ``'val'``).

    Returns:
        ``(obj_pcds, obj_locs, obj_boxes, rot_matrix)`` where

        * ``obj_pcds`` is the tensor view of the input with xyz centred on the
          per-object mean and divided by the largest point norm,
        * ``obj_locs`` is ``[mean xyz, box size]`` per object,
        * ``obj_boxes`` is ``[box midpoint, box size]`` per object,
        * ``rot_matrix`` is the (transposed, tensor) rotation applied, or
          ``None`` if no rotation was drawn.

    Note:
        ``torch.from_numpy`` shares memory with the input array, and the
        centring/scaling below is done in place, so the caller's array is
        modified as well.
    """
    obj_pcds = torch.from_numpy(obj_pcds)

    rot_matrix = build_rotate_mat('val', rot_aug)
    if rot_matrix is not None:
        rot_matrix = torch.from_numpy(rot_matrix.transpose())
        obj_pcds[:, :, :3] @= rot_matrix

    # `coords` is a view: in-place edits below write through to obj_pcds.
    coords = obj_pcds[:, :, :3]

    # Location statistics are taken before the points are normalised.
    mean_center = coords.mean(1)
    lower = coords.min(1).values
    upper = coords.max(1).values
    extent = upper - lower
    mid_point = (lower + upper) / 2
    obj_locs = torch.cat([mean_center, extent], dim=1)
    obj_boxes = torch.cat([mid_point, extent], dim=1)

    # centering
    coords.sub_(coords.mean(1, keepdim=True))

    # normalization
    radius = (coords ** 2).sum(2).sqrt().max(1).values
    radius.clamp_(min=1e-6)  # guard against degenerate (single-point) objects
    coords.div_(radius[:, None, None])

    return obj_pcds, obj_locs, obj_boxes, rot_matrix
166
+
167
+
168
def pad_sequence(sequence_list, max_len=None, pad=0, return_mask=False):
    """Stack variable-length tensors into one padded batch tensor.

    Args:
        sequence_list: non-empty list of tensors that differ only in the size
            of their first dimension.
        max_len: padded length of the first dimension; defaults to the longest
            sequence in the list.
        pad: fill value for the padded positions.
        return_mask: when true, also return the padding mask.

    Returns:
        The padded tensor of shape ``(len(sequence_list), max_len, ...)`` with
        the dtype and device of the first sequence. With *return_mask*, a
        tuple ``(padded, mask)`` where ``mask`` is a boolean
        ``(len(sequence_list), max_len)`` tensor that is ``True`` at padded
        positions.
    """
    lengths = [seq.shape[0] for seq in sequence_list]
    target_len = max(lengths) if max_len is None else max_len

    first = sequence_list[0]
    out_dtype, out_device = first.dtype, first.device
    out_shape = [len(sequence_list), target_len] + list(first.shape)[1:]

    # Multiplying by `pad` may promote the dtype; it is cast back after the
    # copy loop.
    batch = torch.ones(out_shape, dtype=out_dtype, device=out_device) * pad
    for row, seq in enumerate(sequence_list):
        batch[row, :seq.shape[0]] = seq
    batch = batch.to(out_dtype)

    if not return_mask:
        return batch

    positions = torch.arange(target_len).to(out_device)
    valid_lens = torch.LongTensor(lengths).to(out_device)
    mask = positions[None, :] >= valid_lens[:, None]  # True as masked.
    return batch, mask
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ clip==0.2.0
2
+ einops==0.8.0
3
+ gradio==4.39.0
4
+ numpy==1.24.3
5
+ peft==0.12.0
6
+ timm==1.0.8
7
+ torch==2.3.1
8
+ transformers==4.40.2