# Training configuration.
# Provenance noted by the file viewer this was copied from:
#   file size 2,233 bytes; revision ids aea26c8 and 1162616
#   (presumably short commit hashes - confirm).
# Run mode and bookkeeping for this job.
task_name: train
tags: [dev]
# The train stage is switched on and the test stage is switched off.
train: true
test: false
# Explicit nulls: neither a checkpoint path nor a seed is set in this file.
ckpt_path: null
seed: null
# Dataset selection and data-augmentation settings.
DATASETS:
  # Training datasets, each with a WEIGHT. The fourteen weights below sum
  # to 1.0.
  # NOTE(review): presumably per-dataset sampling probabilities - confirm
  # against the data-loading code.
  TRAIN:
    FREIHAND-TRAIN:
      WEIGHT: 0.2
    INTERHAND26M-TRAIN:
      WEIGHT: 0.1
    MTC-TRAIN:
      WEIGHT: 0.05
    RHD-TRAIN:
      WEIGHT: 0.05
    COCOW-TRAIN:
      WEIGHT: 0.05
    HALPE-TRAIN:
      WEIGHT: 0.05
    MPIINZSL-TRAIN:
      WEIGHT: 0.05
    HO3D-TRAIN:
      WEIGHT: 0.05
    H2O3D-TRAIN:
      WEIGHT: 0.05
    DEX-TRAIN:
      WEIGHT: 0.05
    BEDLAM-TRAIN:
      WEIGHT: 0.05
    REINTER-TRAIN:
      WEIGHT: 0.1
    HOT3D-TRAIN:
      WEIGHT: 0.05
    ARCTIC-TRAIN:
      WEIGHT: 0.1
  # Validation datasets.
  # NOTE(review): the only entry is FREIHAND-TRAIN, a name that also appears
  # under TRAIN above - confirm that validating on it is intended.
  VAL:
    FREIHAND-TRAIN:
      WEIGHT: 1.0
  MOCAP: FREIHAND-MOCAP
  BETAS_REG: true
  # Augmentation parameters.
  CONFIG:
    SCALE_FACTOR: 0.3
    # NOTE(review): presumably degrees - confirm.
    ROT_FACTOR: 30
    TRANS_FACTOR: 0.02
    COLOR_SCALE: 0.2
    ROT_AUG_RATE: 0.6
    TRANS_AUG_RATE: 0.5
    # DO_FLIP is false and both FLIP_AUG_RATE and EXTREME_CROP_AUG_RATE are
    # 0.0, so flipping and extreme cropping look disabled in this
    # configuration (confirm against the augmentation code).
    DO_FLIP: false
    FLIP_AUG_RATE: 0.0
    EXTREME_CROP_AUG_RATE: 0.0
    EXTREME_CROP_AUG_LEVEL: 1
# Miscellaneous switches: warning handling, tag enforcement, config printing.
# NOTE(review): which code consumes these keys is not visible here - confirm.
extras:
  ignore_warnings: false
  enforce_tags: true
  print_config: true
# Name of this experiment.
exp_name: WiLoR
# MANO model asset locations and options.
MANO:
  DATA_DIR: mano_data
  # ${MANO.DATA_DIR} refers to the DATA_DIR key above. YAML itself treats the
  # value as a literal string, so the substitution relies on the config loader
  # (presumably OmegaConf/Hydra-style interpolation - confirm).
  MODEL_PATH: ${MANO.DATA_DIR}/mano
  GENDER: neutral
  NUM_HAND_JOINTS: 15
  # Same interpolation as MODEL_PATH.
  MEAN_PARAMS: ${MANO.DATA_DIR}/mano_mean_params.npz
  CREATE_BODY_POSE: false
# Additional settings: a focal length, image-logging counts, and a joint index.
EXTRA:
  # NOTE(review): unit is not stated here (presumably pixels) - confirm.
  FOCAL_LENGTH: 5000
  NUM_LOG_IMAGES: 4
  NUM_LOG_SAMPLES_PER_IMAGE: 8
  # NOTE(review): the name refers to a pelvis although this is a hand-model
  # config; presumably the index of the root joint - confirm.
  PELVIS_IND: 0
# Run length, logging/validation/checkpoint cadence, and data-loader settings.
GENERAL:
  TOTAL_STEPS: 1000000
  # LOG_STEPS, VAL_STEPS and CHECKPOINT_STEPS share the value 1000.
  # NOTE(review): presumably intervals measured in training steps - confirm.
  LOG_STEPS: 1000
  VAL_STEPS: 1000
  CHECKPOINT_STEPS: 1000
  CHECKPOINT_SAVE_TOP_K: 1
  # NOTE(review): presumably passed to the data loader - confirm.
  NUM_WORKERS: 8
  PREFETCH_FACTOR: 2
# Optimisation and batch settings.
TRAIN:
  # Written with an explicit decimal point and a signed exponent so that
  # YAML 1.1 loaders (e.g. PyYAML) resolve it as a float, not a string.
  LR: 1.0e-05
  WEIGHT_DECAY: 0.0001
  BATCH_SIZE: 32
  LOSS_REDUCTION: mean
  NUM_TRAIN_SAMPLES: 2
  NUM_TEST_SAMPLES: 64
  POSE_2D_NOISE_RATIO: 0.01
  # NOTE(review): the key says SMPL while the rest of the file configures
  # MANO; presumably a name kept from another codebase - confirm.
  SMPL_PARAM_NOISE_RATIO: 0.005
# Network definition: input preprocessing, backbone, and MANO head.
MODEL:
  IMAGE_SIZE: 256
  # Three values each for mean and std; the numbers match the widely used
  # ImageNet RGB statistics.
  IMAGE_MEAN: [0.485, 0.456, 0.406]
  IMAGE_STD: [0.229, 0.224, 0.225]

  BACKBONE:
    TYPE: vit
    # Relative path.
    # NOTE(review): confirm which directory it is resolved against.
    PRETRAINED_WEIGHTS: training_data/vitpose_backbone.pth

  MANO_HEAD:
    TYPE: transformer_decoder
    IN_CHANNELS: 2048
    # Hyper-parameters for the decoder type named by TYPE above.
    TRANSFORMER_DECODER:
      depth: 6
      heads: 8
      mlp_dim: 1024
      dim_head: 64
      # Both dropout rates are zero in this configuration.
      dropout: 0.0
      emb_dropout: 0.0
      norm: layer
      # NOTE(review): presumably the backbone feature width - confirm.
      context_dim: 1280
# One weight per named loss term.
# NOTE(review): presumably multipliers applied to each term before the terms
# are summed - confirm in the training code.
LOSS_WEIGHTS:
  KEYPOINTS_3D: 0.05
  KEYPOINTS_2D: 0.01
  GLOBAL_ORIENT: 0.001
  HAND_POSE: 0.001
  BETAS: 0.0005
  ADVERSARIAL: 0.0005