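# Configuration for the Multi-CLIP_ViT_H_14-clean "stage2_eval_from_latest" run.
# Components are instantiated from their Hydra-style _target_ paths; training uses
# DeepSpeed ZeRO stage 2 with bf16 mixed precision.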
accelerator:
  _target_: trainer.accelerators.deepspeed_accelerator.DeepSpeedAccelerator
  output_dir: outputs/Multi-CLIP_ViT_H_14-clean/from_con_image/stage2_eval_from_latest
  mixed_precision: BF16
  gradient_accumulation_steps: 1
  log_with: WANDB
  debug:
    activate: false
    port: 5900
  seed: 42
  resume_from_latest: true
  resume_from_name_or_path: outputs/Multi-CLIP_ViT_H_14-clean/from_con_image/stage1_neg/checkpoint-latest
  train_from_scratch: true
  max_steps: 4000
  num_epochs: 10
  validate_steps: 100
  eval_on_start: true
  project_name: Multi-CLIP_ViT_H_14-clean
  max_grad_norm: 1.0
  save_steps: 100
  metric_name: loss
  metric_mode: MIN
  limit_num_checkpoints: 3
  save_only_if_best: true
  dynamo_backend: 'NO'
  keep_best_ckpts: true
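  # DeepSpeed runtime config. Fields set to "auto" are presumably resolved at launch
  # from the optimizer/lr_scheduler sections and the batch-size settings below.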
  deepspeed:
    fp16:
      enabled: false
    bf16:
      enabled: true
    optimizer:
      type: AdamW
      params:
        lr: auto
        weight_decay: auto
        torch_adam: true
        adam_w_mode: true
    scheduler:
      type: WarmupDecayLR
      params:
        warmup_min_lr: auto
        warmup_max_lr: auto
        warmup_num_steps: auto
        total_num_steps: auto
    zero_optimization:
      stage: 2
      allgather_partitions: true
      allgather_bucket_size: 200000000.0
      overlap_comm: true
      reduce_scatter: true
      reduce_bucket_size: 500000000
      contiguous_gradients: true
    gradient_accumulation_steps: 16
    gradient_clipping: 1.0
    steps_per_print: 1
    train_batch_size: auto
    train_micro_batch_size_per_gpu: auto
    wall_clock_breakdown: false
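  # Same as the deepspeed block above except steps_per_print: .inf, which effectively
  # disables DeepSpeed's per-step logging.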
  deepspeed_final:
    fp16:
      enabled: false
    bf16:
      enabled: true
    optimizer:
      type: AdamW
      params:
        lr: auto
        weight_decay: auto
        torch_adam: true
        adam_w_mode: true
    scheduler:
      type: WarmupDecayLR
      params:
        warmup_min_lr: auto
        warmup_max_lr: auto
        warmup_num_steps: auto
        total_num_steps: auto
    zero_optimization:
      stage: 2
      allgather_partitions: true
      allgather_bucket_size: 200000000.0
      overlap_comm: true
      reduce_scatter: true
      reduce_bucket_size: 500000000
      contiguous_gradients: true
    gradient_accumulation_steps: 16
    gradient_clipping: 1.0
    steps_per_print: .inf
    train_batch_size: auto
    train_micro_batch_size_per_gpu: auto
    wall_clock_breakdown: false
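# Task wrapper; the *_column_name entries match the columns emitted by the dataset below.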
task:
  limit_examples_to_wandb: 50
  _target_: trainer.tasks.mvclip_task.MVCLIPTask
  pretrained_clip_model_name_or_path: laion/CLIP-ViT-H-14-laion2B-s32B-b79K
  label_0_column_name: label_0
  label_1_column_name: label_1
  reference_type_column_name: reference_type
  reference_input_column_name: reference_input
  reference_idx_column_name: reference_idx
  normal_pixels_0_column_name: normal_pixel_values_0
  normal_pixels_1_column_name: normal_pixel_values_1
  rgb_pixels_0_column_name: rgb_pixel_values_0
  rgb_pixels_1_column_name: rgb_pixel_values_1
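# Model: CLIP ViT-H/14 backbone; the freeze_* flags control which sub-modules are trained.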
model:
  _target_: trainer.models.mvclip_model.MVCLIPModel
  pretrained_clip_model_name_or_path: laion/CLIP-ViT-H-14-laion2B-s32B-b79K
  freeze_clip: true
  freeze_vision: false
  freeze_logit_scale: false
  freeze_logit_proj: false
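# Loss (criterion); column names mirror those declared under task and dataset.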
criterion:
  _target_: trainer.criterions.criterion.MVCriterion
  is_distributed: true
  reference_type_column_name: reference_type
  reference_input_column_name: reference_input
  label_0_column_name: label_0
  label_1_column_name: label_1
  normal_pixels_0_column_name: normal_pixel_values_0
  normal_pixels_1_column_name: normal_pixel_values_1
  rgb_pixels_0_column_name: rgb_pixel_values_0
  rgb_pixels_1_column_name: rgb_pixel_values_1
  num_examples_per_prompt_column_name: num_example_per_prompt
  in_batch_negatives: false
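# Dataset: record/gallery paths plus the column names consumed by the task and criterion above.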
dataset:
  train_split_name: train_valid
  valid_split_name: valid
  test_split_name: test
  batch_size: 16
  num_workers: 2
  drop_last: true
  _target_: trainer.datasetss.dataset.MVDataset
  records_dir: /mnt/petrelfs/zhangyuhan/data/evaluation/records/battle/clean/splits
  gallery_dir: /mnt/petrelfs/zhangyuhan/data/evaluation/gallery510
  text_gallery_path: /mnt/petrelfs/zhangyuhan/data/evaluation/gallery510/prompts_510.json
  image_gallery_dir: /mnt/petrelfs/zhangyuhan/data/evaluation/gallery510/rgba
  con_images_dir: /mnt/petrelfs/zhangyuhan/data/evaluation/con_images
  split_con_image: false
  cache_dir: null
  reference_type_column_name: reference_type
  reference_idx_column_name: reference_idx
  reference_column_name: reference
  normal_image_0_column_name: normal_image_0
  normal_image_1_column_name: normal_image_1
  rgb_image_0_column_name: rgb_image_0
  rgb_image_1_column_name: rgb_image_1
  eval_dims_column_name: eval_dims
  label_0_column_name: label_0
  label_1_column_name: label_1
  are_different_column_name: are_different
  has_label_column_name: has_label
  reference_input_column_name: reference_input
  normal_pixels_0_column_name: normal_pixel_values_0
  normal_pixels_1_column_name: normal_pixel_values_1
  rgb_pixels_0_column_name: rgb_pixel_values_0
  rgb_pixels_1_column_name: rgb_pixel_values_1
  num_examples_per_prompt_column_name: num_example_per_prompt
  shuffle: true
  keep_only_different: false
  keep_only_with_label: false
  keep_only_with_label_in_non_train: true
  processor:
    _target_: transformers.AutoProcessor.from_pretrained
    pretrained_model_name_or_path: laion/CLIP-ViT-H-14-laion2B-s32B-b79K
  limit_examples_per_prompt: -1
  only_on_best: false
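# Dummy optimizer and LR scheduler: the actual AdamW and WarmupDecayLR are created by DeepSpeed
# (see the deepspeed block); these presumably just supply the lr, weight-decay, and warmup values
# that fill its "auto" fields.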
optimizer:
  _target_: trainer.optimizers.dummy_optimizer.BaseDummyOptim
  lr: 3.0e-06
  weight_decay: 0.3
lr_scheduler:
  _target_: trainer.lr_schedulers.dummy_lr_scheduler.instantiate_dummy_lr_scheduler
  lr: 3.0e-06
  lr_warmup_steps: 500
  total_num_steps: 4000
debug:
  activate: false
  port: 5900
output_dir: outputs/Multi-CLIP_ViT_H_14-clean/from_con_image/stage2_eval_from_latest