accelerator:
  _target_: trainer.accelerators.deepspeed_accelerator.DeepSpeedAccelerator
  output_dir: outputs/Multi-CLIP_ViT_H_14-clean/from_con_image/stage2_eval_from_latest
  mixed_precision: BF16
  gradient_accumulation_steps: 1
  log_with: WANDB
  debug:
    activate: false
    port: 5900
  seed: 42
  resume_from_latest: true
  resume_from_name_or_path: outputs/Multi-CLIP_ViT_H_14-clean/from_con_image/stage1_neg/checkpoint-latest
  train_from_scratch: true
  max_steps: 4000
  num_epochs: 10
  validate_steps: 100
  eval_on_start: true
  project_name: Multi-CLIP_ViT_H_14-clean
  max_grad_norm: 1.0
  save_steps: 100
  metric_name: loss
  metric_mode: MIN
  limit_num_checkpoints: 3
  save_only_if_best: true
  dynamo_backend: 'NO'
  keep_best_ckpts: true
  deepspeed:
    fp16:
      enabled: false
    bf16:
      enabled: true
    optimizer:
      type: AdamW
      params:
        lr: auto
        weight_decay: auto
        torch_adam: true
        adam_w_mode: true
    scheduler:
      type: WarmupDecayLR
      params:
        warmup_min_lr: auto
        warmup_max_lr: auto
        warmup_num_steps: auto
        total_num_steps: auto
    zero_optimization:
      stage: 2
      allgather_partitions: true
      allgather_bucket_size: 200000000.0
      overlap_comm: true
      reduce_scatter: true
      reduce_bucket_size: 500000000
      contiguous_gradients: true
    gradient_accumulation_steps: 16
    gradient_clipping: 1.0
    steps_per_print: 1
    train_batch_size: auto
    train_micro_batch_size_per_gpu: auto
    wall_clock_breakdown: false
  deepspeed_final:
    fp16:
      enabled: false
    bf16:
      enabled: true
    optimizer:
      type: AdamW
      params:
        lr: auto
        weight_decay: auto
        torch_adam: true
        adam_w_mode: true
    scheduler:
      type: WarmupDecayLR
      params:
        warmup_min_lr: auto
        warmup_max_lr: auto
        warmup_num_steps: auto
        total_num_steps: auto
    zero_optimization:
      stage: 2
      allgather_partitions: true
      allgather_bucket_size: 200000000.0
      overlap_comm: true
      reduce_scatter: true
      reduce_bucket_size: 500000000
      contiguous_gradients: true
    gradient_accumulation_steps: 16
    gradient_clipping: 1.0
    steps_per_print: .inf
    train_batch_size: auto
    train_micro_batch_size_per_gpu: auto
    wall_clock_breakdown: false
task:
  limit_examples_to_wandb: 50
  _target_: trainer.tasks.mvclip_task.MVCLIPTask
  pretrained_clip_model_name_or_path: laion/CLIP-ViT-H-14-laion2B-s32B-b79K
  label_0_column_name: label_0
  label_1_column_name: label_1
  reference_type_column_name: reference_type
  reference_input_column_name: reference_input
  reference_idx_column_name: reference_idx
  normal_pixels_0_column_name: normal_pixel_values_0
  normal_pixels_1_column_name: normal_pixel_values_1
  rgb_pixels_0_column_name: rgb_pixel_values_0
  rgb_pixels_1_column_name: rgb_pixel_values_1
model:
  _target_: trainer.models.mvclip_model.MVCLIPModel
  pretrained_clip_model_name_or_path: laion/CLIP-ViT-H-14-laion2B-s32B-b79K
  freeze_clip: true
  freeze_vision: false
  freeze_logit_scale: false
  freeze_logit_proj: false
criterion:
  _target_: trainer.criterions.criterion.MVCriterion
  is_distributed: true
  reference_type_column_name: reference_type
  reference_input_column_name: reference_input
  label_0_column_name: label_0
  label_1_column_name: label_1
  normal_pixels_0_column_name: normal_pixel_values_0
  normal_pixels_1_column_name: normal_pixel_values_1
  rgb_pixels_0_column_name: rgb_pixel_values_0
  rgb_pixels_1_column_name: rgb_pixel_values_1
  num_examples_per_prompt_column_name: num_example_per_prompt
  in_batch_negatives: false
dataset:
  train_split_name: train_valid
  valid_split_name: valid
  test_split_name: test
  batch_size: 16
  num_workers: 2
  drop_last: true
  _target_: trainer.datasetss.dataset.MVDataset
  records_dir: /mnt/petrelfs/zhangyuhan/data/evaluation/records/battle/clean/splits
  gallery_dir: /mnt/petrelfs/zhangyuhan/data/evaluation/gallery510
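  # Evaluation gallery assets (text prompts and RGBA renders); the paths below are cluster-specific.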
  text_gallery_path: /mnt/petrelfs/zhangyuhan/data/evaluation/gallery510/prompts_510.json
  image_gallery_dir: /mnt/petrelfs/zhangyuhan/data/evaluation/gallery510/rgba
  con_images_dir: /mnt/petrelfs/zhangyuhan/data/evaluation/con_images
  split_con_image: false
  cache_dir: null
  reference_type_column_name: reference_type
  reference_idx_column_name: reference_idx
  reference_column_name: reference
  normal_image_0_column_name: normal_image_0
  normal_image_1_column_name: normal_image_1
  rgb_image_0_column_name: rgb_image_0
  rgb_image_1_column_name: rgb_image_1
  eval_dims_column_name: eval_dims
  label_0_column_name: label_0
  label_1_column_name: label_1
  are_different_column_name: are_different
  has_label_column_name: has_label
  reference_input_column_name: reference_input
  normal_pixels_0_column_name: normal_pixel_values_0
  normal_pixels_1_column_name: normal_pixel_values_1
  rgb_pixels_0_column_name: rgb_pixel_values_0
  rgb_pixels_1_column_name: rgb_pixel_values_1
  num_examples_per_prompt_column_name: num_example_per_prompt
  shuffle: true
  keep_only_different: false
  keep_only_with_label: false
  keep_only_with_label_in_non_train: true
  processor:
    _target_: transformers.AutoProcessor.from_pretrained
    pretrained_model_name_or_path: laion/CLIP-ViT-H-14-laion2B-s32B-b79K
  limit_examples_per_prompt: -1
  only_on_best: false
optimizer:
  _target_: trainer.optimizers.dummy_optimizer.BaseDummyOptim
  lr: 3.0e-06
  weight_decay: 0.3
lr_scheduler:
  _target_: trainer.lr_schedulers.dummy_lr_scheduler.instantiate_dummy_lr_scheduler
  lr: 3.0e-06
  lr_warmup_steps: 500
  total_num_steps: 4000
debug:
  activate: false
  port: 5900
output_dir: outputs/Multi-CLIP_ViT_H_14-clean/from_con_image/stage2_eval_from_latest
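# Note: the top-level debug and output_dir entries mirror the accelerator's debug/output_dir settings above.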