# Accelerator: DeepSpeed wrapper driving training/eval loop, checkpointing and logging.
accelerator:
  _target_: trainer.accelerators.deepspeed_accelerator.DeepSpeedAccelerator
  output_dir: outputs/Multi-CLIP_ViT_H_14-clean/from_con_image/stage2_eval_from_latest
  mixed_precision: BF16
  gradient_accumulation_steps: 1
  log_with: WANDB
  debug:
    activate: false
    port: 5900
  seed: 42
  # Resume stage-2 eval from the latest stage-1 checkpoint.
  resume_from_latest: true
  resume_from_name_or_path: outputs/Multi-CLIP_ViT_H_14-clean/from_con_image/stage1_neg/checkpoint-latest
  train_from_scratch: true
  max_steps: 4000
  num_epochs: 10
  validate_steps: 100
  eval_on_start: true
  project_name: Multi-CLIP_ViT_H_14-clean
  max_grad_norm: 1.0
  save_steps: 100
  # Checkpoint selection: keep the checkpoints with the lowest validation loss.
  metric_name: loss
  metric_mode: MIN
  limit_num_checkpoints: 3
  save_only_if_best: true
  # Quoted to avoid the YAML "Norway problem" (bare NO parses as boolean false).
  dynamo_backend: 'NO'
  keep_best_ckpts: true
  # DeepSpeed engine config used during training ("auto" values are filled in
  # by the accelerator from the top-level optimizer/lr_scheduler settings).
  deepspeed:
    fp16:
      enabled: false
    bf16:
      enabled: true
    optimizer:
      type: AdamW
      params:
        lr: auto
        weight_decay: auto
        torch_adam: true
        adam_w_mode: true
    scheduler:
      type: WarmupDecayLR
      params:
        warmup_min_lr: auto
        warmup_max_lr: auto
        warmup_num_steps: auto
        total_num_steps: auto
    zero_optimization:
      stage: 2
      allgather_partitions: true
      allgather_bucket_size: 200000000.0
      overlap_comm: true
      reduce_scatter: true
      reduce_bucket_size: 500000000
      contiguous_gradients: true
    gradient_accumulation_steps: 16
    gradient_clipping: 1.0
    steps_per_print: 1
    train_batch_size: auto
    train_micro_batch_size_per_gpu: auto
    wall_clock_breakdown: false
  # Variant of the DeepSpeed config applied for the final phase; identical to
  # `deepspeed` except step logging is silenced.
  deepspeed_final:
    fp16:
      enabled: false
    bf16:
      enabled: true
    optimizer:
      type: AdamW
      params:
        lr: auto
        weight_decay: auto
        torch_adam: true
        adam_w_mode: true
    scheduler:
      type: WarmupDecayLR
      params:
        warmup_min_lr: auto
        warmup_max_lr: auto
        warmup_num_steps: auto
        total_num_steps: auto
    zero_optimization:
      stage: 2
      allgather_partitions: true
      allgather_bucket_size: 200000000.0
      overlap_comm: true
      reduce_scatter: true
      reduce_bucket_size: 500000000
      contiguous_gradients: true
    gradient_accumulation_steps: 16
    gradient_clipping: 1.0
    # NOTE(review): float infinity effectively disables per-step printing;
    # DeepSpeed documents steps_per_print as an integer — confirm the loader
    # tolerates .inf before relying on this.
    steps_per_print: .inf
    train_batch_size: auto
    train_micro_batch_size_per_gpu: auto
    wall_clock_breakdown: false
# Task: multi-view CLIP preference task; column names map batch fields to
# the dataset's output keys.
task:
  limit_examples_to_wandb: 50
  _target_: trainer.tasks.mvclip_task.MVCLIPTask
  pretrained_clip_model_name_or_path: laion/CLIP-ViT-H-14-laion2B-s32B-b79K
  label_0_column_name: label_0
  label_1_column_name: label_1
  reference_type_column_name: reference_type
  reference_input_column_name: reference_input
  reference_idx_column_name: reference_idx
  normal_pixels_0_column_name: normal_pixel_values_0
  normal_pixels_1_column_name: normal_pixel_values_1
  rgb_pixels_0_column_name: rgb_pixel_values_0
  rgb_pixels_1_column_name: rgb_pixel_values_1
# Model: CLIP ViT-H/14 backbone; text tower frozen, vision tower and the
# logit scale/projection remain trainable.
model:
  _target_: trainer.models.mvclip_model.MVCLIPModel
  pretrained_clip_model_name_or_path: laion/CLIP-ViT-H-14-laion2B-s32B-b79K
  freeze_clip: true
  freeze_vision: false
  freeze_logit_scale: false
  freeze_logit_proj: false
# Criterion: multi-view preference loss; column names must match the
# task/dataset sections above.
criterion:
  _target_: trainer.criterions.criterion.MVCriterion
  is_distributed: true
  reference_type_column_name: reference_type
  reference_input_column_name: reference_input
  label_0_column_name: label_0
  label_1_column_name: label_1
  normal_pixels_0_column_name: normal_pixel_values_0
  normal_pixels_1_column_name: normal_pixel_values_1
  rgb_pixels_0_column_name: rgb_pixel_values_0
  rgb_pixels_1_column_name: rgb_pixel_values_1
  num_examples_per_prompt_column_name: num_example_per_prompt
  in_batch_negatives: false
# Dataset: pairwise battle records with normal/RGB renders; the processor
# sub-config instantiates the matching CLIP preprocessor.
dataset:
  train_split_name: train_valid
  valid_split_name: valid
  test_split_name: test
  batch_size: 16
  num_workers: 2
  drop_last: true
  _target_: trainer.datasetss.dataset.MVDataset
  records_dir: /mnt/petrelfs/zhangyuhan/data/evaluation/records/battle/clean/splits
  gallery_dir: /mnt/petrelfs/zhangyuhan/data/evaluation/gallery510
  text_gallery_path: /mnt/petrelfs/zhangyuhan/data/evaluation/gallery510/prompts_510.json
  image_gallery_dir: /mnt/petrelfs/zhangyuhan/data/evaluation/gallery510/rgba
  con_images_dir: /mnt/petrelfs/zhangyuhan/data/evaluation/con_images
  split_con_image: false
  cache_dir: null
  reference_type_column_name: reference_type
  reference_idx_column_name: reference_idx
  reference_column_name: reference
  normal_image_0_column_name: normal_image_0
  normal_image_1_column_name: normal_image_1
  rgb_image_0_column_name: rgb_image_0
  rgb_image_1_column_name: rgb_image_1
  eval_dims_column_name: eval_dims
  label_0_column_name: label_0
  label_1_column_name: label_1
  are_different_column_name: are_different
  has_label_column_name: has_label
  reference_input_column_name: reference_input
  normal_pixels_0_column_name: normal_pixel_values_0
  normal_pixels_1_column_name: normal_pixel_values_1
  rgb_pixels_0_column_name: rgb_pixel_values_0
  rgb_pixels_1_column_name: rgb_pixel_values_1
  num_examples_per_prompt_column_name: num_example_per_prompt
  shuffle: true
  # Filtering switches: labeled-only filtering applies outside the train split.
  keep_only_different: false
  keep_only_with_label: false
  keep_only_with_label_in_non_train: true
  processor:
    _target_: transformers.AutoProcessor.from_pretrained
    pretrained_model_name_or_path: laion/CLIP-ViT-H-14-laion2B-s32B-b79K
  # -1 means no per-prompt example cap.
  limit_examples_per_prompt: -1
  only_on_best: false
# Optimizer: dummy placeholder — real optimizer state is owned by DeepSpeed;
# lr/weight_decay here feed the engine's "auto" fields.
optimizer:
  _target_: trainer.optimizers.dummy_optimizer.BaseDummyOptim
  lr: 3.0e-06
  weight_decay: 0.3
# LR scheduler: dummy placeholder mirrored into DeepSpeed's WarmupDecayLR
# (warmup steps and total steps match accelerator.max_steps).
lr_scheduler:
  _target_: trainer.lr_schedulers.dummy_lr_scheduler.instantiate_dummy_lr_scheduler
  lr: 3.0e-06
  lr_warmup_steps: 500
  total_num_steps: 4000
# Top-level debug switch (remote-debugger attach port); disabled by default.
debug:
  activate: false
  port: 5900
output_dir: outputs/Multi-CLIP_ViT_H_14-clean/from_con_image/stage2_eval_from_latest |
|
|