# 3dgen-score-mvclip-v1 / config.yaml
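# Hydra-style training config: each `_target_` names the class the trainer
# instantiates, with the sibling keys passed as constructor arguments.
# (Section comments below are a best-effort reading of the field names,
# not documented behavior.)
#
# `accelerator` configures the DeepSpeed-backed run loop: BF16 mixed
# precision, W&B logging, and resumption from the latest stage-1
# checkpoint named in resume_from_name_or_path.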
accelerator:
  _target_: trainer.accelerators.deepspeed_accelerator.DeepSpeedAccelerator
  output_dir: outputs/Multi-CLIP_ViT_H_14-clean/from_con_image/stage2_eval_from_latest
  mixed_precision: BF16
  gradient_accumulation_steps: 1
  log_with: WANDB
  debug:
    activate: false
    port: 5900
  seed: 42
  resume_from_latest: true
  resume_from_name_or_path: outputs/Multi-CLIP_ViT_H_14-clean/from_con_image/stage1_neg/checkpoint-latest
  train_from_scratch: true
  max_steps: 4000
  num_epochs: 10
  validate_steps: 100
  eval_on_start: true
  project_name: Multi-CLIP_ViT_H_14-clean
  max_grad_norm: 1.0
  save_steps: 100
  metric_name: loss
  metric_mode: MIN
  limit_num_checkpoints: 3
  save_only_if_best: true
  dynamo_backend: 'NO'
  keep_best_ckpts: true
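  # DeepSpeed config used during training. Values set to `auto` are
  # presumably resolved at launch time (lr/weight decay from the top-level
  # optimizer section, batch sizes from the dataset section), as in the
  # standard Accelerate + DeepSpeed integration.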
  deepspeed:
    fp16:
      enabled: false
    bf16:
      enabled: true
    optimizer:
      type: AdamW
      params:
        lr: auto
        weight_decay: auto
        torch_adam: true
        adam_w_mode: true
    scheduler:
      type: WarmupDecayLR
      params:
        warmup_min_lr: auto
        warmup_max_lr: auto
        warmup_num_steps: auto
        total_num_steps: auto
    zero_optimization:
      stage: 2
      allgather_partitions: true
      allgather_bucket_size: 200000000.0
      overlap_comm: true
      reduce_scatter: true
      reduce_bucket_size: 500000000
      contiguous_gradients: true
    gradient_accumulation_steps: 16
    gradient_clipping: 1.0
    steps_per_print: 1
    train_batch_size: auto
    train_micro_batch_size_per_gpu: auto
    wall_clock_breakdown: false
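  # A second DeepSpeed config, identical to `deepspeed` above except that
  # steps_per_print is .inf (per-step logging silenced); presumably swapped
  # in for a final or evaluation phase of the run.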
  deepspeed_final:
    fp16:
      enabled: false
    bf16:
      enabled: true
    optimizer:
      type: AdamW
      params:
        lr: auto
        weight_decay: auto
        torch_adam: true
        adam_w_mode: true
    scheduler:
      type: WarmupDecayLR
      params:
        warmup_min_lr: auto
        warmup_max_lr: auto
        warmup_num_steps: auto
        total_num_steps: auto
    zero_optimization:
      stage: 2
      allgather_partitions: true
      allgather_bucket_size: 200000000.0
      overlap_comm: true
      reduce_scatter: true
      reduce_bucket_size: 500000000
      contiguous_gradients: true
    gradient_accumulation_steps: 16
    gradient_clipping: 1.0
    steps_per_print: .inf
    train_batch_size: auto
    train_micro_batch_size_per_gpu: auto
    wall_clock_breakdown: false
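# Task: wires dataset columns into the training/eval loop. Each example
# appears to be a pair of candidate renderings (suffixes 0 and 1) with both
# RGB and normal-map views plus preference labels, compared against a text
# or image reference.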
task:
  limit_examples_to_wandb: 50
  _target_: trainer.tasks.mvclip_task.MVCLIPTask
  pretrained_clip_model_name_or_path: laion/CLIP-ViT-H-14-laion2B-s32B-b79K
  label_0_column_name: label_0
  label_1_column_name: label_1
  reference_type_column_name: reference_type
  reference_input_column_name: reference_input
  reference_idx_column_name: reference_idx
  normal_pixels_0_column_name: normal_pixel_values_0
  normal_pixels_1_column_name: normal_pixel_values_1
  rgb_pixels_0_column_name: rgb_pixel_values_0
  rgb_pixels_1_column_name: rgb_pixel_values_1
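# Model: a CLIP ViT-H/14 (LAION-2B) backbone wrapped by MVCLIPModel. The
# freeze_* flags select which submodules stay frozen; exact semantics depend
# on MVCLIPModel, but here the vision tower, logit scale, and logit
# projection appear trainable while the rest of CLIP is frozen.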
model:
  _target_: trainer.models.mvclip_model.MVCLIPModel
  pretrained_clip_model_name_or_path: laion/CLIP-ViT-H-14-laion2B-s32B-b79K
  freeze_clip: true
  freeze_vision: false
  freeze_logit_scale: false
  freeze_logit_proj: false
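# Criterion: pairwise preference loss over the same column layout as the
# task. With in_batch_negatives disabled, only the labeled pair in each
# example appears to contribute to the loss.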
criterion:
  _target_: trainer.criterions.criterion.MVCriterion
  is_distributed: true
  reference_type_column_name: reference_type
  reference_input_column_name: reference_input
  label_0_column_name: label_0
  label_1_column_name: label_1
  normal_pixels_0_column_name: normal_pixel_values_0
  normal_pixels_1_column_name: normal_pixel_values_1
  rgb_pixels_0_column_name: rgb_pixel_values_0
  rgb_pixels_1_column_name: rgb_pixel_values_1
  num_examples_per_prompt_column_name: num_example_per_prompt
  in_batch_negatives: false
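# Dataset: battle-style preference records plus a gallery of reference
# prompts/images (paths are cluster-local). The *_column_name keys map raw
# record fields to the names the task and criterion sections expect; the
# nested `processor` instantiates the matching CLIP preprocessor.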
dataset:
  train_split_name: train_valid
  valid_split_name: valid
  test_split_name: test
  batch_size: 16
  num_workers: 2
  drop_last: true
  _target_: trainer.datasetss.dataset.MVDataset
  records_dir: /mnt/petrelfs/zhangyuhan/data/evaluation/records/battle/clean/splits
  gallery_dir: /mnt/petrelfs/zhangyuhan/data/evaluation/gallery510
  text_gallery_path: /mnt/petrelfs/zhangyuhan/data/evaluation/gallery510/prompts_510.json
  image_gallery_dir: /mnt/petrelfs/zhangyuhan/data/evaluation/gallery510/rgba
  con_images_dir: /mnt/petrelfs/zhangyuhan/data/evaluation/con_images
  split_con_image: false
  cache_dir: null
  reference_type_column_name: reference_type
  reference_idx_column_name: reference_idx
  reference_column_name: reference
  normal_image_0_column_name: normal_image_0
  normal_image_1_column_name: normal_image_1
  rgb_image_0_column_name: rgb_image_0
  rgb_image_1_column_name: rgb_image_1
  eval_dims_column_name: eval_dims
  label_0_column_name: label_0
  label_1_column_name: label_1
  are_different_column_name: are_different
  has_label_column_name: has_label
  reference_input_column_name: reference_input
  normal_pixels_0_column_name: normal_pixel_values_0
  normal_pixels_1_column_name: normal_pixel_values_1
  rgb_pixels_0_column_name: rgb_pixel_values_0
  rgb_pixels_1_column_name: rgb_pixel_values_1
  num_examples_per_prompt_column_name: num_example_per_prompt
  shuffle: true
  keep_only_different: false
  keep_only_with_label: false
  keep_only_with_label_in_non_train: true
  processor:
    _target_: transformers.AutoProcessor.from_pretrained
    pretrained_model_name_or_path: laion/CLIP-ViT-H-14-laion2B-s32B-b79K
  limit_examples_per_prompt: -1
  only_on_best: false
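# Dummy optimizer/scheduler: a common DeepSpeed pattern in which placeholder
# objects only carry the lr/weight-decay/warmup values, and the real AdamW +
# WarmupDecayLR are built by DeepSpeed from the `auto` fields in the
# accelerator section above.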
optimizer:
  _target_: trainer.optimizers.dummy_optimizer.BaseDummyOptim
  lr: 3.0e-06
  weight_decay: 0.3
lr_scheduler:
  _target_: trainer.lr_schedulers.dummy_lr_scheduler.instantiate_dummy_lr_scheduler
  lr: 3.0e-06
  lr_warmup_steps: 500
  total_num_steps: 4000
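# Top-level debug and output_dir mirror the values set under `accelerator`.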
debug:
  activate: false
  port: 5900
output_dir: outputs/Multi-CLIP_ViT_H_14-clean/from_con_image/stage2_eval_from_latest