# Run clip_aoi_train.py with training and evaluation enabled (--do_train,
# --do_eval) on the AOI high-resolution CSV splits, using the
# OFA-Sys/chinese-clip-vit-base-patch16 model, image processor and tokenizer.
#
# Every continued line must end in " \" (space, backslash, newline). The shell
# removes the backslash-newline pair outright, so a backslash glued to the last
# token would fuse that token with the first token of the following line
# (e.g. "patch16--image_processor_name"). Continuation lines are indented as a
# second safeguard against that fusion.
#
# NOTE(review): --logging_steps 0.1 is presumably read as a fraction of the
# total training steps, and the evaluation interval presumably follows it since
# no --eval_steps is passed -- confirm against the trainer's argument parser.
python clip_aoi_train.py \
  --output_dir ./aoi_clip_high_resolution_crossAttenttionFusion_fusin_gpt_random_sampler \
  --model_name_or_path OFA-Sys/chinese-clip-vit-base-patch16 \
  --image_processor_name OFA-Sys/chinese-clip-vit-base-patch16 \
  --tokenizer_name OFA-Sys/chinese-clip-vit-base-patch16 \
  --train_file ./data_csv/AOI/train_high_resolution_gpt.csv \
  --validation_file ./data_csv/AOI/valid_high_resolution_gpt.csv \
  --image_column image_path --caption_column text --aoi_caption_column aoi_text \
  --remove_unused_columns=False --do_eval --do_train \
  --per_device_train_batch_size=40 --per_device_eval_batch_size=20 \
  --gradient_accumulation_steps=10 --overwrite_output_dir \
  --num_train_epochs=200 \
  --learning_rate=1e-5 --warmup_steps=0 --weight_decay 0.1 \
  --save_total_limit 5 \
  --eval_strategy steps --logging_steps 0.1 --report_to wandb --fp16 --push_to_hub --batch_eval_metrics \
  --max_seq_length 153 --aoi_max_seq_length 384
#--model_name_or_path OFA-Sys/chinese-clip-vit-base-patch16
#--overwrite_output_dir \
# aoi_clip_high_resolution_concate_fusin_gpt_random_sampler