# Source: MiniGPT4-video/train_configs/224_minigpt4_llama2_image_align.yaml
# Repository owner: Vision-CAIR; commit a6e7156 ("apply_latest_updates"); 993 bytes.
# Model settings. Every key below belongs to the `model` mapping; the
# indentation is significant (without it `model` would be null and these
# keys would leak to the top level).
model:
  arch: mini_gpt4_llama_v2
  model_type: pretrain_vicuna
  llama_model: "meta-llama/Llama-2-7b-chat-hf"
  # NOTE(review): presumably length limits in tokens - confirm against the
  # model code that consumes them.
  max_txt_len: 160
  max_context_len: 512
  end_sym: "</s>"
  prompt_path: "train_configs/alignment.txt"
  # The trailing space inside the quotes is part of the template; keep it.
  prompt_template: '[INST] {} [/INST] '
  # Placeholder: replace with the path to your pretrained checkpoint
  # before launching a run.
  ckpt: "put your pretrained ckpt here"
# Dataset settings. Each child of `datasets` is one dataset entry; the
# vision and text processors are configured per split, so the two `train`
# mappings (and their `name` keys) must stay nested under their own
# processor - flattened, they collide as duplicate keys.
datasets:
  cc_sbu_align:
    batch_size: 12
    vis_processor:
      train:
        name: "blip2_image_train"
        image_size: 224
    text_processor:
      train:
        name: "blip_caption"
# Run settings: task selection, optimizer schedule, and runtime environment.
run:
  task: image_text_pretrain

  # optimizer
  lr_sched: "linear_warmup_cosine_lr"
  # Learning rates are written with an explicit decimal point: a bare
  # `3e-5` is resolved as a string (not a float) by YAML 1.1 loaders.
  init_lr: 3.0e-5
  min_lr: 1.0e-5
  warmup_lr: 1.0e-6

  weight_decay: 0.05
  max_epoch: 5
  iters_per_epoch: 200
  num_workers: 4
  # Equal to iters_per_epoch above.
  # NOTE(review): presumably warmup therefore spans the first epoch - confirm.
  warmup_steps: 200

  seed: 42
  output_dir: "output/minigpt4_stage2_finetune"

  amp: true
  resume_ckpt_path: null

  evaluate: false
  train_splits: ["train"]

  # runtime / distributed setup
  device: "cuda"
  world_size: 1
  dist_url: "env://"
  distributed: true

  wandb_log: true
  job_name: minigpt4_finetune