vidore
/

bipali

Model card Files Files and versions Community

bipali / training_config.yml

manu's picture

Upload folder using huggingface_hub

5b7aafb verified 5 months ago

history blame contribute delete

1.53 kB

	# Training configuration for the BiPali bi-encoder: a PaliGemma-3B (mix-448)
	# backbone fine-tuned with LoRA adapters under the "biencoder_mean" objective.
	#
	# NOTE(review): each `()` key appears to name the class the config loader
	# instantiates, with its sibling keys passed as arguments; `!path`, `!ext` and
	# `!import` are loader-specific tags, not core YAML. Confirm against the loader.
	config:
	  (): custom_colbert.utils.train_custom_colbert_models.ColModelTrainingConfig
	  # Output directory for this run, given relative to this file via `!path`.
	  output_dir: !path ../../../models/without_tabfquad_no_pairwise/train_bipali_mean-3b-mix-448

	  # Processor, loaded from the same local checkpoint as the model below.
	  processor:
	    (): custom_colbert.utils.wrapper.AutoProcessorWrapper
	    pretrained_model_name_or_path: "./models/paligemma-3b-mix-448"
	    max_length: 50

	  # Backbone model wrapper.
	  model:
	    (): custom_colbert.utils.wrapper.AutoColModelWrapper
	    pretrained_model_name_or_path: "./models/paligemma-3b-mix-448"
	    training_objective: "biencoder_mean"
	    # attn_implementation: "eager"
	    torch_dtype: !ext torch.bfloat16
	    # Optional 4-bit quantized loading, currently disabled:
	    # device_map: "auto"
	    # quantization_config:
	    #   (): transformers.BitsAndBytesConfig
	    #   load_in_4bit: true
	    #   bnb_4bit_quant_type: "nf4"
	    #   bnb_4bit_compute_dtype: "bfloat16"
	    #   bnb_4bit_use_double_quant: true

	  # Training-set loader function and evaluation datasets (imported from a sibling file).
	  dataset_loading_func: !ext custom_colbert.utils.dataset_transformation.load_train_set
	  eval_dataset_loader: !import ../data/test_data.yaml

	  # Same value as processor.max_length above; keep the two in sync.
	  max_length: 50
	  run_eval: true
	  add_suffix: true
	  loss_func:
	    (): custom_colbert.loss.colbert_loss.BiEncoderLoss
	  # Trainer arguments are shared across runs and imported from a separate file.
	  tr_args: !import ../tr_args/default_tr_args.yaml

	  # LoRA adapter settings.
	  peft_config:
	    (): peft.LoraConfig
	    r: 32
	    lora_alpha: 32
	    lora_dropout: 0.1
	    init_lora_weights: "gaussian"
	    bias: "none"
	    task_type: "FEATURE_EXTRACTION"
	    # Regex over module names: anything under `language_model` that is one of
	    # the listed MLP / attention projection layers. Single-quoted so that no
	    # YAML escaping applies; `|` must stay unescaped to act as alternation.
	    target_modules: '(.*(language_model).*(down_proj|gate_proj|up_proj|k_proj|q_proj|v_proj|o_proj).*$)'
	    # Earlier variant kept for reference (NOTE(review): its outer group is unclosed):
	    # target_modules: '(.*(language_model).*(down_proj|gate_proj|up_proj|k_proj|q_proj|v_proj|o_proj).*$'