# fine-tuning the Llama-2-7b chat version on PileNER with the topNE NEs and N samples per NE (N positive + N negative)
base_model: meta-llama/Llama-2-7b-chat-hf
prompt_template_name: llama2_italian

# using a dataset converted from MSEQA format to GenQA format with (instruction, input, output) columns
data_path: None
val_data_path: None
select_train_portion: -1
val_set_size: -1  # if -1, use all validation data
output_dir: None
early_stopping_patience: 5

# training hyperparams
batch_size: 32
micro_batch_size: 1
num_epochs: 10
learning_rate: 3.0e-4
cutoff_len: 768
warmup_steps: 60
eval_steps: 20
logging_steps: 5
max_grad_norm: 1.0

# LoRA hyperparams
use_lora: True
lora_alpha: 16
lora_dropout: 0.05
lora_r: 8
lora_target_modules:
  - q_proj
  - v_proj
  - k_proj

# LLM hyperparams
# next-token-prediction (NTP) loss computed only on the Response
train_on_inputs: False
group_by_length: True

# quantization params
load_8bit: False
load_4bit: False

# general params
save_total_limit: 2
use_flash_attention: False
shuffle: True
gradient_checkpointing: False
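
# Illustrative notes (assumptions, not part of the config schema):
# Assuming the usual alpaca-lora-style training script, the effective batch size is
# reached via gradient accumulation derived from the two values above:
#   gradient_accumulation_steps = batch_size / micro_batch_size = 32 / 1 = 32
#
# A minimal sketch, assuming the script builds a HuggingFace `peft` LoraConfig from
# the LoRA hyperparams above (names on the right are the config keys they map to):
#
#   from peft import LoraConfig
#   lora_config = LoraConfig(
#       r=8,                                            # lora_r
#       lora_alpha=16,                                  # lora_alpha
#       lora_dropout=0.05,                              # lora_dropout
#       target_modules=["q_proj", "v_proj", "k_proj"],  # lora_target_modules
#       bias="none",                                    # assumed default, not set in this file
#       task_type="CAUSAL_LM",
#   )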