|
{ |
|
"accelerator_kwargs": {}, |
|
"adap_kl_ctrl": true, |
|
"backward_batch_size": 64, |
|
"batch_size": 64, |
|
"cliprange": 0.2, |
|
"cliprange_value": 0.2, |
|
"compare_steps": 1, |
|
"dataset_num_proc": null, |
|
"early_stopping": false, |
|
"exp_name": "stego_trainer", |
|
"forward_batch_size": null, |
|
"gamma": 1, |
|
"global_backward_batch_size": 64, |
|
"global_batch_size": 64, |
|
"gradient_accumulation_steps": 4, |
|
"gradient_checkpointing": false, |
|
"horizon": 10000, |
|
"init_kl_coef": 0.05, |
|
"is_encoder_decoder": false, |
|
"is_peft_model": true, |
|
"kl_penalty": "kl", |
|
"lam": 0.95, |
|
"learning_rate": 2e-05, |
|
"log_with": "wandb", |
|
"max_grad_norm": null, |
|
"mini_batch_size": 16, |
|
"model_name": "unsloth/gemma-2-2b-it", |
|
"optimize_cuda_cache": true, |
|
"optimize_device_cache": false, |
|
"ppo_epochs": 4, |
|
"project_kwargs": {}, |
|
"push_to_hub_if_best_kwargs": {}, |
|
"query_dataset": "imdb", |
|
"ratio_threshold": 10.0, |
|
"remove_unused_columns": true, |
|
"reward_model": "sentiment-analysis:lvwerra/distilbert-imdb", |
|
"score_clip": null, |
|
"seed": 0, |
|
"steps": 20000, |
|
"target": 12.0, |
|
"target_kl": 1, |
|
"task_name": null, |
|
"tracker_kwargs": { |
|
"wandb": { |
|
"name": "cv_gemma-2-2b-it_to_distilbert-base-uncased_EBS64_Joan", |
|
"notes": "Dataset: cv\n Same Prompt: \n Payload Prefixes: ['Movie Review: This movie was really amazing!', 'Movie Review: This movie was really terrible!']\n Payload Template: Movie Review: This movie was really {payload}!\n Separate Enc/Dec Data: True\n\n Encoder: gemma-2-2b-it (LR: 2e-05)\n Decoder: distilbert-base-uncased (LR: 0.0001)\n Train Loop: v2_dylan\n\n Effective Batch Sizes:\n - Encoder: 64\n - Decoder: 512\n\n Training Iterations:\n - Encoder updates: 100\n - Decoder updates: 400\n - Update Encoder First: False\n\n Temperatures:\n - Decoder Training: 1.0\n - Encoder Training: 1.0\n - Evaluation: 1.0\n\n Encoder Parameters:\n - KL Coefficient: 0.05\n - LoRA: True\n - Quantization: False\n - Output Length: {'min': 42, 'max': 51}\n\n Decoder Parameters:\n - New Classification Head: True\n - Use Probs Reward: False\n - Weight Decay: 0.01\n - Update Parameters: {'head': True, 'body': True}\n\n Training Configuration:\n - Update Encoder: True\n - Update Decoder: True\n - Paraphrase: False\n - Leak Password: False\n - WandB Logging: True\n - Eval Every N: 50\n - Number of Epochs: 100000\n\n Debug:\n - Override Dec Batch: False", |
|
"tags": [ |
|
"cv", |
|
"gemma-2-2b-it", |
|
"distilbert-base-uncased", |
|
"v2_dylan", |
|
"enc_lr_2e-05", |
|
"dec_lr_0.0001", |
|
"enc_eff_bs_64", |
|
"dec_eff_bs_512", |
|
"enc_updates_100", |
|
"dec_updates_400", |
|
"LoRA", |
|
"Full_Precision", |
|
"same_prompt_ ", |
|
"Separate_Enc_Dec_Data", |
|
"Update_Enc", |
|
"Update_Dec", |
|
"No_Paraphrase", |
|
"No_Leak", |
|
"1-bit" |
|
] |
|
} |
|
}, |
|
"tracker_project_name": "trl", |
|
"use_score_norm": false, |
|
"use_score_scaling": false, |
|
"vf_coef": 0.1, |
|
"whiten_rewards": false, |
|
"world_size": 1 |
|
} |