!!python/object:aether.train.train.TrainingArguments
output_dir: /mnt/disks/persist/data/checkpoints/H32-dh32
overwrite_output_dir: false
do_train: false
do_eval: false
do_predict: false
eval_strategy: 'no'
prediction_loss_only: false
per_device_train_batch_size: 32
per_device_eval_batch_size: 8
per_gpu_train_batch_size: null
per_gpu_eval_batch_size: null
gradient_accumulation_steps: 1
eval_accumulation_steps: null
eval_delay: 0
torch_empty_cache_steps: null
learning_rate: 0.001
weight_decay: 0.05
adam_beta1: 0.9
adam_beta2: 0.999
adam_epsilon: 1.0e-08
max_grad_norm: 1.0
num_train_epochs: 3.0
max_steps: 75000
lr_scheduler_type: constant
lr_scheduler_kwargs: {}
warmup_ratio: 0.0
warmup_steps: 0
log_level: passive
log_level_replica: warning
log_on_each_node: true
logging_dir: null
logging_strategy: steps
logging_first_step: true
logging_steps: 250
logging_nan_inf_filter: true
save_strategy: steps
save_steps: 300
save_total_limit: null
save_safetensors: true
save_on_each_node: false
save_only_model: false
restore_callback_states_from_checkpoint: false
no_cuda: false
use_cpu: false
use_mps_device: false
seed: 42
data_seed: null
jit_mode_eval: false
use_ipex: false
bf16: false
fp16: false
fp16_opt_level: O1
half_precision_backend: auto
bf16_full_eval: false
fp16_full_eval: false
tf32: null
local_rank: -1
ddp_backend: null
tpu_num_cores: null
tpu_metrics_debug: false
debug: ''
dataloader_drop_last: false
eval_steps: null
dataloader_num_workers: 0
dataloader_prefetch_factor: null
past_index: -1
run_name: H32-dh32
disable_tqdm: null
remove_unused_columns: false
label_names:
- input_ids
load_best_model_at_end: false
metric_for_best_model: null
greater_is_better: null
ignore_data_skip: false
fsdp: ''
fsdp_min_num_params: 0
fsdp_config: null
fsdp_transformer_layer_cls_to_wrap: null
accelerator_config: null
deepspeed: null
label_smoothing_factor: 0.0
optim: adamw_torch
optim_args: null
adafactor: false
group_by_length: false
length_column_name: length
report_to: null
ddp_find_unused_parameters: null
ddp_bucket_cap_mb: null
ddp_broadcast_buffers: null
dataloader_pin_memory: true
dataloader_persistent_workers: false
skip_memory_metrics: true
use_legacy_prediction_loop: false
push_to_hub: false
resume_from_checkpoint: null
hub_model_id: timaeus/H32-dh32
hub_strategy: every_save
hub_token: null
hub_private_repo: false
hub_always_push: false
gradient_checkpointing: false
gradient_checkpointing_kwargs: null
include_inputs_for_metrics: false
eval_do_concat_batches: true
fp16_backend: auto
evaluation_strategy: null
push_to_hub_model_id: null
push_to_hub_organization: null
push_to_hub_token: null
mp_parameters: ''
auto_find_batch_size: false
full_determinism: false
torchdynamo: null
ray_scope: last
ddp_timeout: 1800
torch_compile: false
torch_compile_backend: null
torch_compile_mode: null
dispatch_batches: null
split_batches: null
include_tokens_per_second: false
include_num_input_tokens_seen: false
neftune_noise_alpha: null
optim_target_modules: null
batch_eval_metrics: false
eval_on_start: false
use_liger_kernel: false
eval_use_gather_object: false
checkpoints_dir: /mnt/disks/persist/data/checkpoints
save_log_steps: 250
bucket_name: devinterp-language
s3_folder: checkpoints/H32-dh32
delete_after_upload: false
push_to_aws: true
project_name: train_slms_pile13m
is_debug: false
group_name: H
job_type: train
notes: null
tags: null
extra_save_steps:
- 1
- 1
- 1
- 1
- 1
- 1
- 1
- 1
- 1
- 1
- 1
- 1
- 1
- 1
- 1
- 1
- 2
- 2
- 2
- 2
- 2
- 2
- 2
- 2
- 2
- 3
- 3
- 3
- 3
- 3
- 3
- 4
- 4
- 4
- 4
- 4
- 5
- 5
- 5
- 5
- 6
- 6
- 6
- 6
- 7
- 7
- 7
- 8
- 8
- 9
- 9
- 9
- 10
- 10
- 11
- 11
- 12
- 13
- 13
- 14
- 14
- 15
- 16
- 17
- 17
- 18
- 19
- 20
- 21
- 22
- 23
- 24
- 25
- 26
- 28
- 29
- 30
- 32
- 33
- 35
- 36
- 38
- 40
- 42
- 44
- 46
- 48
- 50
- 52
- 55
- 57
- 60
- 63
- 66
- 69
- 72
- 75
- 79
- 82
- 86
- 90
- 94
- 99
- 103
- 108
- 113
- 118
- 124
- 130
- 136
- 142
- 149
- 155
- 163
- 170
- 178
- 186
- 195
- 204
- 213
- 223
- 233
- 244
- 255
- 267
- 280
- 293
- 306
- 320
- 335
- 350
- 367
- 384
- 401
- 420
- 439
- 459
- 481
- 503
- 526
- 550
- 576
- 602
- 630
- 659
- 690
- 721
- 755
- 789
- 826
- 864
- 904
- 946
- 989
- 1035
- 1083
- 1133
- 1185
- 1239
- 1297
- 1356
- 1419
- 1485
- 1553
- 1625
- 1700
- 1778
- 1860
- 1946
- 2035
- 2129
- 2228
- 2330
- 2438
- 2550
- 2668
- 2791
- 2920
- 3054
- 3195
- 3343
- 3497
- 3658
- 3827
- 4003
- 4188
- 4381
- 4583
- 4794
- 5015
- 5247
- 5489
- 5742
- 6007
- 6284
- 6573
- 6876
- 7194
- 7525
- 7872
- 8235
- 8615
- 9012
- 9428
- 9863
- 10318
- 10794
- 11291
- 11812
- 12357
- 12926
- 13523
- 14146
- 14799
- 15481
- 16195
- 16942
- 17723
- 18540
- 19395
- 20290
- 21225
- 22204
- 23228
- 24299
- 25420
- 26592
- 27818
- 29101
- 30443
- 31847
- 33315
- 34851
- 36458
- 38140
- 39898
- 41738
- 43663
- 45676
- 47783
- 49986
- 52291
- 54703
- 57225
- 59864
- 62624
- 65512
- 68533
- 71693
- 75000
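# A minimal loading sketch, kept as comments so this file stays valid YAML.
# Assumptions: PyYAML is installed and aether.train.train.TrainingArguments is
# importable; the file name "H32-dh32.yaml" is hypothetical. The !!python/object
# tag above is only resolved by PyYAML's unsafe loader:
#
#   import yaml
#   with open("H32-dh32.yaml") as f:
#       args = yaml.unsafe_load(f)  # constructs the TrainingArguments object
#   print(args.output_dir, args.max_steps)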