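# NOTE: the next four lines appear to be repository page header text captured
# together with the file; they are kept as comments so the document parses.
# H32-dh32 / training.yaml
# jqhoogland's picture
# Upload final model (step 75000) and all checkpoints at 2024-10-17T04:11:25.607794
# 3ae0566 verified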
# Serialized training arguments for run H32-dh32 (see run_name below).
#
# The tag on the next line is a Python-specific object tag: loading this
# document rebuilds an `aether.train.train.TrainingArguments` instance, so that
# class must be importable and the loader must allow python/object tags (safe
# loaders reject them). Such tags can execute code on load; only load copies of
# this file that come from a trusted source.
!!python/object:aether.train.train.TrainingArguments
output_dir: /mnt/disks/persist/data/checkpoints/H32-dh32
overwrite_output_dir: false
# NOTE(review): do_train is recorded as false although job_type below is
# `train`; presumably this flag is not what starts training — confirm.
do_train: false
do_eval: false
do_predict: false
# Quoted so the value stays the string 'no' instead of being read as a boolean.
eval_strategy: 'no'
prediction_loss_only: false
per_device_train_batch_size: 32
per_device_eval_batch_size: 8
per_gpu_train_batch_size: null
per_gpu_eval_batch_size: null
gradient_accumulation_steps: 1
eval_accumulation_steps: null
eval_delay: 0
torch_empty_cache_steps: null
learning_rate: 0.001
weight_decay: 0.05
adam_beta1: 0.9
adam_beta2: 0.999
adam_epsilon: 1.0e-08
max_grad_norm: 1.0
# NOTE(review): both an epoch count and a step cap are set. Presumably
# max_steps takes precedence over num_train_epochs — confirm against the
# trainer that consumes this file.
num_train_epochs: 3.0
max_steps: 75000
# Scheduler is `constant` and both warmup settings are zero.
lr_scheduler_type: constant
lr_scheduler_kwargs: {}
warmup_ratio: 0.0
warmup_steps: 0
log_level: passive
log_level_replica: warning
log_on_each_node: true
logging_dir: null
logging_strategy: steps
logging_first_step: true
logging_steps: 250
logging_nan_inf_filter: true
save_strategy: steps
# NOTE(review): save_steps (300) differs from logging_steps and save_log_steps
# (both 250) — confirm the mismatch is intentional.
save_steps: 300
save_total_limit: null
save_safetensors: true
save_on_each_node: false
save_only_model: false
restore_callback_states_from_checkpoint: false
no_cuda: false
use_cpu: false
use_mps_device: false
seed: 42
data_seed: null
jit_mode_eval: false
use_ipex: false
# Neither bf16 nor fp16 is enabled.
bf16: false
fp16: false
fp16_opt_level: O1
half_precision_backend: auto
bf16_full_eval: false
fp16_full_eval: false
tf32: null
local_rank: -1
ddp_backend: null
tpu_num_cores: null
tpu_metrics_debug: false
debug: ''
dataloader_drop_last: false
eval_steps: null
dataloader_num_workers: 0
dataloader_prefetch_factor: null
past_index: -1
run_name: H32-dh32
disable_tqdm: null
remove_unused_columns: false
# Single-entry list; presumably the model's inputs double as its labels —
# verify against the training code.
label_names:
- input_ids
load_best_model_at_end: false
metric_for_best_model: null
greater_is_better: null
ignore_data_skip: false
fsdp: ''
fsdp_min_num_params: 0
fsdp_config: null
fsdp_transformer_layer_cls_to_wrap: null
accelerator_config: null
deepspeed: null
label_smoothing_factor: 0.0
optim: adamw_torch
optim_args: null
adafactor: false
group_by_length: false
length_column_name: length
report_to: null
ddp_find_unused_parameters: null
ddp_bucket_cap_mb: null
ddp_broadcast_buffers: null
dataloader_pin_memory: true
dataloader_persistent_workers: false
skip_memory_metrics: true
use_legacy_prediction_loop: false
# NOTE(review): push_to_hub is false even though hub_model_id and hub_strategy
# are populated; push_to_aws further down is true. Confirm which upload path
# was actually used for this run.
push_to_hub: false
resume_from_checkpoint: null
hub_model_id: timaeus/H32-dh32
hub_strategy: every_save
hub_token: null
hub_private_repo: false
hub_always_push: false
gradient_checkpointing: false
gradient_checkpointing_kwargs: null
include_inputs_for_metrics: false
eval_do_concat_batches: true
fp16_backend: auto
# NOTE(review): looks like an older spelling of eval_strategy (set above),
# left unset here — confirm.
evaluation_strategy: null
push_to_hub_model_id: null
push_to_hub_organization: null
push_to_hub_token: null
mp_parameters: ''
auto_find_batch_size: false
full_determinism: false
torchdynamo: null
ray_scope: last
ddp_timeout: 1800
torch_compile: false
torch_compile_backend: null
torch_compile_mode: null
dispatch_batches: null
split_batches: null
include_tokens_per_second: false
include_num_input_tokens_seen: false
neftune_noise_alpha: null
optim_target_modules: null
batch_eval_metrics: false
eval_on_start: false
use_liger_kernel: false
eval_use_gather_object: false
# The remaining keys cover checkpoint storage, bucket upload and experiment
# bookkeeping. Presumably they are fields specific to aether's
# TrainingArguments — verify against that class.
checkpoints_dir: /mnt/disks/persist/data/checkpoints
save_log_steps: 250
bucket_name: devinterp-language
s3_folder: checkpoints/H32-dh32
delete_after_upload: false
push_to_aws: true
project_name: train_slms_pile13m
is_debug: false
group_name: H
job_type: train
notes: null
tags: null
# Extra step numbers at which to save, presumably in addition to the regular
# save_steps cadence — confirm how the trainer consumes this list.
# The values are non-decreasing, start at 1 and end at 75000 (the same value
# as max_steps); at the upper end each entry is about 4.6% larger than the
# previous one, i.e. the spacing is roughly geometric.
# NOTE(review): small values repeat (1 appears 16 times, 2 appears 9 times),
# which looks like a log-spaced schedule rounded to integers without
# de-duplication — TODO confirm the consumer tolerates duplicate steps.
extra_save_steps:
- 1
- 1
- 1
- 1
- 1
- 1
- 1
- 1
- 1
- 1
- 1
- 1
- 1
- 1
- 1
- 1
- 2
- 2
- 2
- 2
- 2
- 2
- 2
- 2
- 2
- 3
- 3
- 3
- 3
- 3
- 3
- 4
- 4
- 4
- 4
- 4
- 5
- 5
- 5
- 5
- 6
- 6
- 6
- 6
- 7
- 7
- 7
- 8
- 8
- 9
- 9
- 9
- 10
- 10
- 11
- 11
- 12
- 13
- 13
- 14
- 14
- 15
- 16
- 17
- 17
- 18
- 19
- 20
- 21
- 22
- 23
- 24
- 25
- 26
- 28
- 29
- 30
- 32
- 33
- 35
- 36
- 38
- 40
- 42
- 44
- 46
- 48
- 50
- 52
- 55
- 57
- 60
- 63
- 66
- 69
- 72
- 75
- 79
- 82
- 86
- 90
- 94
- 99
- 103
- 108
- 113
- 118
- 124
- 130
- 136
- 142
- 149
- 155
- 163
- 170
- 178
- 186
- 195
- 204
- 213
- 223
- 233
- 244
- 255
- 267
- 280
- 293
- 306
- 320
- 335
- 350
- 367
- 384
- 401
- 420
- 439
- 459
- 481
- 503
- 526
- 550
- 576
- 602
- 630
- 659
- 690
- 721
- 755
- 789
- 826
- 864
- 904
- 946
- 989
- 1035
- 1083
- 1133
- 1185
- 1239
- 1297
- 1356
- 1419
- 1485
- 1553
- 1625
- 1700
- 1778
- 1860
- 1946
- 2035
- 2129
- 2228
- 2330
- 2438
- 2550
- 2668
- 2791
- 2920
- 3054
- 3195
- 3343
- 3497
- 3658
- 3827
- 4003
- 4188
- 4381
- 4583
- 4794
- 5015
- 5247
- 5489
- 5742
- 6007
- 6284
- 6573
- 6876
- 7194
- 7525
- 7872
- 8235
- 8615
- 9012
- 9428
- 9863
- 10318
- 10794
- 11291
- 11812
- 12357
- 12926
- 13523
- 14146
- 14799
- 15481
- 16195
- 16942
- 17723
- 18540
- 19395
- 20290
- 21225
- 22204
- 23228
- 24299
- 25420
- 26592
- 27818
- 29101
- 30443
- 31847
- 33315
- 34851
- 36458
- 38140
- 39898
- 41738
- 43663
- 45676
- 47783
- 49986
- 52291
- 54703
- 57225
- 59864
- 62624
- 65512
- 68533
- 71693
- 75000