model:
  name: EleutherAI/pythia-14m
  alias: pythia-14m
  revision: null
  subfolder: null
  precision: bf16
  set_eos_to_pad: true
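  # set_eos_to_pad is presumably a repo-specific switch that reuses the EOS
  # token as the padding token, a common workaround for Pythia models, whose
  # tokenizer ships without a dedicated pad token (assumption; this is not a
  # standard transformers field).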
dataset:
  name: gsm8k
  alias: gsm8k
  text_field: question
  max_length: 1024
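  # text_field picks the GSM8K column to tokenize (the dataset exposes
  # "question" and "answer"); inputs are presumably truncated to max_length
  # tokens by the training script (assumption).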
trainer:
  group_by_length: false
  remove_unused_columns: true
  neftune_noise_alpha: null
  eval_accumulation_steps: 1
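  # neftune_noise_alpha: null disables NEFTune embedding noise, and
  # eval_accumulation_steps: 1 offloads predictions to CPU after every eval
  # step, trading evaluation speed for lower GPU memory.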
  per_device_train_batch_size: 32
  per_device_eval_batch_size: 20
  gradient_accumulation_steps: 1
  dataloader_num_workers: 8
  dataloader_drop_last: false
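  # Effective train batch size = per_device_train_batch_size
  # * gradient_accumulation_steps * number of devices, i.e. 32 * 1 = 32
  # per device here.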
  optim: adamw_torch_fused
  adafactor: false
  learning_rate: 0.0001
  weight_decay: 0
  adam_beta1: 0.9
  adam_beta2: 0.999
  adam_epsilon: 1.0e-08
  max_grad_norm: 1.0
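  # adamw_torch_fused selects PyTorch's fused AdamW implementation (CUDA
  # only). The betas and epsilon above match the PyTorch defaults, so this
  # block effectively pins the learning rate, zero weight decay, and
  # gradient clipping at a norm of 1.0.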
  lr_scheduler_type: linear
  warmup_ratio: 0.0
  warmup_steps: 0
  num_train_epochs: 1
  max_steps: -1
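  # With warmup_ratio and warmup_steps both 0, the linear schedule decays the
  # learning rate from 1e-4 straight to 0 over the run; max_steps: -1 defers
  # the run length to num_train_epochs.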
  eval_steps: 100
  output_dir: ./
  logging_strategy: steps
  logging_first_step: true
  logging_steps: 1
  log_level: info
  report_to: tensorboard
  logging_dir: tb_logs
  disable_tqdm: false
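  # Caveat: in the Hugging Face Trainer, eval_steps only takes effect when
  # evaluation_strategy is set to "steps"; no strategy appears here, so
  # periodic evaluation may depend on the training script enabling it
  # (assumption about how this config is consumed).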
  push_to_hub: true
  save_strategy: epoch
  save_steps: 100
  save_only_model: true
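  # With save_strategy: epoch, save_steps is ignored. save_only_model: true
  # writes model weights without optimizer/scheduler state, so these
  # checkpoints cannot resume training exactly. push_to_hub with output_dir
  # "./" presumably relies on the script supplying a Hub repo id (assumption).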
  seed: 42
  data_seed: 42
  full_determinism: true
  tf32: true
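  # full_determinism: true maps to transformers' enable_full_determinism(),
  # which forces deterministic CUDA kernels (reproducible but slower);
  # tf32: true enables TensorFloat-32 matmuls on Ampere-or-newer GPUs.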
lora:
  r: 64
  lora_alpha: 16
  bias: none
  task_type: CAUSAL_LM
  target_modules: null
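  # LoRA scaling is lora_alpha / r = 16 / 64 = 0.25, smaller than the common
  # alpha >= r convention. target_modules: null lets PEFT fall back to its
  # per-architecture defaults, which for GPT-NeoX models such as Pythia is
  # ["query_key_value"].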
use_peft: true
global_seed: 42
experiment_group: training
run_name: pythia-14m_2024-01-17T00-07-52
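# run_name appears to encode the model alias plus the launch timestamp. A
# minimal sketch of how a config like this is typically consumed, assuming a
# hypothetical train.py entry point not confirmed by this file:
#   python train.py --config pythia-14m.yaml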