File size: 1,415 Bytes
7b97af2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
[2024-01-17 00:07:52,551][hydra][INFO] - 
model:
  name: EleutherAI/pythia-14m
  alias: pythia-14m
  revision: null
  subfolder: null
  precision: bf16
  set_eos_to_pad: true
dataset:
  name: gsm8k
  alias: gsm8k
  text_field: question
  max_length: 1024
trainer:
  group_by_length: false
  remove_unused_columns: true
  neftune_noise_alpha: null
  eval_accumulation_steps: 1
  per_device_train_batch_size: 32
  per_device_eval_batch_size: 20
  gradient_accumulation_steps: 1
  dataloader_num_workers: 8
  dataloader_drop_last: false
  optim: adamw_torch_fused
  adafactor: false
  learning_rate: 0.0001
  weight_decay: 0
  adam_beta1: 0.9
  adam_beta2: 0.999
  adam_epsilon: 1.0e-08
  max_grad_norm: 1.0
  lr_scheduler_type: linear
  warmup_ratio: 0.0
  warmup_steps: 0
  num_train_epochs: 1
  max_steps: -1
  eval_steps: 100
  output_dir: ./
  logging_strategy: steps
  logging_first_step: true
  logging_steps: 1
  log_level: info
  report_to: tensorboard
  logging_dir: tb_logs
  disable_tqdm: false
  push_to_hub: true
  save_strategy: epoch
  save_steps: 100
  save_only_model: true
  seed: 42
  data_seed: 42
  full_determinism: true
  tf32: true
lora:
  r: 64
  lora_alpha: 16
  bias: none
  task_type: CAUSAL_LM
  target_modules: null
use_peft: true
global_seed: 42
experiment_group: training
run_name: pythia-14m_2024-01-17T00-07-52

======================================================================