pietrolesci committed on
Commit 7b97af2
1 Parent(s): bf06c0c

Training in progress, epoch 1

.hydra/config.yaml ADDED
@@ -0,0 +1,62 @@
+ model:
+ name: EleutherAI/pythia-14m
+ alias: pythia-14m
+ revision: null
+ subfolder: null
+ precision: bf16
+ set_eos_to_pad: true
+ dataset:
+ name: gsm8k
+ alias: ${dataset.name}
+ text_field: question
+ max_length: 1024
+ trainer:
+ group_by_length: false
+ remove_unused_columns: true
+ neftune_noise_alpha: null
+ eval_accumulation_steps: 1
+ per_device_train_batch_size: 32
+ per_device_eval_batch_size: 20
+ gradient_accumulation_steps: 1
+ dataloader_num_workers: 8
+ dataloader_drop_last: false
+ optim: adamw_torch_fused
+ adafactor: false
+ learning_rate: 0.0001
+ weight_decay: 0
+ adam_beta1: 0.9
+ adam_beta2: 0.999
+ adam_epsilon: 1.0e-08
+ max_grad_norm: 1.0
+ lr_scheduler_type: linear
+ warmup_ratio: 0.0
+ warmup_steps: 0
+ num_train_epochs: 1
+ max_steps: -1
+ eval_steps: 100
+ output_dir: ./
+ logging_strategy: steps
+ logging_first_step: true
+ logging_steps: 1
+ log_level: info
+ report_to: tensorboard
+ logging_dir: tb_logs
+ disable_tqdm: false
+ push_to_hub: true
+ save_strategy: epoch
+ save_steps: ${trainer.eval_steps}
+ save_only_model: true
+ seed: ${global_seed}
+ data_seed: ${global_seed}
+ full_determinism: true
+ tf32: true
+ lora:
+ r: 64
+ lora_alpha: 16
+ bias: none
+ task_type: CAUSAL_LM
+ target_modules: null
+ use_peft: true
+ global_seed: 42
+ experiment_group: training
+ run_name: ${model.alias}_${now:%Y-%m-%d}T${now:%H-%M-%S}
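The config above (mirrored in resolved form in hparams.yaml and trl_main.log below) describes a LoRA fine-tune of EleutherAI/pythia-14m on the gsm8k question field with standard Hugging Face Trainer hyperparameters. The training script itself is not part of this commit; the following is only a minimal sketch, assuming a TRL SFTTrainer plus PEFT setup contemporary with this run (early 2024). The gsm8k "main" subset, variable names, and overall wiring are assumptions, not the author's code:

from datasets import load_dataset
from peft import LoraConfig
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments
from trl import SFTTrainer

model_name = "EleutherAI/pythia-14m"           # model.name
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token      # set_eos_to_pad: true
model = AutoModelForCausalLM.from_pretrained(model_name)

# lora section of the config; target_modules: null lets PEFT pick architecture defaults
peft_config = LoraConfig(r=64, lora_alpha=16, bias="none", task_type="CAUSAL_LM")

args = TrainingArguments(
    output_dir="./",
    per_device_train_batch_size=32,
    per_device_eval_batch_size=20,
    learning_rate=1e-4,
    lr_scheduler_type="linear",
    num_train_epochs=1,
    optim="adamw_torch_fused",
    logging_steps=1,
    save_strategy="epoch",
    report_to="tensorboard",
    seed=42,
)

trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    args=args,
    train_dataset=load_dataset("gsm8k", "main", split="train"),  # "main" subset is an assumption
    dataset_text_field="question",  # dataset.text_field
    max_seq_length=1024,            # dataset.max_length
    peft_config=peft_config,
)
trainer.train()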
.hydra/hydra.yaml ADDED
@@ -0,0 +1,165 @@
+ hydra:
+ run:
+ dir: ./outputs/${experiment_group}/${dataset.alias}/${run_name}
+ sweep:
+ dir: ./outputs/multirun/${experiment_group}
+ subdir: ${dataset.alias}/${run_name}_${hydra.job.id}
+ launcher:
+ _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher
+ sweeper:
+ _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper
+ max_batch_size: null
+ params: null
+ help:
+ app_name: ${hydra.job.name}
+ header: '${hydra.help.app_name} is powered by Hydra.
+
+ '
+ footer: 'Powered by Hydra (https://hydra.cc)
+
+ Use --hydra-help to view Hydra specific help
+
+ '
+ template: '${hydra.help.header}
+
+ == Configuration groups ==
+
+ Compose your configuration from those groups (group=option)
+
+
+ $APP_CONFIG_GROUPS
+
+
+ == Config ==
+
+ Override anything in the config (foo.bar=value)
+
+
+ $CONFIG
+
+
+ ${hydra.help.footer}
+
+ '
+ hydra_help:
+ template: 'Hydra (${hydra.runtime.version})
+
+ See https://hydra.cc for more info.
+
+
+ == Flags ==
+
+ $FLAGS_HELP
+
+
+ == Configuration groups ==
+
+ Compose your configuration from those groups (For example, append hydra/job_logging=disabled
+ to command line)
+
+
+ $HYDRA_CONFIG_GROUPS
+
+
+ Use ''--cfg hydra'' to Show the Hydra config.
+
+ '
+ hydra_help: ???
+ hydra_logging:
+ version: 1
+ formatters:
+ colorlog:
+ (): colorlog.ColoredFormatter
+ format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s'
+ handlers:
+ console:
+ class: logging.StreamHandler
+ formatter: colorlog
+ stream: ext://sys.stdout
+ root:
+ level: INFO
+ handlers:
+ - console
+ disable_existing_loggers: false
+ job_logging:
+ version: 1
+ formatters:
+ simple:
+ format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'
+ colorlog:
+ (): colorlog.ColoredFormatter
+ format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s]
+ - %(message)s'
+ log_colors:
+ DEBUG: purple
+ INFO: green
+ WARNING: yellow
+ ERROR: red
+ CRITICAL: bold_red
+ handlers:
+ console:
+ class: logging.StreamHandler
+ formatter: colorlog
+ stream: ext://sys.stdout
+ file:
+ class: logging.FileHandler
+ formatter: simple
+ filename: ${hydra.job.name}.log
+ root:
+ level: INFO
+ handlers:
+ - console
+ - file
+ disable_existing_loggers: false
+ env: {}
+ mode: RUN
+ searchpath: []
+ callbacks: {}
+ output_subdir: .hydra
+ overrides:
+ hydra:
+ - hydra.mode=RUN
+ task: []
+ job:
+ name: trl_main
+ chdir: true
+ override_dirname: ''
+ id: ???
+ num: ???
+ config_name: trl_conf
+ env_set: {}
+ env_copy: []
+ config:
+ override_dirname:
+ kv_sep: '='
+ item_sep: ','
+ exclude_keys: []
+ runtime:
+ version: 1.3.2
+ version_base: '1.3'
+ cwd: /home/pl487/weak-to-strong-generalisation
+ config_sources:
+ - path: hydra.conf
+ schema: pkg
+ provider: hydra
+ - path: /home/pl487/weak-to-strong-generalisation/conf
+ schema: file
+ provider: main
+ - path: hydra_plugins.hydra_colorlog.conf
+ schema: pkg
+ provider: hydra-colorlog
+ - path: ''
+ schema: structured
+ provider: schema
+ output_dir: /home/pl487/weak-to-strong-generalisation/outputs/training/gsm8k/pythia-14m_2024-01-17T00-07-52
+ choices:
+ hydra/env: default
+ hydra/callbacks: null
+ hydra/job_logging: colorlog
+ hydra/hydra_logging: colorlog
+ hydra/hydra_help: default
+ hydra/help: default
+ hydra/sweeper: basic
+ hydra/launcher: basic
+ hydra/output: default
+ verbose: false
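hydra.yaml records how the job was launched: Hydra 1.3.2 with the colorlog plugin, job name trl_main, config name trl_conf resolved from the repository's conf/ directory, and a run directory of outputs/training/gsm8k/pythia-14m_2024-01-17T00-07-52. The author's actual entry point is not in this commit; a minimal sketch of an application matching these settings would look roughly like:

import hydra
from omegaconf import DictConfig, OmegaConf

@hydra.main(config_path="conf", config_name="trl_conf", version_base="1.3")
def main(cfg: DictConfig) -> None:
    # With job.chdir: true, Hydra switches into hydra.run.dir before this runs,
    # and the resolved config it logs is what appears at the top of trl_main.log.
    print(OmegaConf.to_yaml(cfg))

if __name__ == "__main__":
    main()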
.hydra/overrides.yaml ADDED
@@ -0,0 +1 @@
+ []
adapter_config.json ADDED
@@ -0,0 +1,25 @@
+ {
+ "alpha_pattern": {},
+ "auto_mapping": null,
+ "base_model_name_or_path": "EleutherAI/pythia-14m",
+ "bias": "none",
+ "fan_in_fan_out": false,
+ "inference_mode": true,
+ "init_lora_weights": true,
+ "layers_pattern": null,
+ "layers_to_transform": null,
+ "loftq_config": {},
+ "lora_alpha": 16,
+ "lora_dropout": 0.0,
+ "megatron_config": null,
+ "megatron_core": "megatron.core",
+ "modules_to_save": null,
+ "peft_type": "LORA",
+ "r": 64,
+ "rank_pattern": {},
+ "revision": null,
+ "target_modules": [
+ "query_key_value"
+ ],
+ "task_type": "CAUSAL_LM"
+ }
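adapter_config.json shows that PEFT resolved target_modules: null to the query_key_value projections of this GPT-NeoX model and saved a rank-64, alpha-16 LoRA adapter for EleutherAI/pythia-14m. A minimal sketch of loading such an adapter with PEFT, assuming the files in this commit are available at some local or Hub path (the path below is a placeholder, not taken from this commit):

from peft import PeftModel
from transformers import AutoModelForCausalLM

base = AutoModelForCausalLM.from_pretrained("EleutherAI/pythia-14m")
model = PeftModel.from_pretrained(base, "path/to/this/checkpoint")  # placeholder path
model = model.merge_and_unload()  # optionally fold the LoRA weights back into the base model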
adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:bc82b0b9d4cb85d528900aa127887e88071e0937803991945ce1121027f87761
+ size 394952
hparams.yaml ADDED
@@ -0,0 +1,62 @@
+ model:
+ name: EleutherAI/pythia-14m
+ alias: pythia-14m
+ revision: null
+ subfolder: null
+ precision: bf16
+ set_eos_to_pad: true
+ dataset:
+ name: gsm8k
+ alias: gsm8k
+ text_field: question
+ max_length: 1024
+ trainer:
+ group_by_length: false
+ remove_unused_columns: true
+ neftune_noise_alpha: null
+ eval_accumulation_steps: 1
+ per_device_train_batch_size: 32
+ per_device_eval_batch_size: 20
+ gradient_accumulation_steps: 1
+ dataloader_num_workers: 8
+ dataloader_drop_last: false
+ optim: adamw_torch_fused
+ adafactor: false
+ learning_rate: 0.0001
+ weight_decay: 0
+ adam_beta1: 0.9
+ adam_beta2: 0.999
+ adam_epsilon: 1.0e-08
+ max_grad_norm: 1.0
+ lr_scheduler_type: linear
+ warmup_ratio: 0.0
+ warmup_steps: 0
+ num_train_epochs: 1
+ max_steps: -1
+ eval_steps: 100
+ output_dir: ./
+ logging_strategy: steps
+ logging_first_step: true
+ logging_steps: 1
+ log_level: info
+ report_to: tensorboard
+ logging_dir: tb_logs
+ disable_tqdm: false
+ push_to_hub: true
+ save_strategy: epoch
+ save_steps: 100
+ save_only_model: true
+ seed: 42
+ data_seed: 42
+ full_determinism: true
+ tf32: true
+ lora:
+ r: 64
+ lora_alpha: 16
+ bias: none
+ task_type: CAUSAL_LM
+ target_modules: null
+ use_peft: true
+ global_seed: 42
+ experiment_group: training
+ run_name: pythia-14m_2024-01-17T00-07-52
special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
+ {
+ "bos_token": {
+ "content": "<|endoftext|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "eos_token": {
+ "content": "<|endoftext|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": "<|endoftext|>",
+ "unk_token": {
+ "content": "<|endoftext|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ }
+ }
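Consistent with set_eos_to_pad: true in the config, the pad token here is the same <|endoftext|> token used for bos/eos/unk rather than Pythia's <|padding|> token. A one-line sketch of what that flag presumably amounts to (an assumption based on the flag name and this file, not the author's code):

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("EleutherAI/pythia-14m")
tokenizer.pad_token = tokenizer.eos_token  # "<|endoftext|>", as recorded in special_tokens_map.json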
tb_logs/events.out.tfevents.1705450080.dev-gpu-pl487.610521.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:19878075f35cd2790962f062c7e641249fb5b7006557191959a835c7f04c5c77
+ size 41054
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,212 @@
+ {
+ "add_prefix_space": false,
+ "added_tokens_decoder": {
+ "0": {
+ "content": "<|endoftext|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "1": {
+ "content": "<|padding|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "50254": {
+ "content": " ",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "50255": {
+ "content": " ",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "50256": {
+ "content": " ",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "50257": {
+ "content": " ",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "50258": {
+ "content": " ",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "50259": {
+ "content": " ",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "50260": {
+ "content": " ",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "50261": {
+ "content": " ",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "50262": {
+ "content": " ",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "50263": {
+ "content": " ",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "50264": {
+ "content": " ",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "50265": {
+ "content": " ",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "50266": {
+ "content": " ",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "50267": {
+ "content": " ",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "50268": {
+ "content": " ",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "50269": {
+ "content": " ",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "50270": {
+ "content": " ",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "50271": {
+ "content": " ",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "50272": {
+ "content": " ",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "50273": {
+ "content": " ",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "50274": {
+ "content": " ",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "50275": {
+ "content": " ",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "50276": {
+ "content": " ",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ }
+ },
+ "bos_token": "<|endoftext|>",
+ "clean_up_tokenization_spaces": true,
+ "eos_token": "<|endoftext|>",
+ "model_max_length": 1000000000000000019884624838656,
+ "pad_token": "<|endoftext|>",
+ "tokenizer_class": "GPTNeoXTokenizer",
+ "unk_token": "<|endoftext|>"
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a2c7d457c4198d805c663dd4afba6ca909e4ef791bef0dff9f9d3f8616bf0b31
+ size 4664
trl_main.log ADDED
@@ -0,0 +1,65 @@
+ [2024-01-17 00:07:52,551][hydra][INFO] -
+ model:
+ name: EleutherAI/pythia-14m
+ alias: pythia-14m
+ revision: null
+ subfolder: null
+ precision: bf16
+ set_eos_to_pad: true
+ dataset:
+ name: gsm8k
+ alias: gsm8k
+ text_field: question
+ max_length: 1024
+ trainer:
+ group_by_length: false
+ remove_unused_columns: true
+ neftune_noise_alpha: null
+ eval_accumulation_steps: 1
+ per_device_train_batch_size: 32
+ per_device_eval_batch_size: 20
+ gradient_accumulation_steps: 1
+ dataloader_num_workers: 8
+ dataloader_drop_last: false
+ optim: adamw_torch_fused
+ adafactor: false
+ learning_rate: 0.0001
+ weight_decay: 0
+ adam_beta1: 0.9
+ adam_beta2: 0.999
+ adam_epsilon: 1.0e-08
+ max_grad_norm: 1.0
+ lr_scheduler_type: linear
+ warmup_ratio: 0.0
+ warmup_steps: 0
+ num_train_epochs: 1
+ max_steps: -1
+ eval_steps: 100
+ output_dir: ./
+ logging_strategy: steps
+ logging_first_step: true
+ logging_steps: 1
+ log_level: info
+ report_to: tensorboard
+ logging_dir: tb_logs
+ disable_tqdm: false
+ push_to_hub: true
+ save_strategy: epoch
+ save_steps: 100
+ save_only_model: true
+ seed: 42
+ data_seed: 42
+ full_determinism: true
+ tf32: true
+ lora:
+ r: 64
+ lora_alpha: 16
+ bias: none
+ task_type: CAUSAL_LM
+ target_modules: null
+ use_peft: true
+ global_seed: 42
+ experiment_group: training
+ run_name: pythia-14m_2024-01-17T00-07-52
+
+ ======================================================================