shyam-incedoinc committed on
Commit 0f1ec37 • 1 Parent(s): 79c9318

Upload folder using huggingface_hub
README.md ADDED
@@ -0,0 +1,9 @@
+ ---
+ tags:
+ - autotrain
+ - text-generation
+ widget:
+ - text: "I love AutoTrain because "
+ ---
+
+ # Model Trained Using AutoTrain
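
A minimal usage sketch for this model card: `training_params.json` at the end of this commit sets `merge_adapter: true` and `push_to_hub: true` with the repo id shown below, so the widget prompt can be reproduced locally with plain `transformers`. The repo id and generation settings are illustrative, not part of the card itself.

```python
# Sketch: run the model-card widget prompt locally (assumes the merged
# model was pushed to the repo_id recorded in training_params.json).
from transformers import AutoModelForCausalLM, AutoTokenizer

repo = "shyam-incedoinc/codellama-7b-instruct-hf-qa-code-finetuned"
tokenizer = AutoTokenizer.from_pretrained(repo)
model = AutoModelForCausalLM.from_pretrained(repo, device_map="auto")

inputs = tokenizer("I love AutoTrain because ", return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```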
adapter_config.json ADDED
@@ -0,0 +1,23 @@
+ {
+ "alpha_pattern": {},
+ "auto_mapping": null,
+ "base_model_name_or_path": "codellama/CodeLlama-7b-Instruct-hf",
+ "bias": "none",
+ "fan_in_fan_out": false,
+ "inference_mode": true,
+ "init_lora_weights": true,
+ "layers_pattern": null,
+ "layers_to_transform": null,
+ "lora_alpha": 32,
+ "lora_dropout": 0.05,
+ "modules_to_save": null,
+ "peft_type": "LORA",
+ "r": 16,
+ "rank_pattern": {},
+ "revision": null,
+ "target_modules": [
+ "q_proj",
+ "v_proj"
+ ],
+ "task_type": "CAUSAL_LM"
+ }
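
A minimal sketch of how this adapter config is consumed at load time: `peft.PeftModel` reads `adapter_config.json` (r=16, lora_alpha=32, dropout 0.05, `q_proj`/`v_proj` targets) from the repo, so none of those values need to be restated in code. The adapter repo id below is taken from `training_params.json` later in this commit.

```python
# Sketch: attach this LoRA adapter to the base CodeLlama model with peft.
from transformers import AutoModelForCausalLM
from peft import PeftModel

base = AutoModelForCausalLM.from_pretrained(
    "codellama/CodeLlama-7b-Instruct-hf", device_map="auto"
)
model = PeftModel.from_pretrained(
    base, "shyam-incedoinc/codellama-7b-instruct-hf-qa-code-finetuned"
)
model.eval()  # matches "inference_mode": true in the config above
```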
adapter_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2d1185258ae99c4b9408da7b7f16005cabb99c6960e92017566ebb1d10bbae6e
+ size 33600461
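
This (and the other binary files below) is a Git LFS pointer, not the weights themselves: the real blob is addressed by the sha256 oid and byte size recorded here. A small, hypothetical sketch of checking a downloaded blob against such a pointer (file paths are illustrative):

```python
# Sketch: parse a Git LFS pointer file and verify a downloaded blob.
import hashlib
from pathlib import Path

def read_pointer(path):
    fields = dict(line.split(" ", 1) for line in Path(path).read_text().splitlines())
    oid = fields["oid"].removeprefix("sha256:")
    return oid, int(fields["size"])

def verify(blob_path, oid, size):
    data = Path(blob_path).read_bytes()
    return len(data) == size and hashlib.sha256(data).hexdigest() == oid

oid, size = read_pointer("adapter_model.bin")        # the pointer file
print(verify("downloaded_adapter_model.bin", oid, size))
```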
checkpoint-168/README.md ADDED
@@ -0,0 +1,34 @@
+ ---
+ library_name: peft
+ ---
+ ## Training procedure
+
+
+ The following `bitsandbytes` quantization config was used during training:
+ - quant_method: bitsandbytes
+ - load_in_8bit: False
+ - load_in_4bit: True
+ - llm_int8_threshold: 6.0
+ - llm_int8_skip_modules: None
+ - llm_int8_enable_fp32_cpu_offload: False
+ - llm_int8_has_fp16_weight: False
+ - bnb_4bit_quant_type: nf4
+ - bnb_4bit_use_double_quant: False
+ - bnb_4bit_compute_dtype: float16
+
+ The following `bitsandbytes` quantization config was used during training:
+ - quant_method: bitsandbytes
+ - load_in_8bit: False
+ - load_in_4bit: True
+ - llm_int8_threshold: 6.0
+ - llm_int8_skip_modules: None
+ - llm_int8_enable_fp32_cpu_offload: False
+ - llm_int8_has_fp16_weight: False
+ - bnb_4bit_quant_type: nf4
+ - bnb_4bit_use_double_quant: False
+ - bnb_4bit_compute_dtype: float16
+ ### Framework versions
+
+ - PEFT 0.6.0.dev0
+
+ - PEFT 0.6.0.dev0
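
For reference, a sketch of the quantization config listed in that README expressed as the `transformers` `BitsAndBytesConfig` it corresponds to, with the values copied verbatim (the base-model load call around it is illustrative):

```python
# Sketch: the 4-bit NF4 bitsandbytes config used during training.
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    llm_int8_threshold=6.0,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=False,
    bnb_4bit_compute_dtype=torch.float16,
)
model = AutoModelForCausalLM.from_pretrained(
    "codellama/CodeLlama-7b-Instruct-hf",
    quantization_config=bnb_config,
    device_map="auto",
)
```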
checkpoint-168/adapter_config.json ADDED
@@ -0,0 +1,23 @@
+ {
+ "alpha_pattern": {},
+ "auto_mapping": null,
+ "base_model_name_or_path": "codellama/CodeLlama-7b-Instruct-hf",
+ "bias": "none",
+ "fan_in_fan_out": false,
+ "inference_mode": true,
+ "init_lora_weights": true,
+ "layers_pattern": null,
+ "layers_to_transform": null,
+ "lora_alpha": 32,
+ "lora_dropout": 0.05,
+ "modules_to_save": null,
+ "peft_type": "LORA",
+ "r": 16,
+ "rank_pattern": {},
+ "revision": null,
+ "target_modules": [
+ "q_proj",
+ "v_proj"
+ ],
+ "task_type": "CAUSAL_LM"
+ }
checkpoint-168/adapter_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2d1185258ae99c4b9408da7b7f16005cabb99c6960e92017566ebb1d10bbae6e
+ size 33600461
checkpoint-168/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:42d35455db9d1c064be5ef3846e1a3105185f4710b26170c592139448306bd42
+ size 67216581
checkpoint-168/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:69e4aa178e9a96e204b49de89d05318c24a70e6dde8236f8abea89e420fa1e22
+ size 14575
checkpoint-168/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:59716b389a5c03b070913c1ab82c9ec436579eb68dd823903ed3cd557b121bd7
+ size 627
checkpoint-168/special_tokens_map.json ADDED
@@ -0,0 +1,12 @@
+ {
+ "additional_special_tokens": [
+ "▁<PRE>",
+ "▁<MID>",
+ "▁<SUF>",
+ "▁<EOT>"
+ ],
+ "bos_token": "<s>",
+ "eos_token": "</s>",
+ "pad_token": "</s>",
+ "unk_token": "<unk>"
+ }
checkpoint-168/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-168/tokenizer.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:45ccb9c8b6b561889acea59191d66986d314e7cbd6a78abc6e49b139ca91c1e6
+ size 500058
checkpoint-168/tokenizer_config.json ADDED
@@ -0,0 +1,81 @@
+ {
+ "added_tokens_decoder": {
+ "0": {
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "1": {
+ "content": "<s>",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "2": {
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "32007": {
+ "content": "▁<PRE>",
+ "lstrip": true,
+ "normalized": false,
+ "rstrip": true,
+ "single_word": false,
+ "special": true
+ },
+ "32008": {
+ "content": "▁<SUF>",
+ "lstrip": true,
+ "normalized": false,
+ "rstrip": true,
+ "single_word": false,
+ "special": true
+ },
+ "32009": {
+ "content": "▁<MID>",
+ "lstrip": true,
+ "normalized": false,
+ "rstrip": true,
+ "single_word": false,
+ "special": true
+ },
+ "32010": {
+ "content": "▁<EOT>",
+ "lstrip": true,
+ "normalized": false,
+ "rstrip": true,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "additional_special_tokens": [
+ "▁<PRE>",
+ "▁<MID>",
+ "▁<SUF>",
+ "▁<EOT>"
+ ],
+ "bos_token": "<s>",
+ "clean_up_tokenization_spaces": false,
+ "eos_token": "</s>",
+ "eot_token": "▁<EOT>",
+ "fill_token": "<FILL_ME>",
+ "legacy": null,
+ "middle_token": "▁<MID>",
+ "model_max_length": 1024,
+ "pad_token": null,
+ "prefix_token": "▁<PRE>",
+ "sp_model_kwargs": {},
+ "suffix_token": "▁<SUF>",
+ "tokenizer_class": "CodeLlamaTokenizer",
+ "unk_token": "<unk>",
+ "use_default_system_prompt": false
+ }
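
A short sketch of what the `fill_token` and prefix/middle/suffix entries above do in practice: `CodeLlamaTokenizer` splits an input on `<FILL_ME>` and rebuilds it as an infilling prompt from the `▁<PRE>`/`▁<SUF>`/`▁<MID>` special tokens. The function body below is illustrative.

```python
# Sketch: CodeLlama fill-in-the-middle via the fill_token declared above.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("codellama/CodeLlama-7b-Instruct-hf")
prompt = 'def remove_non_ascii(s: str) -> str:\n    """<FILL_ME>"""\n    return result'
ids = tok(prompt, return_tensors="pt")
# ids now encodes: <PRE> prefix <SUF> suffix <MID>, ready for infill generation.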
checkpoint-168/trainer_state.json ADDED
@@ -0,0 +1,1083 @@
+ {
+ "best_metric": 0.47534212470054626,
+ "best_model_checkpoint": "qa-code-finetune/checkpoint-168",
+ "epoch": 6.413793103448276,
+ "eval_steps": 500,
+ "global_step": 168,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 0.02,
+ "learning_rate": 3.448275862068966e-06,
+ "loss": 1.4775,
+ "step": 1
+ },
+ {
+ "epoch": 0.03,
+ "learning_rate": 6.896551724137932e-06,
+ "loss": 1.1041,
+ "step": 2
+ },
+ {
+ "epoch": 0.05,
+ "learning_rate": 1.0344827586206897e-05,
+ "loss": 1.3887,
+ "step": 3
+ },
+ {
+ "epoch": 0.07,
+ "learning_rate": 1.3793103448275863e-05,
+ "loss": 0.8684,
+ "step": 4
+ },
+ {
+ "epoch": 0.09,
+ "learning_rate": 1.7241379310344828e-05,
+ "loss": 1.5696,
+ "step": 5
+ },
+ {
+ "epoch": 0.1,
+ "learning_rate": 2.0689655172413793e-05,
+ "loss": 1.3935,
+ "step": 6
+ },
+ {
+ "epoch": 0.12,
+ "learning_rate": 2.413793103448276e-05,
+ "loss": 1.3506,
+ "step": 7
+ },
+ {
+ "epoch": 0.14,
+ "learning_rate": 2.7586206896551727e-05,
+ "loss": 1.4628,
+ "step": 8
+ },
+ {
+ "epoch": 0.16,
+ "learning_rate": 3.103448275862069e-05,
+ "loss": 1.3917,
+ "step": 9
+ },
+ {
+ "epoch": 0.17,
+ "learning_rate": 3.4482758620689657e-05,
+ "loss": 1.5045,
+ "step": 10
+ },
+ {
+ "epoch": 0.19,
+ "learning_rate": 3.793103448275862e-05,
+ "loss": 1.4222,
+ "step": 11
+ },
+ {
+ "epoch": 0.21,
+ "learning_rate": 4.1379310344827587e-05,
+ "loss": 1.3411,
+ "step": 12
+ },
+ {
+ "epoch": 0.22,
+ "learning_rate": 4.482758620689655e-05,
+ "loss": 1.4104,
+ "step": 13
+ },
+ {
+ "epoch": 0.24,
+ "learning_rate": 4.827586206896552e-05,
+ "loss": 1.2848,
+ "step": 14
+ },
+ {
+ "epoch": 0.26,
+ "learning_rate": 5.172413793103449e-05,
+ "loss": 1.3306,
+ "step": 15
+ },
+ {
+ "epoch": 0.28,
+ "learning_rate": 5.517241379310345e-05,
+ "loss": 1.228,
+ "step": 16
+ },
+ {
+ "epoch": 0.29,
+ "learning_rate": 5.862068965517241e-05,
+ "loss": 1.1974,
+ "step": 17
+ },
+ {
+ "epoch": 0.31,
+ "learning_rate": 6.206896551724138e-05,
+ "loss": 1.0348,
+ "step": 18
+ },
+ {
+ "epoch": 0.33,
+ "learning_rate": 6.551724137931034e-05,
+ "loss": 1.14,
+ "step": 19
+ },
+ {
+ "epoch": 0.34,
+ "learning_rate": 6.896551724137931e-05,
+ "loss": 1.2986,
+ "step": 20
+ },
+ {
+ "epoch": 0.36,
+ "learning_rate": 7.241379310344828e-05,
+ "loss": 1.29,
+ "step": 21
+ },
+ {
+ "epoch": 0.38,
+ "learning_rate": 7.586206896551724e-05,
+ "loss": 1.0764,
+ "step": 22
+ },
+ {
+ "epoch": 0.4,
+ "learning_rate": 7.931034482758621e-05,
+ "loss": 1.244,
+ "step": 23
+ },
+ {
+ "epoch": 0.41,
+ "learning_rate": 8.275862068965517e-05,
+ "loss": 1.2264,
+ "step": 24
+ },
+ {
+ "epoch": 0.41,
+ "eval_loss": 1.001452922821045,
+ "eval_runtime": 45.5541,
+ "eval_samples_per_second": 0.549,
+ "eval_steps_per_second": 0.154,
+ "step": 24
+ },
+ {
+ "epoch": 1.02,
+ "learning_rate": 8.620689655172413e-05,
+ "loss": 1.1878,
+ "step": 25
+ },
+ {
+ "epoch": 1.03,
+ "learning_rate": 8.96551724137931e-05,
+ "loss": 1.1258,
+ "step": 26
+ },
+ {
+ "epoch": 1.05,
+ "learning_rate": 9.310344827586207e-05,
+ "loss": 1.199,
+ "step": 27
+ },
+ {
+ "epoch": 1.07,
+ "learning_rate": 9.655172413793105e-05,
+ "loss": 1.0574,
+ "step": 28
+ },
+ {
+ "epoch": 1.09,
+ "learning_rate": 0.0001,
+ "loss": 1.1888,
+ "step": 29
+ },
+ {
+ "epoch": 1.1,
+ "learning_rate": 0.00010344827586206898,
+ "loss": 1.1305,
+ "step": 30
+ },
+ {
+ "epoch": 1.12,
+ "learning_rate": 0.00010689655172413792,
+ "loss": 0.978,
+ "step": 31
+ },
+ {
+ "epoch": 1.14,
+ "learning_rate": 0.0001103448275862069,
+ "loss": 1.1873,
+ "step": 32
+ },
+ {
+ "epoch": 1.16,
+ "learning_rate": 0.00011379310344827588,
+ "loss": 1.0899,
+ "step": 33
+ },
+ {
+ "epoch": 1.17,
+ "learning_rate": 0.00011724137931034482,
+ "loss": 0.9437,
+ "step": 34
+ },
+ {
+ "epoch": 1.19,
+ "learning_rate": 0.0001206896551724138,
+ "loss": 1.0896,
+ "step": 35
+ },
+ {
+ "epoch": 1.21,
+ "learning_rate": 0.00012413793103448277,
+ "loss": 0.8899,
+ "step": 36
+ },
+ {
+ "epoch": 1.22,
+ "learning_rate": 0.00012758620689655174,
+ "loss": 1.0744,
+ "step": 37
+ },
+ {
+ "epoch": 1.24,
+ "learning_rate": 0.00013103448275862068,
+ "loss": 0.9309,
+ "step": 38
+ },
+ {
+ "epoch": 1.26,
+ "learning_rate": 0.00013448275862068965,
+ "loss": 1.0572,
+ "step": 39
+ },
+ {
+ "epoch": 1.28,
+ "learning_rate": 0.00013793103448275863,
+ "loss": 0.94,
+ "step": 40
+ },
+ {
+ "epoch": 1.29,
+ "learning_rate": 0.0001413793103448276,
+ "loss": 0.9554,
+ "step": 41
+ },
+ {
+ "epoch": 1.31,
+ "learning_rate": 0.00014482758620689657,
+ "loss": 0.863,
+ "step": 42
+ },
+ {
+ "epoch": 1.33,
+ "learning_rate": 0.00014827586206896554,
+ "loss": 0.9654,
+ "step": 43
+ },
+ {
+ "epoch": 1.34,
+ "learning_rate": 0.00015172413793103449,
+ "loss": 0.9372,
+ "step": 44
+ },
+ {
+ "epoch": 1.36,
+ "learning_rate": 0.00015517241379310346,
+ "loss": 0.7731,
+ "step": 45
+ },
+ {
+ "epoch": 1.38,
+ "learning_rate": 0.00015862068965517243,
+ "loss": 0.9372,
+ "step": 46
+ },
+ {
+ "epoch": 1.4,
+ "learning_rate": 0.00016206896551724137,
+ "loss": 0.7921,
+ "step": 47
+ },
+ {
+ "epoch": 1.41,
+ "learning_rate": 0.00016551724137931035,
+ "loss": 0.7756,
+ "step": 48
+ },
+ {
+ "epoch": 1.41,
+ "eval_loss": 0.7165587544441223,
+ "eval_runtime": 45.4378,
+ "eval_samples_per_second": 0.55,
+ "eval_steps_per_second": 0.154,
+ "step": 48
+ },
+ {
+ "epoch": 2.02,
+ "learning_rate": 0.00016896551724137932,
+ "loss": 0.8733,
+ "step": 49
+ },
+ {
+ "epoch": 2.03,
+ "learning_rate": 0.00017241379310344826,
+ "loss": 0.7803,
+ "step": 50
+ },
+ {
+ "epoch": 2.05,
+ "learning_rate": 0.00017586206896551723,
+ "loss": 1.0161,
+ "step": 51
+ },
+ {
+ "epoch": 2.07,
+ "learning_rate": 0.0001793103448275862,
+ "loss": 0.8577,
+ "step": 52
+ },
+ {
+ "epoch": 2.09,
+ "learning_rate": 0.00018275862068965518,
+ "loss": 0.8049,
+ "step": 53
+ },
+ {
+ "epoch": 2.1,
+ "learning_rate": 0.00018620689655172415,
+ "loss": 0.7638,
+ "step": 54
+ },
+ {
+ "epoch": 2.12,
+ "learning_rate": 0.00018965517241379312,
+ "loss": 0.7288,
+ "step": 55
+ },
+ {
+ "epoch": 2.14,
+ "learning_rate": 0.0001931034482758621,
+ "loss": 0.7548,
+ "step": 56
+ },
+ {
+ "epoch": 2.16,
+ "learning_rate": 0.00019655172413793104,
+ "loss": 0.6096,
+ "step": 57
+ },
+ {
+ "epoch": 2.17,
+ "learning_rate": 0.0002,
+ "loss": 0.9043,
+ "step": 58
+ },
+ {
+ "epoch": 2.19,
+ "learning_rate": 0.0001996168582375479,
+ "loss": 0.6023,
+ "step": 59
+ },
+ {
+ "epoch": 2.21,
+ "learning_rate": 0.0001992337164750958,
+ "loss": 0.6247,
+ "step": 60
+ },
+ {
+ "epoch": 2.22,
+ "learning_rate": 0.00019885057471264367,
+ "loss": 0.7316,
+ "step": 61
+ },
+ {
+ "epoch": 2.24,
+ "learning_rate": 0.0001984674329501916,
+ "loss": 0.6398,
+ "step": 62
+ },
+ {
+ "epoch": 2.26,
+ "learning_rate": 0.00019808429118773948,
+ "loss": 0.7006,
+ "step": 63
+ },
+ {
+ "epoch": 2.28,
+ "learning_rate": 0.00019770114942528738,
+ "loss": 0.6344,
+ "step": 64
+ },
+ {
+ "epoch": 2.29,
+ "learning_rate": 0.00019731800766283525,
+ "loss": 0.7474,
+ "step": 65
+ },
+ {
+ "epoch": 2.31,
+ "learning_rate": 0.00019693486590038314,
+ "loss": 0.5874,
+ "step": 66
+ },
+ {
+ "epoch": 2.33,
+ "learning_rate": 0.00019655172413793104,
+ "loss": 0.6165,
+ "step": 67
+ },
+ {
+ "epoch": 2.34,
+ "learning_rate": 0.00019616858237547893,
+ "loss": 0.7373,
+ "step": 68
+ },
+ {
+ "epoch": 2.36,
+ "learning_rate": 0.00019578544061302683,
+ "loss": 0.702,
+ "step": 69
+ },
+ {
+ "epoch": 2.38,
+ "learning_rate": 0.00019540229885057472,
+ "loss": 0.7673,
+ "step": 70
+ },
+ {
+ "epoch": 2.4,
+ "learning_rate": 0.00019501915708812262,
+ "loss": 0.767,
+ "step": 71
+ },
+ {
+ "epoch": 2.41,
+ "learning_rate": 0.0001946360153256705,
+ "loss": 0.712,
+ "step": 72
+ },
+ {
+ "epoch": 2.41,
+ "eval_loss": 0.5802827477455139,
+ "eval_runtime": 45.5609,
+ "eval_samples_per_second": 0.549,
+ "eval_steps_per_second": 0.154,
+ "step": 72
+ },
+ {
+ "epoch": 3.02,
+ "learning_rate": 0.0001942528735632184,
+ "loss": 0.6086,
+ "step": 73
+ },
+ {
+ "epoch": 3.03,
+ "learning_rate": 0.00019386973180076628,
+ "loss": 0.5534,
+ "step": 74
+ },
+ {
+ "epoch": 3.05,
+ "learning_rate": 0.00019348659003831417,
+ "loss": 0.7017,
+ "step": 75
+ },
+ {
+ "epoch": 3.07,
+ "learning_rate": 0.0001931034482758621,
+ "loss": 0.7676,
+ "step": 76
+ },
+ {
+ "epoch": 3.09,
+ "learning_rate": 0.00019272030651341,
+ "loss": 0.6932,
+ "step": 77
+ },
+ {
+ "epoch": 3.1,
+ "learning_rate": 0.00019233716475095786,
+ "loss": 0.5797,
+ "step": 78
+ },
+ {
+ "epoch": 3.12,
+ "learning_rate": 0.00019195402298850575,
+ "loss": 0.5496,
+ "step": 79
+ },
+ {
+ "epoch": 3.14,
+ "learning_rate": 0.00019157088122605365,
+ "loss": 0.5773,
+ "step": 80
+ },
+ {
+ "epoch": 3.16,
+ "learning_rate": 0.00019118773946360154,
+ "loss": 0.6998,
+ "step": 81
+ },
+ {
+ "epoch": 3.17,
+ "learning_rate": 0.00019080459770114944,
+ "loss": 0.5153,
+ "step": 82
+ },
+ {
+ "epoch": 3.19,
+ "learning_rate": 0.00019042145593869733,
+ "loss": 0.5796,
+ "step": 83
+ },
+ {
+ "epoch": 3.21,
+ "learning_rate": 0.00019003831417624523,
+ "loss": 0.528,
+ "step": 84
+ },
+ {
+ "epoch": 3.22,
+ "learning_rate": 0.00018965517241379312,
+ "loss": 0.5977,
+ "step": 85
+ },
+ {
+ "epoch": 3.24,
+ "learning_rate": 0.00018927203065134102,
+ "loss": 0.5815,
+ "step": 86
+ },
+ {
+ "epoch": 3.26,
+ "learning_rate": 0.00018888888888888888,
+ "loss": 0.6144,
+ "step": 87
+ },
+ {
+ "epoch": 3.28,
+ "learning_rate": 0.00018850574712643678,
+ "loss": 0.6932,
+ "step": 88
+ },
+ {
+ "epoch": 3.29,
+ "learning_rate": 0.0001881226053639847,
+ "loss": 0.6371,
+ "step": 89
+ },
+ {
+ "epoch": 3.31,
+ "learning_rate": 0.0001877394636015326,
+ "loss": 0.6041,
+ "step": 90
+ },
+ {
+ "epoch": 3.33,
+ "learning_rate": 0.00018735632183908046,
+ "loss": 0.5556,
+ "step": 91
+ },
+ {
+ "epoch": 3.34,
+ "learning_rate": 0.00018697318007662836,
+ "loss": 0.5421,
+ "step": 92
+ },
+ {
+ "epoch": 3.36,
+ "learning_rate": 0.00018659003831417625,
+ "loss": 0.7292,
+ "step": 93
+ },
+ {
+ "epoch": 3.38,
+ "learning_rate": 0.00018620689655172415,
+ "loss": 0.4931,
+ "step": 94
+ },
+ {
+ "epoch": 3.4,
+ "learning_rate": 0.00018582375478927202,
+ "loss": 0.6219,
+ "step": 95
+ },
+ {
+ "epoch": 3.41,
+ "learning_rate": 0.00018544061302681994,
+ "loss": 0.6215,
+ "step": 96
+ },
+ {
+ "epoch": 3.41,
+ "eval_loss": 0.5279306173324585,
+ "eval_runtime": 45.1884,
+ "eval_samples_per_second": 0.553,
+ "eval_steps_per_second": 0.155,
+ "step": 96
+ },
+ {
+ "epoch": 4.02,
+ "learning_rate": 0.00018505747126436783,
+ "loss": 0.5279,
+ "step": 97
+ },
+ {
+ "epoch": 4.03,
+ "learning_rate": 0.00018467432950191573,
+ "loss": 0.6158,
+ "step": 98
+ },
+ {
+ "epoch": 4.05,
+ "learning_rate": 0.0001842911877394636,
+ "loss": 0.479,
+ "step": 99
+ },
+ {
+ "epoch": 4.07,
+ "learning_rate": 0.0001839080459770115,
+ "loss": 0.6229,
+ "step": 100
+ },
+ {
+ "epoch": 4.09,
+ "learning_rate": 0.0001835249042145594,
+ "loss": 0.5331,
+ "step": 101
+ },
+ {
+ "epoch": 4.1,
+ "learning_rate": 0.00018314176245210728,
+ "loss": 0.4522,
+ "step": 102
+ },
+ {
+ "epoch": 4.12,
+ "learning_rate": 0.00018275862068965518,
+ "loss": 0.349,
+ "step": 103
+ },
+ {
+ "epoch": 4.14,
+ "learning_rate": 0.00018237547892720307,
+ "loss": 0.6031,
+ "step": 104
+ },
+ {
+ "epoch": 4.16,
+ "learning_rate": 0.00018199233716475097,
+ "loss": 0.5603,
+ "step": 105
+ },
+ {
+ "epoch": 4.17,
+ "learning_rate": 0.00018160919540229886,
+ "loss": 0.5386,
+ "step": 106
+ },
+ {
+ "epoch": 4.19,
+ "learning_rate": 0.00018122605363984676,
+ "loss": 0.5133,
+ "step": 107
+ },
+ {
+ "epoch": 4.21,
+ "learning_rate": 0.00018084291187739463,
+ "loss": 0.5644,
+ "step": 108
+ },
+ {
+ "epoch": 4.22,
+ "learning_rate": 0.00018045977011494252,
+ "loss": 0.5141,
+ "step": 109
+ },
+ {
+ "epoch": 4.24,
+ "learning_rate": 0.00018007662835249044,
+ "loss": 0.4475,
+ "step": 110
+ },
+ {
+ "epoch": 4.26,
+ "learning_rate": 0.00017969348659003834,
+ "loss": 0.4485,
+ "step": 111
+ },
+ {
+ "epoch": 4.28,
+ "learning_rate": 0.0001793103448275862,
+ "loss": 0.4857,
+ "step": 112
+ },
+ {
+ "epoch": 4.29,
+ "learning_rate": 0.0001789272030651341,
+ "loss": 0.7387,
+ "step": 113
+ },
+ {
+ "epoch": 4.31,
+ "learning_rate": 0.000178544061302682,
+ "loss": 0.5216,
+ "step": 114
+ },
+ {
+ "epoch": 4.33,
+ "learning_rate": 0.0001781609195402299,
+ "loss": 0.4717,
+ "step": 115
+ },
+ {
+ "epoch": 4.34,
+ "learning_rate": 0.00017777777777777779,
+ "loss": 0.4892,
+ "step": 116
+ },
+ {
+ "epoch": 4.36,
+ "learning_rate": 0.00017739463601532568,
+ "loss": 0.5037,
+ "step": 117
+ },
+ {
+ "epoch": 4.38,
+ "learning_rate": 0.00017701149425287358,
+ "loss": 0.5465,
+ "step": 118
+ },
+ {
+ "epoch": 4.4,
+ "learning_rate": 0.00017662835249042147,
+ "loss": 0.5272,
+ "step": 119
+ },
+ {
+ "epoch": 4.41,
+ "learning_rate": 0.00017624521072796937,
+ "loss": 0.5952,
+ "step": 120
+ },
+ {
+ "epoch": 4.41,
+ "eval_loss": 0.4989665448665619,
+ "eval_runtime": 45.2351,
+ "eval_samples_per_second": 0.553,
+ "eval_steps_per_second": 0.155,
+ "step": 120
+ },
+ {
+ "epoch": 5.02,
+ "learning_rate": 0.00017586206896551723,
+ "loss": 0.4193,
+ "step": 121
+ },
+ {
+ "epoch": 5.03,
+ "learning_rate": 0.00017547892720306513,
+ "loss": 0.4954,
+ "step": 122
+ },
+ {
+ "epoch": 5.05,
+ "learning_rate": 0.00017509578544061302,
+ "loss": 0.4177,
+ "step": 123
+ },
+ {
+ "epoch": 5.07,
+ "learning_rate": 0.00017471264367816095,
+ "loss": 0.3927,
+ "step": 124
+ },
+ {
+ "epoch": 5.09,
+ "learning_rate": 0.00017432950191570881,
+ "loss": 0.3624,
+ "step": 125
+ },
+ {
+ "epoch": 5.1,
+ "learning_rate": 0.0001739463601532567,
+ "loss": 0.481,
+ "step": 126
+ },
+ {
+ "epoch": 5.12,
+ "learning_rate": 0.0001735632183908046,
+ "loss": 0.4035,
+ "step": 127
+ },
+ {
+ "epoch": 5.14,
+ "learning_rate": 0.0001731800766283525,
+ "loss": 0.4724,
+ "step": 128
+ },
+ {
+ "epoch": 5.16,
+ "learning_rate": 0.0001727969348659004,
+ "loss": 0.389,
+ "step": 129
+ },
+ {
+ "epoch": 5.17,
+ "learning_rate": 0.00017241379310344826,
+ "loss": 0.5242,
+ "step": 130
+ },
+ {
+ "epoch": 5.19,
+ "learning_rate": 0.00017203065134099618,
+ "loss": 0.4476,
+ "step": 131
+ },
+ {
+ "epoch": 5.21,
+ "learning_rate": 0.00017164750957854408,
+ "loss": 0.5525,
+ "step": 132
+ },
+ {
+ "epoch": 5.22,
+ "learning_rate": 0.00017126436781609197,
+ "loss": 0.4405,
+ "step": 133
+ },
+ {
+ "epoch": 5.24,
+ "learning_rate": 0.00017088122605363984,
+ "loss": 0.4422,
+ "step": 134
+ },
+ {
+ "epoch": 5.26,
+ "learning_rate": 0.00017049808429118774,
+ "loss": 0.5116,
+ "step": 135
+ },
+ {
+ "epoch": 5.28,
+ "learning_rate": 0.00017011494252873563,
+ "loss": 0.3784,
+ "step": 136
+ },
+ {
+ "epoch": 5.29,
+ "learning_rate": 0.00016973180076628356,
+ "loss": 0.5097,
+ "step": 137
+ },
+ {
+ "epoch": 5.31,
+ "learning_rate": 0.00016934865900383142,
+ "loss": 0.3885,
+ "step": 138
+ },
+ {
+ "epoch": 5.33,
+ "learning_rate": 0.00016896551724137932,
+ "loss": 0.4608,
+ "step": 139
+ },
+ {
+ "epoch": 5.34,
+ "learning_rate": 0.0001685823754789272,
+ "loss": 0.4212,
+ "step": 140
+ },
+ {
+ "epoch": 5.36,
+ "learning_rate": 0.0001681992337164751,
+ "loss": 0.4852,
+ "step": 141
+ },
+ {
+ "epoch": 5.38,
+ "learning_rate": 0.000167816091954023,
+ "loss": 0.484,
+ "step": 142
+ },
+ {
+ "epoch": 5.4,
+ "learning_rate": 0.00016743295019157087,
+ "loss": 0.5694,
+ "step": 143
+ },
+ {
+ "epoch": 5.41,
+ "learning_rate": 0.0001670498084291188,
+ "loss": 0.5575,
+ "step": 144
+ },
+ {
+ "epoch": 5.41,
+ "eval_loss": 0.4801803529262543,
+ "eval_runtime": 45.3478,
+ "eval_samples_per_second": 0.551,
+ "eval_steps_per_second": 0.154,
+ "step": 144
+ },
+ {
+ "epoch": 6.02,
+ "learning_rate": 0.0001666666666666667,
+ "loss": 0.4301,
+ "step": 145
+ },
+ {
+ "epoch": 6.03,
+ "learning_rate": 0.00016628352490421458,
+ "loss": 0.4271,
+ "step": 146
+ },
+ {
+ "epoch": 6.05,
+ "learning_rate": 0.00016590038314176245,
+ "loss": 0.4016,
+ "step": 147
+ },
+ {
+ "epoch": 6.07,
+ "learning_rate": 0.00016551724137931035,
+ "loss": 0.4963,
+ "step": 148
+ },
+ {
+ "epoch": 6.09,
+ "learning_rate": 0.00016513409961685824,
+ "loss": 0.4159,
+ "step": 149
+ },
+ {
+ "epoch": 6.1,
+ "learning_rate": 0.00016475095785440614,
+ "loss": 0.3818,
+ "step": 150
+ },
+ {
+ "epoch": 6.12,
+ "learning_rate": 0.00016436781609195403,
+ "loss": 0.3326,
+ "step": 151
+ },
+ {
+ "epoch": 6.14,
+ "learning_rate": 0.00016398467432950193,
+ "loss": 0.3099,
+ "step": 152
+ },
+ {
+ "epoch": 6.16,
+ "learning_rate": 0.00016360153256704982,
+ "loss": 0.3519,
+ "step": 153
+ },
+ {
+ "epoch": 6.17,
+ "learning_rate": 0.00016321839080459772,
+ "loss": 0.4866,
+ "step": 154
+ },
+ {
+ "epoch": 6.19,
+ "learning_rate": 0.0001628352490421456,
+ "loss": 0.4286,
+ "step": 155
+ },
+ {
+ "epoch": 6.21,
+ "learning_rate": 0.00016245210727969348,
+ "loss": 0.3815,
+ "step": 156
+ },
+ {
+ "epoch": 6.22,
+ "learning_rate": 0.00016206896551724137,
+ "loss": 0.4642,
+ "step": 157
+ },
+ {
+ "epoch": 6.24,
+ "learning_rate": 0.0001616858237547893,
+ "loss": 0.4076,
+ "step": 158
+ },
+ {
+ "epoch": 6.26,
+ "learning_rate": 0.0001613026819923372,
+ "loss": 0.3396,
+ "step": 159
+ },
+ {
+ "epoch": 6.28,
+ "learning_rate": 0.00016091954022988506,
+ "loss": 0.2513,
+ "step": 160
+ },
+ {
+ "epoch": 6.29,
+ "learning_rate": 0.00016053639846743295,
+ "loss": 0.3305,
+ "step": 161
+ },
+ {
+ "epoch": 6.31,
+ "learning_rate": 0.00016015325670498085,
+ "loss": 0.4355,
+ "step": 162
+ },
+ {
+ "epoch": 6.33,
+ "learning_rate": 0.00015977011494252874,
+ "loss": 0.4543,
+ "step": 163
+ },
+ {
+ "epoch": 6.34,
+ "learning_rate": 0.00015938697318007664,
+ "loss": 0.3778,
+ "step": 164
+ },
+ {
+ "epoch": 6.36,
+ "learning_rate": 0.00015900383141762453,
+ "loss": 0.4365,
+ "step": 165
+ },
+ {
+ "epoch": 6.38,
+ "learning_rate": 0.00015862068965517243,
+ "loss": 0.4298,
+ "step": 166
+ },
+ {
+ "epoch": 6.4,
+ "learning_rate": 0.00015823754789272032,
+ "loss": 0.4427,
+ "step": 167
+ },
+ {
+ "epoch": 6.41,
+ "learning_rate": 0.00015785440613026822,
+ "loss": 0.4207,
+ "step": 168
+ },
+ {
+ "epoch": 6.41,
+ "eval_loss": 0.47534212470054626,
+ "eval_runtime": 45.2548,
+ "eval_samples_per_second": 0.552,
+ "eval_steps_per_second": 0.155,
+ "step": 168
+ }
+ ],
+ "logging_steps": 1,
+ "max_steps": 580,
+ "num_train_epochs": 10,
+ "save_steps": 500,
+ "total_flos": 2.731509408595968e+16,
+ "trial_name": null,
+ "trial_params": null
+ }
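
A small sketch for working with this file: train entries in `log_history` carry `loss`, eval entries carry `eval_loss` (one per logged evaluation here), so the two curves can be separated with a key check. File path is relative to the repo root.

```python
# Sketch: pull the loss curves back out of trainer_state.json.
import json

with open("checkpoint-168/trainer_state.json") as f:
    state = json.load(f)

train = [(e["step"], e["loss"]) for e in state["log_history"] if "loss" in e]
evals = [(e["step"], e["eval_loss"]) for e in state["log_history"] if "eval_loss" in e]

print("best:", state["best_metric"], "at", state["best_model_checkpoint"])
# evals -> [(24, 1.0014...), (48, 0.7166...), ..., (168, 0.4753...)]
```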
checkpoint-168/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:93ce546eb4264ab344aa8968518085fc433063c48a0325f05059b318a2b48366
+ size 4091
runs/Sep30_04-59-44_dac8e12bb3c8/events.out.tfevents.1696049985.dac8e12bb3c8.5320.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7f734adb5352477b5e6d9bb654da57d64e952945d3b2f19d9e8d3d9f1e8be9ee
+ size 44888
special_tokens_map.json ADDED
@@ -0,0 +1,12 @@
+ {
+ "additional_special_tokens": [
+ "▁<PRE>",
+ "▁<MID>",
+ "▁<SUF>",
+ "▁<EOT>"
+ ],
+ "bos_token": "<s>",
+ "eos_token": "</s>",
+ "pad_token": "</s>",
+ "unk_token": "<unk>"
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:45ccb9c8b6b561889acea59191d66986d314e7cbd6a78abc6e49b139ca91c1e6
+ size 500058
tokenizer_config.json ADDED
@@ -0,0 +1,81 @@
+ {
+ "added_tokens_decoder": {
+ "0": {
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "1": {
+ "content": "<s>",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "2": {
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "32007": {
+ "content": "▁<PRE>",
+ "lstrip": true,
+ "normalized": false,
+ "rstrip": true,
+ "single_word": false,
+ "special": true
+ },
+ "32008": {
+ "content": "▁<SUF>",
+ "lstrip": true,
+ "normalized": false,
+ "rstrip": true,
+ "single_word": false,
+ "special": true
+ },
+ "32009": {
+ "content": "▁<MID>",
+ "lstrip": true,
+ "normalized": false,
+ "rstrip": true,
+ "single_word": false,
+ "special": true
+ },
+ "32010": {
+ "content": "▁<EOT>",
+ "lstrip": true,
+ "normalized": false,
+ "rstrip": true,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "additional_special_tokens": [
+ "▁<PRE>",
+ "▁<MID>",
+ "▁<SUF>",
+ "▁<EOT>"
+ ],
+ "bos_token": "<s>",
+ "clean_up_tokenization_spaces": false,
+ "eos_token": "</s>",
+ "eot_token": "▁<EOT>",
+ "fill_token": "<FILL_ME>",
+ "legacy": null,
+ "middle_token": "▁<MID>",
+ "model_max_length": 1024,
+ "pad_token": null,
+ "prefix_token": "▁<PRE>",
+ "sp_model_kwargs": {},
+ "suffix_token": "▁<SUF>",
+ "tokenizer_class": "CodeLlamaTokenizer",
+ "unk_token": "<unk>",
+ "use_default_system_prompt": false
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:93ce546eb4264ab344aa8968518085fc433063c48a0325f05059b318a2b48366
+ size 4091
training_params.json ADDED
@@ -0,0 +1,41 @@
+ {
+ "model": "codellama/CodeLlama-7b-Instruct-hf",
+ "data_path": ".",
+ "project_name": "qa-code-finetune",
+ "train_split": "train",
+ "valid_split": "valid",
+ "text_column": "text",
+ "token": null,
+ "lr": 0.0002,
+ "epochs": 10,
+ "batch_size": 4,
+ "warmup_ratio": 0.1,
+ "gradient_accumulation": 1,
+ "optimizer": "adamw_torch",
+ "scheduler": "linear",
+ "weight_decay": 0.0,
+ "max_grad_norm": 1.0,
+ "seed": 42,
+ "add_eos_token": false,
+ "block_size": -1,
+ "use_peft": true,
+ "lora_r": 16,
+ "lora_alpha": 32,
+ "lora_dropout": 0.05,
+ "logging_steps": -1,
+ "evaluation_strategy": "epoch",
+ "save_total_limit": 1,
+ "save_strategy": "epoch",
+ "auto_find_batch_size": false,
+ "fp16": false,
+ "push_to_hub": true,
+ "use_int8": false,
+ "model_max_length": 1024,
+ "repo_id": "shyam-incedoinc/codellama-7b-instruct-hf-qa-code-finetuned",
+ "use_int4": true,
+ "trainer": "sft",
+ "target_modules": null,
+ "merge_adapter": true,
+ "username": null,
+ "use_flash_attention_2": false
+ }
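
These params tie directly to the learning-rate trace in `trainer_state.json` above: `warmup_ratio` 0.1 of `max_steps` 580 gives 58 warmup steps, and the log indeed peaks at `lr` 2e-4 exactly at step 58 before decaying linearly. A minimal sketch of that linear-with-warmup schedule (the function is a hand-rolled check, not the library implementation):

```python
# Sketch: sanity-check the schedule implied by training_params.json
# against the learning rates recorded in trainer_state.json.
max_steps, lr, warmup_ratio = 580, 2e-4, 0.1
warmup_steps = int(warmup_ratio * max_steps)  # 58

def lr_at(step):
    if step <= warmup_steps:
        return lr * step / warmup_steps                            # linear warmup
    return lr * (max_steps - step) / (max_steps - warmup_steps)    # linear decay

print(lr_at(1))    # ~3.4483e-06, matches step 1 in the log
print(lr_at(58))   # 0.0002, the peak at step 58
print(lr_at(59))   # ~1.99617e-04, matches step 59 in the log
```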