zuhashaik committed on
Commit
e89a715
1 Parent(s): eb05a61

Pushing to origin

Browse files
adapter_config.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "openchat/openchat_3.5",
5
+ "bias": "none",
6
+ "fan_in_fan_out": false,
7
+ "inference_mode": true,
8
+ "init_lora_weights": true,
9
+ "layers_pattern": null,
10
+ "layers_to_transform": null,
11
+ "lora_alpha": 64,
12
+ "lora_dropout": 0.2,
13
+ "modules_to_save": null,
14
+ "peft_type": "LORA",
15
+ "r": 16,
16
+ "rank_pattern": {},
17
+ "revision": null,
18
+ "target_modules": [
19
+ "q_proj",
20
+ "down_proj",
21
+ "k_proj",
22
+ "v_proj",
23
+ "up_proj",
24
+ "gate_proj",
25
+ "o_proj"
26
+ ],
27
+ "task_type": "SEQ_CLS"
28
+ }
adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ccbd768a0746d8c2562a703c540d4fbca7f0b7335f9e0ebef7718a601fa3549
3
+ size 167865112
added_tokens.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "<|end_of_turn|>": 32000,
3
+ "<|pad_0|>": 32001
4
+ }
optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d451df152e7dda8036bfb6ee074f9b6555575f9692d1dc1abe60349580223b54
3
+ size 335988821
rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a46711a14e82bd0e824e0578189b82a96908aa8ce3724b35f78ce3ba7a95addb
3
+ size 14244
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fec88f76ff8b6a16ba28e412bf4f5d61b95a316d71571bb1dcae14aec8f9e370
3
+ size 1064
special_tokens_map.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|end_of_turn|>",
4
+ "<|pad_0|>"
5
+ ],
6
+ "bos_token": {
7
+ "content": "<s>",
8
+ "lstrip": false,
9
+ "normalized": false,
10
+ "rstrip": false,
11
+ "single_word": false
12
+ },
13
+ "eos_token": {
14
+ "content": "<|end_of_turn|>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false
19
+ },
20
+ "pad_token": "<s>",
21
+ "unk_token": {
22
+ "content": "<unk>",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false
27
+ }
28
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055
3
+ size 493443
tokenizer_config.json ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "<unk>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "<s>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "</s>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "32000": {
28
+ "content": "<|end_of_turn|>",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "32001": {
36
+ "content": "<|pad_0|>",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "additional_special_tokens": [
45
+ "<|end_of_turn|>",
46
+ "<|pad_0|>"
47
+ ],
48
+ "bos_token": "<s>",
49
+ "chat_template": "{{ bos_token }}{% for message in messages %}{{ 'GPT4 Correct ' + message['role'].title() + ': ' + message['content'] + '<|end_of_turn|>'}}{% endfor %}{% if add_generation_prompt %}{{ 'GPT4 Correct Assistant:' }}{% endif %}",
50
+ "clean_up_tokenization_spaces": false,
51
+ "eos_token": "<|end_of_turn|>",
52
+ "legacy": true,
53
+ "model_max_length": 1000000000000000019884624838656,
54
+ "pad_token": "<s>",
55
+ "sp_model_kwargs": {},
56
+ "spaces_between_special_tokens": false,
57
+ "tokenizer_class": "LlamaTokenizer",
58
+ "unk_token": "<unk>",
59
+ "use_default_system_prompt": true
60
+ }
trainer_state.json ADDED
@@ -0,0 +1,179 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 10.0,
5
+ "eval_steps": 500,
6
+ "global_step": 1000,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 1.0,
13
+ "learning_rate": 2e-05,
14
+ "loss": 1.5991,
15
+ "step": 100
16
+ },
17
+ {
18
+ "epoch": 1.0,
19
+ "eval_accuracy": 0.638,
20
+ "eval_loss": 0.7116702795028687,
21
+ "eval_macro_f1": 0.633566150420083,
22
+ "eval_runtime": 89.1695,
23
+ "eval_samples_per_second": 5.607,
24
+ "eval_steps_per_second": 0.561,
25
+ "step": 100
26
+ },
27
+ {
28
+ "epoch": 2.0,
29
+ "learning_rate": 2e-05,
30
+ "loss": 0.6537,
31
+ "step": 200
32
+ },
33
+ {
34
+ "epoch": 2.0,
35
+ "eval_accuracy": 0.618,
36
+ "eval_loss": 1.2388803958892822,
37
+ "eval_macro_f1": 0.5674015555424694,
38
+ "eval_runtime": 89.196,
39
+ "eval_samples_per_second": 5.606,
40
+ "eval_steps_per_second": 0.561,
41
+ "step": 200
42
+ },
43
+ {
44
+ "epoch": 3.0,
45
+ "learning_rate": 2e-05,
46
+ "loss": 0.3544,
47
+ "step": 300
48
+ },
49
+ {
50
+ "epoch": 3.0,
51
+ "eval_accuracy": 0.708,
52
+ "eval_loss": 0.6158359050750732,
53
+ "eval_macro_f1": 0.7078317110655739,
54
+ "eval_runtime": 89.1778,
55
+ "eval_samples_per_second": 5.607,
56
+ "eval_steps_per_second": 0.561,
57
+ "step": 300
58
+ },
59
+ {
60
+ "epoch": 4.0,
61
+ "learning_rate": 2e-05,
62
+ "loss": 0.1829,
63
+ "step": 400
64
+ },
65
+ {
66
+ "epoch": 4.0,
67
+ "eval_accuracy": 0.708,
68
+ "eval_loss": 1.6795860528945923,
69
+ "eval_macro_f1": 0.7078317110655739,
70
+ "eval_runtime": 89.1817,
71
+ "eval_samples_per_second": 5.607,
72
+ "eval_steps_per_second": 0.561,
73
+ "step": 400
74
+ },
75
+ {
76
+ "epoch": 5.0,
77
+ "learning_rate": 2e-05,
78
+ "loss": 0.1188,
79
+ "step": 500
80
+ },
81
+ {
82
+ "epoch": 5.0,
83
+ "eval_accuracy": 0.704,
84
+ "eval_loss": 1.4835941791534424,
85
+ "eval_macro_f1": 0.7039242045963766,
86
+ "eval_runtime": 89.1768,
87
+ "eval_samples_per_second": 5.607,
88
+ "eval_steps_per_second": 0.561,
89
+ "step": 500
90
+ },
91
+ {
92
+ "epoch": 6.0,
93
+ "learning_rate": 2e-05,
94
+ "loss": 0.0924,
95
+ "step": 600
96
+ },
97
+ {
98
+ "epoch": 6.0,
99
+ "eval_accuracy": 0.698,
100
+ "eval_loss": 2.3937265872955322,
101
+ "eval_macro_f1": 0.6914223590045407,
102
+ "eval_runtime": 89.1641,
103
+ "eval_samples_per_second": 5.608,
104
+ "eval_steps_per_second": 0.561,
105
+ "step": 600
106
+ },
107
+ {
108
+ "epoch": 7.0,
109
+ "learning_rate": 2e-05,
110
+ "loss": 0.0913,
111
+ "step": 700
112
+ },
113
+ {
114
+ "epoch": 7.0,
115
+ "eval_accuracy": 0.706,
116
+ "eval_loss": 1.8895695209503174,
117
+ "eval_macro_f1": 0.7053765768365863,
118
+ "eval_runtime": 89.2883,
119
+ "eval_samples_per_second": 5.6,
120
+ "eval_steps_per_second": 0.56,
121
+ "step": 700
122
+ },
123
+ {
124
+ "epoch": 8.0,
125
+ "learning_rate": 2e-05,
126
+ "loss": 0.1357,
127
+ "step": 800
128
+ },
129
+ {
130
+ "epoch": 8.0,
131
+ "eval_accuracy": 0.714,
132
+ "eval_loss": 2.458871603012085,
133
+ "eval_macro_f1": 0.7132832080200502,
134
+ "eval_runtime": 89.4099,
135
+ "eval_samples_per_second": 5.592,
136
+ "eval_steps_per_second": 0.559,
137
+ "step": 800
138
+ },
139
+ {
140
+ "epoch": 9.0,
141
+ "learning_rate": 2e-05,
142
+ "loss": 0.0432,
143
+ "step": 900
144
+ },
145
+ {
146
+ "epoch": 9.0,
147
+ "eval_accuracy": 0.742,
148
+ "eval_loss": 3.152550220489502,
149
+ "eval_macro_f1": 0.7390681699576441,
150
+ "eval_runtime": 89.2296,
151
+ "eval_samples_per_second": 5.604,
152
+ "eval_steps_per_second": 0.56,
153
+ "step": 900
154
+ },
155
+ {
156
+ "epoch": 10.0,
157
+ "learning_rate": 2e-05,
158
+ "loss": 0.0491,
159
+ "step": 1000
160
+ },
161
+ {
162
+ "epoch": 10.0,
163
+ "eval_accuracy": 0.754,
164
+ "eval_loss": 3.035457134246826,
165
+ "eval_macro_f1": 0.7527886644558337,
166
+ "eval_runtime": 89.2033,
167
+ "eval_samples_per_second": 5.605,
168
+ "eval_steps_per_second": 0.561,
169
+ "step": 1000
170
+ }
171
+ ],
172
+ "logging_steps": 100,
173
+ "max_steps": 1000,
174
+ "num_train_epochs": 10,
175
+ "save_steps": 500,
176
+ "total_flos": 3.913814800576512e+17,
177
+ "trial_name": null,
178
+ "trial_params": null
179
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:84ac70da7f431a52a66f04d05a79f0f03d66b171a914b3d983a8770894f0dd4e
3
+ size 4600