howard committed
Commit 2dd63f5 · Parent: a7bc093

50 state temperature

Files changed:
- README.md +41 -41
- adapter_config.json +5 -5
- adapter_model.bin +1 -1
- checkpoint-1239/adapter_config.json +5 -5
- checkpoint-1239/adapter_model.safetensors +1 -1
- checkpoint-1239/optimizer.pt +1 -1
- checkpoint-1239/trainer_state.json +0 -0
- checkpoint-1239/training_args.bin +1 -1
- checkpoint-1416/adapter_config.json +5 -5
- checkpoint-1416/adapter_model.safetensors +1 -1
- checkpoint-1416/optimizer.pt +1 -1
- checkpoint-1416/trainer_state.json +0 -0
- checkpoint-1416/training_args.bin +1 -1
- checkpoint-1593/adapter_config.json +5 -5
- checkpoint-1593/adapter_model.safetensors +1 -1
- checkpoint-1593/optimizer.pt +1 -1
- checkpoint-1593/trainer_state.json +0 -0
- checkpoint-1593/training_args.bin +1 -1
- checkpoint-1770/adapter_config.json +5 -5
- checkpoint-1770/adapter_model.safetensors +1 -1
- checkpoint-1770/optimizer.pt +1 -1
- checkpoint-1770/trainer_state.json +0 -0
- checkpoint-1770/training_args.bin +1 -1
README.md CHANGED

@@ -92,7 +92,7 @@ seed: 42

 </details><br>

-[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="200" height="32"/>](https://rosewandb.ucsd.edu/cht028/finetune/runs/…)
+[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="200" height="32"/>](https://rosewandb.ucsd.edu/cht028/finetune/runs/8a5o02qn)

 # finetune/outputs/climate

 This model is a fine-tuned version of [mistralai/Mistral-7B-Instruct-v0.2](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2) on the None dataset.

@@ -134,46 +134,46 @@ The following hyperparameters were used during training:

 | Training Loss | Epoch | Step | Validation Loss |
 |:-------------:|:------:|:----:|:---------------:|
-(40 rows from the previous run removed; their values are truncated in this view)
+| 1.7628 | 0.0056 | 1    | 1.9544 |
+| 1.1905 | 0.2542 | 45   | 1.2650 |
+| 1.0583 | 0.5085 | 90   | 1.1289 |
+| 0.9094 | 0.7627 | 135  | 0.9717 |
+| 0.6033 | 1.0169 | 180  | 0.7865 |
+| 0.6043 | 1.2712 | 225  | 0.6347 |
+| 0.3525 | 1.5254 | 270  | 0.4456 |
+| 0.1879 | 1.7797 | 315  | 0.2918 |
+| 0.1367 | 2.0339 | 360  | 0.1608 |
+| 0.1627 | 2.2881 | 405  | 0.1098 |
+| 0.1465 | 2.5424 | 450  | 0.0722 |
+| 0.1019 | 2.7966 | 495  | 0.0458 |
+| 0.161  | 3.0508 | 540  | 0.0354 |
+| 0.0597 | 3.3051 | 585  | 0.0189 |
+| 0.1038 | 3.5593 | 630  | 0.0130 |
+| 0.0754 | 3.8136 | 675  | 0.0078 |
+| 0.0632 | 4.0678 | 720  | 0.0051 |
+| 0.0364 | 4.3220 | 765  | 0.0032 |
+| 0.1342 | 4.5763 | 810  | 0.0019 |
+| 0.0776 | 4.8305 | 855  | 0.0014 |
+| 0.0337 | 5.0847 | 900  | 0.0012 |
+| 0.0591 | 5.3390 | 945  | 0.0011 |
+| 0.0171 | 5.5932 | 990  | 0.0010 |
+| 0.0732 | 5.8475 | 1035 | 0.0010 |
+| 0.0538 | 6.1017 | 1080 | 0.0010 |
+| 0.0234 | 6.3559 | 1125 | 0.0010 |
+| 0.1259 | 6.6102 | 1170 | 0.0009 |
+| 0.1216 | 6.8644 | 1215 | 0.0009 |
+| 0.0687 | 7.1186 | 1260 | 0.0009 |
+| 0.1172 | 7.3729 | 1305 | 0.0009 |
+| 0.1007 | 7.6271 | 1350 | 0.0009 |
+| 0.1372 | 7.8814 | 1395 | 0.0009 |
+| 0.0925 | 8.1356 | 1440 | 0.0009 |
+| 0.0342 | 8.3898 | 1485 | 0.0009 |
+| 0.0688 | 8.6441 | 1530 | 0.0009 |
+| 0.0576 | 8.8983 | 1575 | 0.0009 |
+| 0.0575 | 9.1525 | 1620 | 0.0009 |
+| 0.0707 | 9.4068 | 1665 | 0.0009 |
+| 0.1519 | 9.6610 | 1710 | 0.0009 |
+| 0.0666 | 9.9153 | 1755 | 0.0009 |

 ### Framework versions
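The updated model card describes a LoRA adapter trained on top of mistralai/Mistral-7B-Instruct-v0.2. As a minimal, hedged sketch of how such an adapter (adapter_config.json plus adapter_model.safetensors) is typically applied with peft — the repo id and prompt below are placeholders, not taken from this commit:

```python
# Minimal sketch: load the base model and apply the LoRA adapter from this repo.
# "your-username/climate-adapter" is a placeholder; substitute the actual repo id
# or a local path containing adapter_config.json / adapter_model.safetensors.
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base_id = "mistralai/Mistral-7B-Instruct-v0.2"
adapter_id = "your-username/climate-adapter"  # placeholder

tokenizer = AutoTokenizer.from_pretrained(base_id)
base = AutoModelForCausalLM.from_pretrained(base_id, device_map="auto")
model = PeftModel.from_pretrained(base, adapter_id)

# Illustrative prompt only; the training data is not described in this commit.
prompt = "[INST] Summarize recent temperature trends across the 50 states. [/INST]"
inputs = tokenizer(prompt, return_tensors="pt").to(base.device)
out = model.generate(**inputs, max_new_tokens=128)
print(tokenizer.decode(out[0], skip_special_tokens=True))
```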
adapter_config.json CHANGED

@@ -20,13 +20,13 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "…",
-    "k_proj",
-    "o_proj",
+    "v_proj",
     "q_proj",
     "gate_proj",
-    "…",
-    "up_proj"
+    "o_proj",
+    "up_proj",
+    "down_proj",
+    "k_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,
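The visible entries suggest the target_modules change is a reordering of the same seven projection layers rather than a change in which layers are adapted; peft serializes the module set, so the JSON ordering can differ between otherwise identical runs. A hedged sketch of a LoraConfig that targets these modules — rank, alpha, and dropout values are illustrative, not read from this commit:

```python
# Hedged sketch: a LoraConfig targeting the projection modules listed in
# adapter_config.json. r / lora_alpha / lora_dropout are illustrative values.
from peft import LoraConfig

lora_config = LoraConfig(
    r=16,                # illustrative rank
    lora_alpha=32,       # illustrative scaling factor
    lora_dropout=0.05,   # illustrative dropout
    task_type="CAUSAL_LM",
    target_modules=[
        "q_proj", "k_proj", "v_proj", "o_proj",   # attention projections
        "gate_proj", "up_proj", "down_proj",      # MLP projections
    ],
)
```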
adapter_model.bin CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:…
+oid sha256:d00ffcc4d191f9d69525da11019c028a06537e3d129d9be5a1b37a605f27a585
 size 167934026
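adapter_model.bin and the checkpoint weights and optimizer states below are stored via Git LFS, so each diff only shows the pointer's sha256 oid changing while the size stays the same. A small sketch, assuming a locally downloaded copy of the artifact, for checking a file against the oid recorded in its pointer:

```python
# Hedged sketch: verify a downloaded LFS object against the sha256 recorded in
# its pointer file. The file path is a placeholder.
import hashlib

def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    """Stream the file in chunks and return its hex sha256 digest."""
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

expected = "d00ffcc4d191f9d69525da11019c028a06537e3d129d9be5a1b37a605f27a585"
actual = sha256_of("adapter_model.bin")  # placeholder path
print("OK" if actual == expected else f"Mismatch: {actual}")
```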
checkpoint-1239/adapter_config.json CHANGED

@@ -20,13 +20,13 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "…",
-    "k_proj",
-    "o_proj",
+    "v_proj",
     "q_proj",
     "gate_proj",
-    "…",
-    "up_proj"
+    "o_proj",
+    "up_proj",
+    "down_proj",
+    "k_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,
checkpoint-1239/adapter_model.safetensors CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:…
+oid sha256:1ddd91110dca909691fbbb1784527bce170129270d7735c824dc4f4e90975d31
 size 167832688
checkpoint-1239/optimizer.pt CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:…
+oid sha256:da066899a9d98293612f2dce80ad58c32705496ef15ef46638bec899d28baa4d
 size 671364538
checkpoint-1239/trainer_state.json CHANGED

The diff for this file is too large to render; see the raw diff.
checkpoint-1239/training_args.bin CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:…
+oid sha256:13d0b0690127a596103b9e3d544fe6576e170a3f1dd7dfbfda422d3daf886d21
 size 6072
checkpoint-1416/adapter_config.json CHANGED

@@ -20,13 +20,13 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "…",
-    "k_proj",
-    "o_proj",
+    "v_proj",
     "q_proj",
     "gate_proj",
-    "…",
-    "up_proj"
+    "o_proj",
+    "up_proj",
+    "down_proj",
+    "k_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,
checkpoint-1416/adapter_model.safetensors CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:…
+oid sha256:ca2dc61cb7d55436b4e330adf2427c85fe093f826dfae905957b8d82210b28d0
 size 167832688
checkpoint-1416/optimizer.pt CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:…
+oid sha256:2d376e4a14b958543cb85e3dc7f8e344712201739b4c1c178a6214ef0111a80e
 size 671364538
checkpoint-1416/trainer_state.json CHANGED

The diff for this file is too large to render; see the raw diff.
checkpoint-1416/training_args.bin CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:…
+oid sha256:13d0b0690127a596103b9e3d544fe6576e170a3f1dd7dfbfda422d3daf886d21
 size 6072
checkpoint-1593/adapter_config.json CHANGED

@@ -20,13 +20,13 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "…",
-    "k_proj",
-    "o_proj",
+    "v_proj",
     "q_proj",
     "gate_proj",
-    "…",
-    "up_proj"
+    "o_proj",
+    "up_proj",
+    "down_proj",
+    "k_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,
checkpoint-1593/adapter_model.safetensors CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:…
+oid sha256:eb6cbad22ed544dffe042850c42ff19590b67687e905c083b54f4723a89b448d
 size 167832688
checkpoint-1593/optimizer.pt CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:…
+oid sha256:c7cddd85d799af3863a131f4dcbac5f96ce461de045c6de2c770be64430b8ed1
 size 671364538
checkpoint-1593/trainer_state.json CHANGED

The diff for this file is too large to render; see the raw diff.
checkpoint-1593/training_args.bin CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:…
+oid sha256:13d0b0690127a596103b9e3d544fe6576e170a3f1dd7dfbfda422d3daf886d21
 size 6072
checkpoint-1770/adapter_config.json CHANGED

@@ -20,13 +20,13 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "…",
-    "k_proj",
-    "o_proj",
+    "v_proj",
     "q_proj",
     "gate_proj",
-    "…",
-    "up_proj"
+    "o_proj",
+    "up_proj",
+    "down_proj",
+    "k_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,
checkpoint-1770/adapter_model.safetensors CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:…
+oid sha256:b87d6907367ed6f7381869da57a7f2ca075e74361a0eb7f11331a607f5560075
 size 167832688
checkpoint-1770/optimizer.pt CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:…
+oid sha256:b8070bdb6da0e653c4beb4c6f43a822bf1fd2d49093f5353d66dda7a3ef0850e
 size 671364538
checkpoint-1770/trainer_state.json CHANGED

The diff for this file is too large to render; see the raw diff.
checkpoint-1770/training_args.bin CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:…
+oid sha256:13d0b0690127a596103b9e3d544fe6576e170a3f1dd7dfbfda422d3daf886d21
 size 6072