theoldmandthesea committed on
Commit
d146f81
1 Parent(s): f9e53cb

Updated model

Browse files
README.md CHANGED
@@ -40,7 +40,7 @@ The following hyperparameters were used during training:
40
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
41
  - lr_scheduler_type: linear
42
  - lr_scheduler_warmup_steps: 2
43
- - training_steps: 6
44
 
45
  ### Training results
46
 
 
40
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
41
  - lr_scheduler_type: linear
42
  - lr_scheduler_warmup_steps: 2
43
+ - training_steps: 200
44
 
45
  ### Training results
46
 
adapter_config.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "auto_mapping": null,
3
- "base_model_name_or_path": null,
4
  "bias": "none",
5
  "fan_in_fan_out": false,
6
  "inference_mode": true,
@@ -14,13 +14,13 @@
14
  "r": 16,
15
  "revision": null,
16
  "target_modules": [
 
 
 
17
  "gate_proj",
18
  "o_proj",
19
- "down_proj",
20
- "k_proj",
21
  "q_proj",
22
- "v_proj",
23
- "up_proj"
24
  ],
25
  "task_type": "CAUSAL_LM"
26
  }
 
1
  {
2
  "auto_mapping": null,
3
+ "base_model_name_or_path": "meta-llama/Llama-2-7b-hf",
4
  "bias": "none",
5
  "fan_in_fan_out": false,
6
  "inference_mode": true,
 
14
  "r": 16,
15
  "revision": null,
16
  "target_modules": [
17
+ "up_proj",
18
+ "v_proj",
19
+ "down_proj",
20
  "gate_proj",
21
  "o_proj",
 
 
22
  "q_proj",
23
+ "k_proj"
 
24
  ],
25
  "task_type": "CAUSAL_LM"
26
  }
adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4a2fc323a0408cee7d756cfb402c31c80a8dcd3e782dc114de20c42a63ed16a2
3
- size 160077005
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4c0fb3ea95a6c5cb64da617c99063dc56a3402f5b1b8fa595b086528289058db
3
+ size 160069389
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 0.0,
3
- "total_flos": 42650566778880.0,
4
- "train_loss": 2.389628251393636,
5
- "train_runtime": 39.2629,
6
- "train_samples_per_second": 0.611,
7
- "train_steps_per_second": 0.153
8
  }
 
1
  {
2
+ "epoch": 0.05,
3
+ "total_flos": 1428558744158208.0,
4
+ "train_loss": 1.3583835124969483,
5
+ "train_runtime": 1540.4432,
6
+ "train_samples_per_second": 0.519,
7
+ "train_steps_per_second": 0.13
8
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 0.0,
3
- "total_flos": 42650566778880.0,
4
- "train_loss": 2.389628251393636,
5
- "train_runtime": 39.2629,
6
- "train_samples_per_second": 0.611,
7
- "train_steps_per_second": 0.153
8
  }
 
1
  {
2
+ "epoch": 0.05,
3
+ "total_flos": 1428558744158208.0,
4
+ "train_loss": 1.3583835124969483,
5
+ "train_runtime": 1540.4432,
6
+ "train_samples_per_second": 0.519,
7
+ "train_steps_per_second": 0.13
8
  }
trainer_state.json CHANGED
@@ -1,28 +1,148 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.0013729977116704805,
5
  "eval_steps": 500,
6
- "global_step": 6,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.0,
13
- "step": 6,
14
- "total_flos": 42650566778880.0,
15
- "train_loss": 2.389628251393636,
16
- "train_runtime": 39.2629,
17
- "train_samples_per_second": 0.611,
18
- "train_steps_per_second": 0.153
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  }
20
  ],
21
  "logging_steps": 10,
22
- "max_steps": 6,
23
  "num_train_epochs": 1,
24
  "save_steps": 500,
25
- "total_flos": 42650566778880.0,
26
  "trial_name": null,
27
  "trial_params": null
28
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.04576659038901602,
5
  "eval_steps": 500,
6
+ "global_step": 200,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.0,
13
+ "learning_rate": 0.00019292929292929293,
14
+ "loss": 2.0259,
15
+ "step": 10
16
+ },
17
+ {
18
+ "epoch": 0.0,
19
+ "learning_rate": 0.00018282828282828283,
20
+ "loss": 1.4961,
21
+ "step": 20
22
+ },
23
+ {
24
+ "epoch": 0.01,
25
+ "learning_rate": 0.00017272727272727275,
26
+ "loss": 1.3953,
27
+ "step": 30
28
+ },
29
+ {
30
+ "epoch": 0.01,
31
+ "learning_rate": 0.00016262626262626264,
32
+ "loss": 1.3234,
33
+ "step": 40
34
+ },
35
+ {
36
+ "epoch": 0.01,
37
+ "learning_rate": 0.00015252525252525253,
38
+ "loss": 1.4601,
39
+ "step": 50
40
+ },
41
+ {
42
+ "epoch": 0.01,
43
+ "learning_rate": 0.00014242424242424243,
44
+ "loss": 1.3362,
45
+ "step": 60
46
+ },
47
+ {
48
+ "epoch": 0.02,
49
+ "learning_rate": 0.00013232323232323235,
50
+ "loss": 1.3905,
51
+ "step": 70
52
+ },
53
+ {
54
+ "epoch": 0.02,
55
+ "learning_rate": 0.00012222222222222224,
56
+ "loss": 1.3132,
57
+ "step": 80
58
+ },
59
+ {
60
+ "epoch": 0.02,
61
+ "learning_rate": 0.00011212121212121212,
62
+ "loss": 1.2767,
63
+ "step": 90
64
+ },
65
+ {
66
+ "epoch": 0.02,
67
+ "learning_rate": 0.00010202020202020202,
68
+ "loss": 1.3303,
69
+ "step": 100
70
+ },
71
+ {
72
+ "epoch": 0.03,
73
+ "learning_rate": 9.191919191919192e-05,
74
+ "loss": 1.3685,
75
+ "step": 110
76
+ },
77
+ {
78
+ "epoch": 0.03,
79
+ "learning_rate": 8.181818181818183e-05,
80
+ "loss": 1.335,
81
+ "step": 120
82
+ },
83
+ {
84
+ "epoch": 0.03,
85
+ "learning_rate": 7.171717171717171e-05,
86
+ "loss": 1.3122,
87
+ "step": 130
88
+ },
89
+ {
90
+ "epoch": 0.03,
91
+ "learning_rate": 6.161616161616162e-05,
92
+ "loss": 1.2693,
93
+ "step": 140
94
+ },
95
+ {
96
+ "epoch": 0.03,
97
+ "learning_rate": 5.151515151515152e-05,
98
+ "loss": 1.2069,
99
+ "step": 150
100
+ },
101
+ {
102
+ "epoch": 0.04,
103
+ "learning_rate": 4.141414141414142e-05,
104
+ "loss": 1.2302,
105
+ "step": 160
106
+ },
107
+ {
108
+ "epoch": 0.04,
109
+ "learning_rate": 3.131313131313132e-05,
110
+ "loss": 1.2451,
111
+ "step": 170
112
+ },
113
+ {
114
+ "epoch": 0.04,
115
+ "learning_rate": 2.1212121212121215e-05,
116
+ "loss": 1.2993,
117
+ "step": 180
118
+ },
119
+ {
120
+ "epoch": 0.04,
121
+ "learning_rate": 1.1111111111111112e-05,
122
+ "loss": 1.2968,
123
+ "step": 190
124
+ },
125
+ {
126
+ "epoch": 0.05,
127
+ "learning_rate": 1.0101010101010103e-06,
128
+ "loss": 1.2567,
129
+ "step": 200
130
+ },
131
+ {
132
+ "epoch": 0.05,
133
+ "step": 200,
134
+ "total_flos": 1428558744158208.0,
135
+ "train_loss": 1.3583835124969483,
136
+ "train_runtime": 1540.4432,
137
+ "train_samples_per_second": 0.519,
138
+ "train_steps_per_second": 0.13
139
  }
140
  ],
141
  "logging_steps": 10,
142
+ "max_steps": 200,
143
  "num_train_epochs": 1,
144
  "save_steps": 500,
145
+ "total_flos": 1428558744158208.0,
146
  "trial_name": null,
147
  "trial_params": null
148
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e0ac0db2deb2139cb448a023a3dc53a82c952d78c89d126c7a619268f1a86d61
3
  size 4091
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6af3d684fb9b885bdc10d2bedf59709dfd4ef3c01f7e374a9a07ad916540d937
3
  size 4091