brettbbb committed on
Commit
6a21d75
1 Parent(s): 2a2d379

End of training

Browse files
README.md CHANGED
@@ -34,13 +34,13 @@ More information needed
34
 
35
  The following hyperparameters were used during training:
36
  - learning_rate: 0.0002
37
- - train_batch_size: 1
38
  - eval_batch_size: 8
39
  - seed: 42
40
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
41
  - lr_scheduler_type: linear
42
  - lr_scheduler_warmup_steps: 2
43
- - training_steps: 3
44
  - mixed_precision_training: Native AMP
45
 
46
  ### Training results
 
34
 
35
  The following hyperparameters were used during training:
36
  - learning_rate: 0.0002
37
+ - train_batch_size: 4
38
  - eval_batch_size: 8
39
  - seed: 42
40
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
41
  - lr_scheduler_type: linear
42
  - lr_scheduler_warmup_steps: 2
43
+ - training_steps: 20
44
  - mixed_precision_training: Native AMP
45
 
46
  ### Training results
adapter_config.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "alpha_pattern": {},
3
  "auto_mapping": null,
4
- "base_model_name_or_path": null,
5
  "bias": "none",
6
  "fan_in_fan_out": false,
7
  "inference_mode": true,
@@ -16,7 +16,13 @@
16
  "rank_pattern": {},
17
  "revision": null,
18
  "target_modules": [
19
- "base_layer"
 
 
 
 
 
 
20
  ],
21
  "task_type": "SEQ_CLS"
22
  }
 
1
  {
2
  "alpha_pattern": {},
3
  "auto_mapping": null,
4
+ "base_model_name_or_path": "meta-llama/Llama-2-7b-hf",
5
  "bias": "none",
6
  "fan_in_fan_out": false,
7
  "inference_mode": true,
 
16
  "rank_pattern": {},
17
  "revision": null,
18
  "target_modules": [
19
+ "o_proj",
20
+ "v_proj",
21
+ "up_proj",
22
+ "down_proj",
23
+ "k_proj",
24
+ "q_proj",
25
+ "gate_proj"
26
  ],
27
  "task_type": "SEQ_CLS"
28
  }
adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:11d1c82228579d983d2866efc6c53db9e6f4f7fa8c9b39ec57bf88c0994264ab
3
- size 320373198
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09c2b7002ff788d88d15bd99682d6628ff38f37610acdc8fc2a94ba41d5fb965
3
+ size 160283150
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3d403c5891130d9cf0429788d8aed07ac05883ec05cb5d5eb4c6cd826bc7089b
3
- size 320169624
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eb6d31e55d2ba6ceda27724d1db9bd07caa18ffbb27cc008f54b0abe21e0dffa
3
+ size 160180976
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 0.0,
3
- "total_flos": 32571857534976.0,
4
- "train_loss": 6.678385416666667,
5
- "train_runtime": 6.9231,
6
- "train_samples_per_second": 0.433,
7
- "train_steps_per_second": 0.433
8
  }
 
1
  {
2
+ "epoch": 0.12,
3
+ "total_flos": 825752514723840.0,
4
+ "train_loss": 0.38747100830078124,
5
+ "train_runtime": 30.2879,
6
+ "train_samples_per_second": 2.641,
7
+ "train_steps_per_second": 0.66
8
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 0.0,
3
- "total_flos": 32571857534976.0,
4
- "train_loss": 6.678385416666667,
5
- "train_runtime": 6.9231,
6
- "train_samples_per_second": 0.433,
7
- "train_steps_per_second": 0.433
8
  }
 
1
  {
2
+ "epoch": 0.12,
3
+ "total_flos": 825752514723840.0,
4
+ "train_loss": 0.38747100830078124,
5
+ "train_runtime": 30.2879,
6
+ "train_samples_per_second": 2.641,
7
+ "train_steps_per_second": 0.66
8
  }
trainer_state.json CHANGED
@@ -1,46 +1,148 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.004594180704441042,
5
  "eval_steps": 500,
6
- "global_step": 3,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.0,
13
  "learning_rate": 0.0,
14
- "loss": 6.625,
15
  "step": 1
16
  },
17
  {
18
- "epoch": 0.0,
19
  "learning_rate": 0.0,
20
- "loss": 6.1719,
21
  "step": 2
22
  },
23
  {
24
- "epoch": 0.0,
25
  "learning_rate": 0.0,
26
- "loss": 7.2383,
27
  "step": 3
28
  },
29
  {
30
- "epoch": 0.0,
31
- "step": 3,
32
- "total_flos": 32571857534976.0,
33
- "train_loss": 6.678385416666667,
34
- "train_runtime": 6.9231,
35
- "train_samples_per_second": 0.433,
36
- "train_steps_per_second": 0.433
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
  }
38
  ],
39
  "logging_steps": 1,
40
- "max_steps": 3,
41
  "num_train_epochs": 1,
42
  "save_steps": 500,
43
- "total_flos": 32571857534976.0,
44
  "trial_name": null,
45
  "trial_params": null
46
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.12195121951219512,
5
  "eval_steps": 500,
6
+ "global_step": 20,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.01,
13
  "learning_rate": 0.0,
14
+ "loss": 1.1529,
15
  "step": 1
16
  },
17
  {
18
+ "epoch": 0.01,
19
  "learning_rate": 0.0,
20
+ "loss": 1.7234,
21
  "step": 2
22
  },
23
  {
24
+ "epoch": 0.02,
25
  "learning_rate": 0.0,
26
+ "loss": 1.1025,
27
  "step": 3
28
  },
29
  {
30
+ "epoch": 0.02,
31
+ "learning_rate": 0.0,
32
+ "loss": 0.8796,
33
+ "step": 4
34
+ },
35
+ {
36
+ "epoch": 0.03,
37
+ "learning_rate": 0.0,
38
+ "loss": 0.9621,
39
+ "step": 5
40
+ },
41
+ {
42
+ "epoch": 0.04,
43
+ "learning_rate": 0.0001,
44
+ "loss": 1.2671,
45
+ "step": 6
46
+ },
47
+ {
48
+ "epoch": 0.04,
49
+ "learning_rate": 0.0002,
50
+ "loss": 0.6072,
51
+ "step": 7
52
+ },
53
+ {
54
+ "epoch": 0.05,
55
+ "learning_rate": 0.00018888888888888888,
56
+ "loss": 0.0546,
57
+ "step": 8
58
+ },
59
+ {
60
+ "epoch": 0.05,
61
+ "learning_rate": 0.00017777777777777779,
62
+ "loss": 0.0,
63
+ "step": 9
64
+ },
65
+ {
66
+ "epoch": 0.06,
67
+ "learning_rate": 0.0001666666666666667,
68
+ "loss": 0.0,
69
+ "step": 10
70
+ },
71
+ {
72
+ "epoch": 0.07,
73
+ "learning_rate": 0.00015555555555555556,
74
+ "loss": 0.0,
75
+ "step": 11
76
+ },
77
+ {
78
+ "epoch": 0.07,
79
+ "learning_rate": 0.00014444444444444444,
80
+ "loss": 0.0,
81
+ "step": 12
82
+ },
83
+ {
84
+ "epoch": 0.08,
85
+ "learning_rate": 0.00013333333333333334,
86
+ "loss": 0.0,
87
+ "step": 13
88
+ },
89
+ {
90
+ "epoch": 0.09,
91
+ "learning_rate": 0.00012222222222222224,
92
+ "loss": 0.0,
93
+ "step": 14
94
+ },
95
+ {
96
+ "epoch": 0.09,
97
+ "learning_rate": 0.00011111111111111112,
98
+ "loss": 0.0,
99
+ "step": 15
100
+ },
101
+ {
102
+ "epoch": 0.1,
103
+ "learning_rate": 0.0001,
104
+ "loss": 0.0,
105
+ "step": 16
106
+ },
107
+ {
108
+ "epoch": 0.1,
109
+ "learning_rate": 8.888888888888889e-05,
110
+ "loss": 0.0,
111
+ "step": 17
112
+ },
113
+ {
114
+ "epoch": 0.11,
115
+ "learning_rate": 7.777777777777778e-05,
116
+ "loss": 0.0,
117
+ "step": 18
118
+ },
119
+ {
120
+ "epoch": 0.12,
121
+ "learning_rate": 6.666666666666667e-05,
122
+ "loss": 0.0,
123
+ "step": 19
124
+ },
125
+ {
126
+ "epoch": 0.12,
127
+ "learning_rate": 5.555555555555556e-05,
128
+ "loss": 0.0,
129
+ "step": 20
130
+ },
131
+ {
132
+ "epoch": 0.12,
133
+ "step": 20,
134
+ "total_flos": 825752514723840.0,
135
+ "train_loss": 0.38747100830078124,
136
+ "train_runtime": 30.2879,
137
+ "train_samples_per_second": 2.641,
138
+ "train_steps_per_second": 0.66
139
  }
140
  ],
141
  "logging_steps": 1,
142
+ "max_steps": 20,
143
  "num_train_epochs": 1,
144
  "save_steps": 500,
145
+ "total_flos": 825752514723840.0,
146
  "trial_name": null,
147
  "trial_params": null
148
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e25ab091eda0fcef4e0a9e37f22496431e57d92ab798a86fa7a60f8d8ed78a5d
3
  size 4600
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c98373fb85d7bc426671be530b1abe4c64f097fe2b70dadd3f279a36bcf6587
3
  size 4600