timewanderer committed on
Commit
a886df7
1 Parent(s): 6b91724

Training in progress, step 1000

Browse files
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1de8090ce06ee283276a4164fb3fc1cbb41f07f3933cb5b30a7a520d950cd8bb
3
  size 268290900
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ab3467fe7b706783d195082717277e7993176e96abb7f168a29914f5a59ec505
3
  size 268290900
run-1/checkpoint-1000/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:866be8cdc735bbe36a639244e641a1ac7cb151100c189abd5e0204a9b16f0c3b
3
  size 268290900
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ab3467fe7b706783d195082717277e7993176e96abb7f168a29914f5a59ec505
3
  size 268290900
run-1/checkpoint-1000/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0a5957e7a6bfaac777273b29656197645c0696c422bfdd157dc5bc4f63c4e05a
3
  size 536643898
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:055188a1969f89bad7cabd4dce4c1ace96fe3963fb776164fed4776bce4fdc97
3
  size 536643898
run-1/checkpoint-1000/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:60ef01273aaa599804d51a93c6c0c61874ebd50ab9e135a21aab5f7cd0e6487a
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c794bc4c67ef18245dd516031ce405ab557e4d551d225d8dd1e1abc0f2be8e33
3
  size 1064
run-1/checkpoint-1000/trainer_state.json CHANGED
@@ -10,50 +10,50 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "eval_accuracy": 0.5980645161290322,
14
- "eval_loss": 0.2370469570159912,
15
- "eval_runtime": 5.131,
16
- "eval_samples_per_second": 604.169,
17
- "eval_steps_per_second": 12.668,
18
  "step": 318
19
  },
20
  {
21
  "epoch": 1.5723270440251573,
22
- "grad_norm": 0.5828151106834412,
23
- "learning_rate": 1.4758909853249476e-05,
24
- "loss": 0.3737,
25
  "step": 500
26
  },
27
  {
28
  "epoch": 2.0,
29
- "eval_accuracy": 0.8264516129032258,
30
- "eval_loss": 0.11328593641519547,
31
- "eval_runtime": 5.5375,
32
- "eval_samples_per_second": 559.823,
33
- "eval_steps_per_second": 11.738,
34
  "step": 636
35
  },
36
  {
37
  "epoch": 3.0,
38
- "eval_accuracy": 0.8780645161290322,
39
- "eval_loss": 0.0755920559167862,
40
- "eval_runtime": 5.1832,
41
- "eval_samples_per_second": 598.091,
42
- "eval_steps_per_second": 12.541,
43
  "step": 954
44
  },
45
  {
46
  "epoch": 3.1446540880503147,
47
- "grad_norm": 0.5158044099807739,
48
- "learning_rate": 9.517819706498952e-06,
49
- "loss": 0.1316,
50
  "step": 1000
51
  }
52
  ],
53
  "logging_steps": 500,
54
- "max_steps": 1908,
55
  "num_input_tokens_seen": 0,
56
- "num_train_epochs": 6,
57
  "save_steps": 500,
58
  "stateful_callbacks": {
59
  "TrainerControl": {
@@ -71,8 +71,8 @@
71
  "train_batch_size": 48,
72
  "trial_name": null,
73
  "trial_params": {
74
- "alpha": 0.4750594581069757,
75
- "num_train_epochs": 6,
76
- "temperature": 5
77
  }
78
  }
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "eval_accuracy": 0.5838709677419355,
14
+ "eval_loss": 0.19346614181995392,
15
+ "eval_runtime": 5.1609,
16
+ "eval_samples_per_second": 600.675,
17
+ "eval_steps_per_second": 12.595,
18
  "step": 318
19
  },
20
  {
21
  "epoch": 1.5723270440251573,
22
+ "grad_norm": 0.5179193019866943,
23
+ "learning_rate": 1.685534591194969e-05,
24
+ "loss": 0.3125,
25
  "step": 500
26
  },
27
  {
28
  "epoch": 2.0,
29
+ "eval_accuracy": 0.82,
30
+ "eval_loss": 0.09356740862131119,
31
+ "eval_runtime": 5.2878,
32
+ "eval_samples_per_second": 586.255,
33
+ "eval_steps_per_second": 12.292,
34
  "step": 636
35
  },
36
  {
37
  "epoch": 3.0,
38
+ "eval_accuracy": 0.8790322580645161,
39
+ "eval_loss": 0.06245172396302223,
40
+ "eval_runtime": 5.0672,
41
+ "eval_samples_per_second": 611.776,
42
+ "eval_steps_per_second": 12.828,
43
  "step": 954
44
  },
45
  {
46
  "epoch": 3.1446540880503147,
47
+ "grad_norm": 0.4341842532157898,
48
+ "learning_rate": 1.371069182389937e-05,
49
+ "loss": 0.1085,
50
  "step": 1000
51
  }
52
  ],
53
  "logging_steps": 500,
54
+ "max_steps": 3180,
55
  "num_input_tokens_seen": 0,
56
+ "num_train_epochs": 10,
57
  "save_steps": 500,
58
  "stateful_callbacks": {
59
  "TrainerControl": {
 
71
  "train_batch_size": 48,
72
  "trial_name": null,
73
  "trial_params": {
74
+ "alpha": 0.5365842802514643,
75
+ "num_train_epochs": 10,
76
+ "temperature": 14
77
  }
78
  }
run-1/checkpoint-1000/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4b3c3e04f27e1abfa351236657f60e84eba5fb921cf293902231af05b1d43c52
3
  size 5240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aff76e4271bd0bf9763ec56f904c0eab2f55fe172f25f099aa8c57c7f7749007
3
  size 5240
runs/Oct11_17-03-17_c179252869f4/events.out.tfevents.1728667239.c179252869f4.732.2 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:820d379076a01b53ffef66bbe4800d9d4c680409363b24b7f8b2944a79b2ed89
3
- size 13299
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:10367c984244420d47ea408506189cbe0d6f5dd6dabfc78adb56bcb4be98ab6e
3
+ size 13833