yaniseuranova committed on
Commit
7d95ddc
1 Parent(s): f263a18

Training in progress, step 60

Browse files
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
  "epoch": 4.0,
3
  "eval_accuracy": 1.0,
4
- "eval_loss": 0.02930055744946003,
5
- "eval_runtime": 2.166,
6
- "eval_samples_per_second": 26.777,
7
- "eval_steps_per_second": 3.693,
8
  "total_flos": 1.7978141590585344e+16,
9
- "train_loss": 0.3378882090250651,
10
- "train_runtime": 22.8868,
11
- "train_samples_per_second": 10.137,
12
- "train_steps_per_second": 2.622
13
  }
 
1
  {
2
  "epoch": 4.0,
3
  "eval_accuracy": 1.0,
4
+ "eval_loss": 0.029831457883119583,
5
+ "eval_runtime": 1.8404,
6
+ "eval_samples_per_second": 31.515,
7
+ "eval_steps_per_second": 4.347,
8
  "total_flos": 1.7978141590585344e+16,
9
+ "train_loss": 0.3446396509806315,
10
+ "train_runtime": 22.8779,
11
+ "train_samples_per_second": 10.141,
12
+ "train_steps_per_second": 2.623
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 4.0,
3
  "eval_accuracy": 1.0,
4
- "eval_loss": 0.02930055744946003,
5
- "eval_runtime": 2.166,
6
- "eval_samples_per_second": 26.777,
7
- "eval_steps_per_second": 3.693
8
  }
 
1
  {
2
  "epoch": 4.0,
3
  "eval_accuracy": 1.0,
4
+ "eval_loss": 0.029831457883119583,
5
+ "eval_runtime": 1.8404,
6
+ "eval_samples_per_second": 31.515,
7
+ "eval_steps_per_second": 4.347
8
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:953fd1dc14bcfe4abbbc4074f5f494a5910fdeb3f3c545442210eccd9e7d17fe
3
  size 343223968
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e76f6a0c3f5d8ebc533070a28a26b4a506940275f2cb3f217bd38ffa3a1d0c00
3
  size 343223968
runs/Aug30_08-27-51_7e2de4304170/events.out.tfevents.1725006516.7e2de4304170 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c5dde671809bfb4942efed02c7f3bc2798413ead150db9a5be474ceefebf1932
3
+ size 357
runs/Aug30_08-34-08_7e2de4304170/events.out.tfevents.1725006849.7e2de4304170 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:40f281a23128f51cc7b3d653bc5cad34c725c99e2757fa2ab8e969b3186db108
3
+ size 6439
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 4.0,
3
  "total_flos": 1.7978141590585344e+16,
4
- "train_loss": 0.3378882090250651,
5
- "train_runtime": 22.8868,
6
- "train_samples_per_second": 10.137,
7
- "train_steps_per_second": 2.622
8
  }
 
1
  {
2
  "epoch": 4.0,
3
  "total_flos": 1.7978141590585344e+16,
4
+ "train_loss": 0.3446396509806315,
5
+ "train_runtime": 22.8779,
6
+ "train_samples_per_second": 10.141,
7
+ "train_steps_per_second": 2.623
8
  }
trainer_state.json CHANGED
@@ -10,54 +10,54 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.6666666666666666,
13
- "grad_norm": 1.332340121269226,
14
  "learning_rate": 0.0001666666666666667,
15
- "loss": 0.6343,
16
  "step": 10
17
  },
18
  {
19
  "epoch": 1.3333333333333333,
20
- "grad_norm": 2.4813578128814697,
21
  "learning_rate": 0.00013333333333333334,
22
- "loss": 0.6705,
23
  "step": 20
24
  },
25
  {
26
  "epoch": 2.0,
27
- "grad_norm": 12.414591789245605,
28
  "learning_rate": 0.0001,
29
- "loss": 0.4595,
30
  "step": 30
31
  },
32
  {
33
  "epoch": 2.6666666666666665,
34
- "grad_norm": 0.5009147524833679,
35
  "learning_rate": 6.666666666666667e-05,
36
- "loss": 0.1393,
37
  "step": 40
38
  },
39
  {
40
  "epoch": 3.3333333333333335,
41
- "grad_norm": 0.22140513360500336,
42
  "learning_rate": 3.3333333333333335e-05,
43
- "loss": 0.0828,
44
  "step": 50
45
  },
46
  {
47
  "epoch": 4.0,
48
- "grad_norm": 0.21809758245944977,
49
  "learning_rate": 0.0,
50
- "loss": 0.0409,
51
  "step": 60
52
  },
53
  {
54
  "epoch": 4.0,
55
  "step": 60,
56
  "total_flos": 1.7978141590585344e+16,
57
- "train_loss": 0.3378882090250651,
58
- "train_runtime": 22.8868,
59
- "train_samples_per_second": 10.137,
60
- "train_steps_per_second": 2.622
61
  }
62
  ],
63
  "logging_steps": 10,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.6666666666666666,
13
+ "grad_norm": 0.9146473407745361,
14
  "learning_rate": 0.0001666666666666667,
15
+ "loss": 0.636,
16
  "step": 10
17
  },
18
  {
19
  "epoch": 1.3333333333333333,
20
+ "grad_norm": 2.536752700805664,
21
  "learning_rate": 0.00013333333333333334,
22
+ "loss": 0.6707,
23
  "step": 20
24
  },
25
  {
26
  "epoch": 2.0,
27
+ "grad_norm": 8.357431411743164,
28
  "learning_rate": 0.0001,
29
+ "loss": 0.448,
30
  "step": 30
31
  },
32
  {
33
  "epoch": 2.6666666666666665,
34
+ "grad_norm": 0.41346922516822815,
35
  "learning_rate": 6.666666666666667e-05,
36
+ "loss": 0.1903,
37
  "step": 40
38
  },
39
  {
40
  "epoch": 3.3333333333333335,
41
+ "grad_norm": 0.2532810568809509,
42
  "learning_rate": 3.3333333333333335e-05,
43
+ "loss": 0.0844,
44
  "step": 50
45
  },
46
  {
47
  "epoch": 4.0,
48
+ "grad_norm": 0.21754373610019684,
49
  "learning_rate": 0.0,
50
+ "loss": 0.0384,
51
  "step": 60
52
  },
53
  {
54
  "epoch": 4.0,
55
  "step": 60,
56
  "total_flos": 1.7978141590585344e+16,
57
+ "train_loss": 0.3446396509806315,
58
+ "train_runtime": 22.8779,
59
+ "train_samples_per_second": 10.141,
60
+ "train_steps_per_second": 2.623
61
  }
62
  ],
63
  "logging_steps": 10,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ba7eb858551fcb097ddfd61ed2f90174151a8d0add0067132e152ce53542b915
3
  size 5176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9afae5936ded42ee4b1a55ec163bc421cd49c1502968a260b3c94b7dde3d5ec0
3
  size 5176