talargv commited on
Commit
8e2eab6
1 Parent(s): 872b96f

End of training

Browse files
Files changed (1) hide show
  1. trainer_state.json +32 -46
trainer_state.json CHANGED
@@ -1,69 +1,55 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.8,
5
  "eval_steps": 500,
6
- "global_step": 12,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.32,
13
- "grad_norm": 0.17948530614376068,
14
- "learning_rate": 0.0001666666666666667,
15
- "loss": 8.9023,
16
- "step": 2
17
- },
18
- {
19
- "epoch": 0.64,
20
- "grad_norm": 0.33971741795539856,
21
- "learning_rate": 0.00013333333333333334,
22
- "loss": 8.7931,
23
- "step": 4
24
- },
25
- {
26
- "epoch": 0.96,
27
- "grad_norm": 0.4960370361804962,
28
  "learning_rate": 0.0001,
29
- "loss": 8.6895,
30
- "step": 6
31
  },
32
  {
33
- "epoch": 1.16,
34
- "grad_norm": 0.5553238391876221,
35
- "learning_rate": 6.666666666666667e-05,
36
- "loss": 5.3485,
37
- "step": 8
38
  },
39
  {
40
- "epoch": 1.48,
41
- "grad_norm": 0.7772231698036194,
42
- "learning_rate": 3.3333333333333335e-05,
43
- "loss": 8.4708,
44
- "step": 10
45
  },
46
  {
47
- "epoch": 1.8,
48
- "grad_norm": 0.7240044474601746,
49
- "learning_rate": 0.0,
50
- "loss": 8.4396,
51
- "step": 12
52
  },
53
  {
54
- "epoch": 1.8,
55
- "step": 12,
56
- "total_flos": 33316935156720.0,
57
- "train_loss": 8.10729185740153,
58
- "train_runtime": 84.0152,
59
- "train_samples_per_second": 2.381,
60
- "train_steps_per_second": 0.143
61
  }
62
  ],
63
- "logging_steps": 2,
64
- "max_steps": 12,
65
  "num_input_tokens_seen": 0,
66
- "num_train_epochs": 2,
67
  "save_steps": 500,
68
  "stateful_callbacks": {
69
  "TrainerControl": {
@@ -77,7 +63,7 @@
77
  "attributes": {}
78
  }
79
  },
80
- "total_flos": 33316935156720.0,
81
  "train_batch_size": 2,
82
  "trial_name": null,
83
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 4.0,
5
  "eval_steps": 500,
6
+ "global_step": 4,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 1.0,
13
+ "grad_norm": Infinity,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  "learning_rate": 0.0001,
15
+ "loss": 3.4942,
16
+ "step": 1
17
  },
18
  {
19
+ "epoch": 2.0,
20
+ "grad_norm": 90.76869201660156,
21
+ "learning_rate": 7.500000000000001e-05,
22
+ "loss": 3.5171,
23
+ "step": 2
24
  },
25
  {
26
+ "epoch": 3.0,
27
+ "grad_norm": 52.46024703979492,
28
+ "learning_rate": 5e-05,
29
+ "loss": 3.0473,
30
+ "step": 3
31
  },
32
  {
33
+ "epoch": 4.0,
34
+ "grad_norm": 12.389888763427734,
35
+ "learning_rate": 2.5e-05,
36
+ "loss": 2.8469,
37
+ "step": 4
38
  },
39
  {
40
+ "epoch": 4.0,
41
+ "step": 4,
42
+ "total_flos": 333548660400.0,
43
+ "train_loss": 3.2263943552970886,
44
+ "train_runtime": 27.0103,
45
+ "train_samples_per_second": 0.74,
46
+ "train_steps_per_second": 0.148
47
  }
48
  ],
49
+ "logging_steps": 1,
50
+ "max_steps": 4,
51
  "num_input_tokens_seen": 0,
52
+ "num_train_epochs": 4,
53
  "save_steps": 500,
54
  "stateful_callbacks": {
55
  "TrainerControl": {
 
63
  "attributes": {}
64
  }
65
  },
66
+ "total_flos": 333548660400.0,
67
  "train_batch_size": 2,
68
  "trial_name": null,
69
  "trial_params": null