baohuynhbk14 commited on
Commit
d0477fb
1 Parent(s): 77f2c60

Model save

Browse files
Files changed (1) hide show
  1. trainer_state.json +59 -45
trainer_state.json CHANGED
@@ -3,107 +3,121 @@
3
  "best_model_checkpoint": null,
4
  "epoch": 1.0,
5
  "eval_steps": 1000,
6
- "global_step": 60,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.08333333333333333,
13
- "grad_norm": 1.0525362491607666,
14
  "learning_rate": 1e-06,
15
- "loss": 1.5648,
16
  "step": 5
17
  },
18
  {
19
- "epoch": 0.16666666666666666,
20
- "grad_norm": 1.0406826734542847,
21
  "learning_rate": 1e-06,
22
- "loss": 1.4231,
23
  "step": 10
24
  },
25
  {
26
- "epoch": 0.25,
27
- "grad_norm": 1.1155447959899902,
28
  "learning_rate": 1e-06,
29
- "loss": 1.4725,
30
  "step": 15
31
  },
32
  {
33
- "epoch": 0.3333333333333333,
34
- "grad_norm": 1.1421337127685547,
35
  "learning_rate": 1e-06,
36
- "loss": 1.4378,
37
  "step": 20
38
  },
39
  {
40
- "epoch": 0.4166666666666667,
41
- "grad_norm": 1.1258127689361572,
42
  "learning_rate": 1e-06,
43
- "loss": 1.4757,
44
  "step": 25
45
  },
46
  {
47
- "epoch": 0.5,
48
- "grad_norm": 0.9541631937026978,
49
  "learning_rate": 1e-06,
50
- "loss": 1.4635,
51
  "step": 30
52
  },
53
  {
54
- "epoch": 0.5833333333333334,
55
- "grad_norm": 0.9896816611289978,
56
  "learning_rate": 1e-06,
57
- "loss": 1.4231,
58
  "step": 35
59
  },
60
  {
61
- "epoch": 0.6666666666666666,
62
- "grad_norm": 0.9481335282325745,
63
  "learning_rate": 1e-06,
64
- "loss": 1.3768,
65
  "step": 40
66
  },
67
  {
68
- "epoch": 0.75,
69
- "grad_norm": 1.1283329725265503,
70
  "learning_rate": 1e-06,
71
- "loss": 1.3612,
72
  "step": 45
73
  },
74
  {
75
- "epoch": 0.8333333333333334,
76
- "grad_norm": 0.891376256942749,
77
  "learning_rate": 1e-06,
78
- "loss": 1.4153,
79
  "step": 50
80
  },
81
  {
82
- "epoch": 0.9166666666666666,
83
- "grad_norm": 1.1683685779571533,
84
  "learning_rate": 1e-06,
85
- "loss": 1.4396,
86
  "step": 55
87
  },
88
  {
89
- "epoch": 1.0,
90
- "grad_norm": 0.9238619208335876,
91
  "learning_rate": 1e-06,
92
- "loss": 1.4343,
93
  "step": 60
94
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95
  {
96
  "epoch": 1.0,
97
- "step": 60,
98
- "total_flos": 4.573495361431142e+16,
99
- "train_loss": 1.440619428952535,
100
- "train_runtime": 599.9581,
101
- "train_samples_per_second": 1.593,
102
- "train_steps_per_second": 0.1
103
  }
104
  ],
105
  "logging_steps": 5,
106
- "max_steps": 60,
107
  "num_input_tokens_seen": 0,
108
  "num_train_epochs": 1,
109
  "save_steps": 200,
@@ -119,7 +133,7 @@
119
  "attributes": {}
120
  }
121
  },
122
- "total_flos": 4.573495361431142e+16,
123
  "train_batch_size": 8,
124
  "trial_name": null,
125
  "trial_params": null
 
3
  "best_model_checkpoint": null,
4
  "epoch": 1.0,
5
  "eval_steps": 1000,
6
+ "global_step": 72,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.06944444444444445,
13
+ "grad_norm": 4.683212757110596,
14
  "learning_rate": 1e-06,
15
+ "loss": 2.5448,
16
  "step": 5
17
  },
18
  {
19
+ "epoch": 0.1388888888888889,
20
+ "grad_norm": 4.721557140350342,
21
  "learning_rate": 1e-06,
22
+ "loss": 2.4235,
23
  "step": 10
24
  },
25
  {
26
+ "epoch": 0.20833333333333334,
27
+ "grad_norm": 3.7502753734588623,
28
  "learning_rate": 1e-06,
29
+ "loss": 2.5413,
30
  "step": 15
31
  },
32
  {
33
+ "epoch": 0.2777777777777778,
34
+ "grad_norm": 4.788341045379639,
35
  "learning_rate": 1e-06,
36
+ "loss": 2.7545,
37
  "step": 20
38
  },
39
  {
40
+ "epoch": 0.3472222222222222,
41
+ "grad_norm": 4.027582168579102,
42
  "learning_rate": 1e-06,
43
+ "loss": 2.398,
44
  "step": 25
45
  },
46
  {
47
+ "epoch": 0.4166666666666667,
48
+ "grad_norm": 4.083437442779541,
49
  "learning_rate": 1e-06,
50
+ "loss": 2.737,
51
  "step": 30
52
  },
53
  {
54
+ "epoch": 0.4861111111111111,
55
+ "grad_norm": 3.840348720550537,
56
  "learning_rate": 1e-06,
57
+ "loss": 2.6366,
58
  "step": 35
59
  },
60
  {
61
+ "epoch": 0.5555555555555556,
62
+ "grad_norm": 3.376926898956299,
63
  "learning_rate": 1e-06,
64
+ "loss": 2.5638,
65
  "step": 40
66
  },
67
  {
68
+ "epoch": 0.625,
69
+ "grad_norm": 3.656587600708008,
70
  "learning_rate": 1e-06,
71
+ "loss": 2.7568,
72
  "step": 45
73
  },
74
  {
75
+ "epoch": 0.6944444444444444,
76
+ "grad_norm": 2.5398614406585693,
77
  "learning_rate": 1e-06,
78
+ "loss": 2.3856,
79
  "step": 50
80
  },
81
  {
82
+ "epoch": 0.7638888888888888,
83
+ "grad_norm": 2.253296136856079,
84
  "learning_rate": 1e-06,
85
+ "loss": 2.268,
86
  "step": 55
87
  },
88
  {
89
+ "epoch": 0.8333333333333334,
90
+ "grad_norm": 2.464299201965332,
91
  "learning_rate": 1e-06,
92
+ "loss": 2.45,
93
  "step": 60
94
  },
95
+ {
96
+ "epoch": 0.9027777777777778,
97
+ "grad_norm": 2.9303369522094727,
98
+ "learning_rate": 1e-06,
99
+ "loss": 2.5548,
100
+ "step": 65
101
+ },
102
+ {
103
+ "epoch": 0.9722222222222222,
104
+ "grad_norm": 1.658677339553833,
105
+ "learning_rate": 1e-06,
106
+ "loss": 2.3566,
107
+ "step": 70
108
+ },
109
  {
110
  "epoch": 1.0,
111
+ "step": 72,
112
+ "total_flos": 4.988916731582874e+16,
113
+ "train_loss": 2.5252017312579684,
114
+ "train_runtime": 672.6439,
115
+ "train_samples_per_second": 1.713,
116
+ "train_steps_per_second": 0.107
117
  }
118
  ],
119
  "logging_steps": 5,
120
+ "max_steps": 72,
121
  "num_input_tokens_seen": 0,
122
  "num_train_epochs": 1,
123
  "save_steps": 200,
 
133
  "attributes": {}
134
  }
135
  },
136
+ "total_flos": 4.988916731582874e+16,
137
  "train_batch_size": 8,
138
  "trial_name": null,
139
  "trial_params": null