tmnam20 commited on
Commit
bfc90e5
1 Parent(s): 638eaec

Upload trainer_state.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. trainer_state.json +172 -0
trainer_state.json ADDED
@@ -0,0 +1,172 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 3.0,
5
+ "eval_steps": 500,
6
+ "global_step": 1074,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.14,
13
+ "learning_rate": 1.9068901303538176e-05,
14
+ "loss": 0.5145,
15
+ "step": 50
16
+ },
17
+ {
18
+ "epoch": 0.28,
19
+ "learning_rate": 1.813780260707635e-05,
20
+ "loss": 0.3205,
21
+ "step": 100
22
+ },
23
+ {
24
+ "epoch": 0.42,
25
+ "learning_rate": 1.7206703910614527e-05,
26
+ "loss": 0.3015,
27
+ "step": 150
28
+ },
29
+ {
30
+ "epoch": 0.56,
31
+ "learning_rate": 1.62756052141527e-05,
32
+ "loss": 0.293,
33
+ "step": 200
34
+ },
35
+ {
36
+ "epoch": 0.7,
37
+ "learning_rate": 1.5344506517690876e-05,
38
+ "loss": 0.2785,
39
+ "step": 250
40
+ },
41
+ {
42
+ "epoch": 0.84,
43
+ "learning_rate": 1.4413407821229052e-05,
44
+ "loss": 0.2493,
45
+ "step": 300
46
+ },
47
+ {
48
+ "epoch": 0.98,
49
+ "learning_rate": 1.3482309124767226e-05,
50
+ "loss": 0.2353,
51
+ "step": 350
52
+ },
53
+ {
54
+ "epoch": 1.12,
55
+ "learning_rate": 1.25512104283054e-05,
56
+ "loss": 0.1852,
57
+ "step": 400
58
+ },
59
+ {
60
+ "epoch": 1.26,
61
+ "learning_rate": 1.1620111731843577e-05,
62
+ "loss": 0.2271,
63
+ "step": 450
64
+ },
65
+ {
66
+ "epoch": 1.4,
67
+ "learning_rate": 1.0689013035381753e-05,
68
+ "loss": 0.1942,
69
+ "step": 500
70
+ },
71
+ {
72
+ "epoch": 1.4,
73
+ "eval_accuracy": 0.9241945672773215,
74
+ "eval_loss": 0.24164709448814392,
75
+ "eval_runtime": 1.2031,
76
+ "eval_samples_per_second": 1315.82,
77
+ "eval_steps_per_second": 82.291,
78
+ "step": 500
79
+ },
80
+ {
81
+ "epoch": 1.54,
82
+ "learning_rate": 9.757914338919926e-06,
83
+ "loss": 0.2107,
84
+ "step": 550
85
+ },
86
+ {
87
+ "epoch": 1.68,
88
+ "learning_rate": 8.826815642458101e-06,
89
+ "loss": 0.1722,
90
+ "step": 600
91
+ },
92
+ {
93
+ "epoch": 1.82,
94
+ "learning_rate": 7.895716945996277e-06,
95
+ "loss": 0.1699,
96
+ "step": 650
97
+ },
98
+ {
99
+ "epoch": 1.96,
100
+ "learning_rate": 6.964618249534451e-06,
101
+ "loss": 0.1847,
102
+ "step": 700
103
+ },
104
+ {
105
+ "epoch": 2.09,
106
+ "learning_rate": 6.033519553072626e-06,
107
+ "loss": 0.1489,
108
+ "step": 750
109
+ },
110
+ {
111
+ "epoch": 2.23,
112
+ "learning_rate": 5.102420856610801e-06,
113
+ "loss": 0.1347,
114
+ "step": 800
115
+ },
116
+ {
117
+ "epoch": 2.37,
118
+ "learning_rate": 4.171322160148976e-06,
119
+ "loss": 0.1427,
120
+ "step": 850
121
+ },
122
+ {
123
+ "epoch": 2.51,
124
+ "learning_rate": 3.240223463687151e-06,
125
+ "loss": 0.138,
126
+ "step": 900
127
+ },
128
+ {
129
+ "epoch": 2.65,
130
+ "learning_rate": 2.3091247672253262e-06,
131
+ "loss": 0.1535,
132
+ "step": 950
133
+ },
134
+ {
135
+ "epoch": 2.79,
136
+ "learning_rate": 1.378026070763501e-06,
137
+ "loss": 0.1297,
138
+ "step": 1000
139
+ },
140
+ {
141
+ "epoch": 2.79,
142
+ "eval_accuracy": 0.9336702463676564,
143
+ "eval_loss": 0.2394879162311554,
144
+ "eval_runtime": 1.8087,
145
+ "eval_samples_per_second": 875.196,
146
+ "eval_steps_per_second": 54.734,
147
+ "step": 1000
148
+ },
149
+ {
150
+ "epoch": 2.93,
151
+ "learning_rate": 4.46927374301676e-07,
152
+ "loss": 0.124,
153
+ "step": 1050
154
+ },
155
+ {
156
+ "epoch": 3.0,
157
+ "step": 1074,
158
+ "total_flos": 928844837279604.0,
159
+ "train_loss": 0.21266074331557064,
160
+ "train_runtime": 147.2894,
161
+ "train_samples_per_second": 232.726,
162
+ "train_steps_per_second": 7.292
163
+ }
164
+ ],
165
+ "logging_steps": 50,
166
+ "max_steps": 1074,
167
+ "num_train_epochs": 3,
168
+ "save_steps": 500,
169
+ "total_flos": 928844837279604.0,
170
+ "trial_name": null,
171
+ "trial_params": null
172
+ }