NekoFi committed on
Commit
50bd42e
1 Parent(s): f1044ff

End of training

Browse files
Files changed (3) hide show
  1. all_results.json +8 -0
  2. train_results.json +8 -0
  3. trainer_state.json +132 -0
all_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 3.6923076923076925,
3
+ "total_flos": 3.505150325906473e+17,
4
+ "train_loss": 0.428266316652298,
5
+ "train_runtime": 301.1045,
6
+ "train_samples_per_second": 5.526,
7
+ "train_steps_per_second": 0.08
8
+ }
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 3.6923076923076925,
3
+ "total_flos": 3.505150325906473e+17,
4
+ "train_loss": 0.428266316652298,
5
+ "train_runtime": 301.1045,
6
+ "train_samples_per_second": 5.526,
7
+ "train_steps_per_second": 0.08
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,132 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.9361702127659575,
3
+ "best_model_checkpoint": "portrait_cosu_exp3/checkpoint-13",
4
+ "epoch": 3.6923076923076925,
5
+ "eval_steps": 500,
6
+ "global_step": 24,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.9230769230769231,
13
+ "eval_accuracy": 0.851063829787234,
14
+ "eval_confusion_matrix": [
15
+ [
16
+ 17,
17
+ 3
18
+ ],
19
+ [
20
+ 4,
21
+ 23
22
+ ]
23
+ ],
24
+ "eval_f1": 0.8514750663363719,
25
+ "eval_loss": 0.2920527458190918,
26
+ "eval_precision": 0.8526615228742889,
27
+ "eval_recall": 0.851063829787234,
28
+ "eval_runtime": 3.0689,
29
+ "eval_samples_per_second": 15.315,
30
+ "eval_steps_per_second": 0.978,
31
+ "step": 6
32
+ },
33
+ {
34
+ "epoch": 1.5384615384615383,
35
+ "grad_norm": 7.153378486633301,
36
+ "learning_rate": 3.3333333333333335e-05,
37
+ "loss": 0.5415,
38
+ "step": 10
39
+ },
40
+ {
41
+ "epoch": 2.0,
42
+ "eval_accuracy": 0.9361702127659575,
43
+ "eval_confusion_matrix": [
44
+ [
45
+ 17,
46
+ 3
47
+ ],
48
+ [
49
+ 0,
50
+ 27
51
+ ]
52
+ ],
53
+ "eval_f1": 0.9352622499319029,
54
+ "eval_loss": 0.25635045766830444,
55
+ "eval_precision": 0.9425531914893617,
56
+ "eval_recall": 0.9361702127659575,
57
+ "eval_runtime": 3.2937,
58
+ "eval_samples_per_second": 14.27,
59
+ "eval_steps_per_second": 0.911,
60
+ "step": 13
61
+ },
62
+ {
63
+ "epoch": 2.9230769230769234,
64
+ "eval_accuracy": 0.8723404255319149,
65
+ "eval_confusion_matrix": [
66
+ [
67
+ 19,
68
+ 1
69
+ ],
70
+ [
71
+ 5,
72
+ 22
73
+ ]
74
+ ],
75
+ "eval_f1": 0.873036750483559,
76
+ "eval_loss": 0.3604692220687866,
77
+ "eval_precision": 0.8863706444650015,
78
+ "eval_recall": 0.8723404255319149,
79
+ "eval_runtime": 3.1924,
80
+ "eval_samples_per_second": 14.722,
81
+ "eval_steps_per_second": 0.94,
82
+ "step": 19
83
+ },
84
+ {
85
+ "epoch": 3.076923076923077,
86
+ "grad_norm": 9.893902778625488,
87
+ "learning_rate": 9.523809523809523e-06,
88
+ "loss": 0.378,
89
+ "step": 20
90
+ },
91
+ {
92
+ "epoch": 3.6923076923076925,
93
+ "eval_accuracy": 0.9148936170212766,
94
+ "eval_confusion_matrix": [
95
+ [
96
+ 19,
97
+ 1
98
+ ],
99
+ [
100
+ 3,
101
+ 24
102
+ ]
103
+ ],
104
+ "eval_f1": 0.9152832982620216,
105
+ "eval_loss": 0.25239235162734985,
106
+ "eval_precision": 0.9189941972920695,
107
+ "eval_recall": 0.9148936170212766,
108
+ "eval_runtime": 2.9295,
109
+ "eval_samples_per_second": 16.044,
110
+ "eval_steps_per_second": 1.024,
111
+ "step": 24
112
+ },
113
+ {
114
+ "epoch": 3.6923076923076925,
115
+ "step": 24,
116
+ "total_flos": 3.505150325906473e+17,
117
+ "train_loss": 0.428266316652298,
118
+ "train_runtime": 301.1045,
119
+ "train_samples_per_second": 5.526,
120
+ "train_steps_per_second": 0.08
121
+ }
122
+ ],
123
+ "logging_steps": 10,
124
+ "max_steps": 24,
125
+ "num_input_tokens_seen": 0,
126
+ "num_train_epochs": 4,
127
+ "save_steps": 500,
128
+ "total_flos": 3.505150325906473e+17,
129
+ "train_batch_size": 16,
130
+ "trial_name": null,
131
+ "trial_params": null
132
+ }