sharkMeow committed on
Commit
0cbf4fc
1 Parent(s): 034eda0

End of training

Browse files
Files changed (5) hide show
  1. README.md +1 -1
  2. all_results.json +8 -8
  3. eval_results.json +4 -4
  4. train_results.json +4 -4
  5. trainer_state.json +68 -68
README.md CHANGED
@@ -15,7 +15,7 @@ should probably proofread and complete it, then remove this comment. -->
15
 
16
  This model is a fine-tuned version of [OFA-Sys/chinese-clip-vit-base-patch16](https://huggingface.co/OFA-Sys/chinese-clip-vit-base-patch16) on an unknown dataset.
17
  It achieves the following results on the evaluation set:
18
- - Loss: 4.8862
19
 
20
  ## Model description
21
 
 
15
 
16
  This model is a fine-tuned version of [OFA-Sys/chinese-clip-vit-base-patch16](https://huggingface.co/OFA-Sys/chinese-clip-vit-base-patch16) on an unknown dataset.
17
  It achieves the following results on the evaluation set:
18
+ - Loss: 4.9099
19
 
20
  ## Model description
21
 
all_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
  "epoch": 60.0,
3
- "eval_loss": 3.5839552879333496,
4
- "eval_runtime": 31.5919,
5
- "eval_samples_per_second": 307.516,
6
- "eval_steps_per_second": 6.995,
7
  "total_flos": 1.4015777943683174e+18,
8
- "train_loss": 2.0418326404735745,
9
- "train_runtime": 30807.4628,
10
- "train_samples_per_second": 115.158,
11
- "train_steps_per_second": 2.88
12
  }
 
1
  {
2
  "epoch": 60.0,
3
+ "eval_loss": 4.9098591804504395,
4
+ "eval_runtime": 31.7328,
5
+ "eval_samples_per_second": 306.15,
6
+ "eval_steps_per_second": 6.964,
7
  "total_flos": 1.4015777943683174e+18,
8
+ "train_loss": 2.04994026615675,
9
+ "train_runtime": 30910.218,
10
+ "train_samples_per_second": 114.776,
11
+ "train_steps_per_second": 2.871
12
  }
eval_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "epoch": 60.0,
3
- "eval_loss": 3.5839552879333496,
4
- "eval_runtime": 31.5919,
5
- "eval_samples_per_second": 307.516,
6
- "eval_steps_per_second": 6.995
7
  }
 
1
  {
2
  "epoch": 60.0,
3
+ "eval_loss": 4.9098591804504395,
4
+ "eval_runtime": 31.7328,
5
+ "eval_samples_per_second": 306.15,
6
+ "eval_steps_per_second": 6.964
7
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 60.0,
3
  "total_flos": 1.4015777943683174e+18,
4
- "train_loss": 2.0418326404735745,
5
- "train_runtime": 30807.4628,
6
- "train_samples_per_second": 115.158,
7
- "train_steps_per_second": 2.88
8
  }
 
1
  {
2
  "epoch": 60.0,
3
  "total_flos": 1.4015777943683174e+18,
4
+ "train_loss": 2.04994026615675,
5
+ "train_runtime": 30910.218,
6
+ "train_samples_per_second": 114.776,
7
+ "train_steps_per_second": 2.871
8
  }
trainer_state.json CHANGED
@@ -10,162 +10,162 @@
10
  "log_history": [
11
  {
12
  "epoch": 6.0,
13
- "grad_norm": 2.476245641708374,
14
- "learning_rate": 9.0009015100293e-06,
15
- "loss": 2.4036,
16
  "step": 8874
17
  },
18
  {
19
  "epoch": 6.0,
20
- "eval_loss": 3.011286497116089,
21
- "eval_runtime": 32.2427,
22
- "eval_samples_per_second": 301.308,
23
- "eval_steps_per_second": 6.854,
24
  "step": 8874
25
  },
26
  {
27
  "epoch": 12.0,
28
- "grad_norm": 10.134785652160645,
29
  "learning_rate": 8.001239576290288e-06,
30
- "loss": 2.1954,
31
  "step": 17748
32
  },
33
  {
34
  "epoch": 12.0,
35
- "eval_loss": 3.1597039699554443,
36
- "eval_runtime": 31.8518,
37
- "eval_samples_per_second": 305.006,
38
- "eval_steps_per_second": 6.938,
39
  "step": 17748
40
  },
41
  {
42
  "epoch": 18.0,
43
- "grad_norm": 14.89818000793457,
44
  "learning_rate": 7.001690331304937e-06,
45
- "loss": 2.0709,
46
  "step": 26622
47
  },
48
  {
49
  "epoch": 18.0,
50
- "eval_loss": 3.206042528152466,
51
- "eval_runtime": 33.1331,
52
- "eval_samples_per_second": 293.211,
53
- "eval_steps_per_second": 6.67,
54
  "step": 26622
55
  },
56
  {
57
  "epoch": 24.0,
58
- "grad_norm": 20.7427921295166,
59
  "learning_rate": 6.002141086319586e-06,
60
- "loss": 2.0132,
61
  "step": 35496
62
  },
63
  {
64
  "epoch": 24.0,
65
- "eval_loss": 3.307133913040161,
66
- "eval_runtime": 31.6331,
67
- "eval_samples_per_second": 307.115,
68
- "eval_steps_per_second": 6.986,
69
  "step": 35496
70
  },
71
  {
72
  "epoch": 30.0,
73
- "grad_norm": 10.81413745880127,
74
  "learning_rate": 5.0025918413342355e-06,
75
- "loss": 1.9783,
76
  "step": 44370
77
  },
78
  {
79
  "epoch": 30.0,
80
- "eval_loss": 3.3543457984924316,
81
- "eval_runtime": 31.6425,
82
- "eval_samples_per_second": 307.024,
83
- "eval_steps_per_second": 6.984,
84
  "step": 44370
85
  },
86
  {
87
  "epoch": 36.0,
88
- "grad_norm": 13.179546356201172,
89
- "learning_rate": 4.003155285102547e-06,
90
- "loss": 1.9672,
91
  "step": 53244
92
  },
93
  {
94
  "epoch": 36.0,
95
- "eval_loss": 3.45923113822937,
96
- "eval_runtime": 31.6946,
97
- "eval_samples_per_second": 306.519,
98
- "eval_steps_per_second": 6.973,
99
  "step": 53244
100
  },
101
  {
102
  "epoch": 42.0,
103
- "grad_norm": 35.83116149902344,
104
  "learning_rate": 3.003606040117197e-06,
105
- "loss": 1.9536,
106
  "step": 62118
107
  },
108
  {
109
  "epoch": 42.0,
110
- "eval_loss": 3.473649740219116,
111
- "eval_runtime": 31.7269,
112
- "eval_samples_per_second": 306.207,
113
- "eval_steps_per_second": 6.966,
114
  "step": 62118
115
  },
116
  {
117
  "epoch": 48.0,
118
- "grad_norm": 2.7362611293792725,
119
- "learning_rate": 2.0041694838855083e-06,
120
- "loss": 1.9473,
121
  "step": 70992
122
  },
123
  {
124
  "epoch": 48.0,
125
- "eval_loss": 3.531722068786621,
126
- "eval_runtime": 31.6356,
127
- "eval_samples_per_second": 307.091,
128
- "eval_steps_per_second": 6.986,
129
  "step": 70992
130
  },
131
  {
132
  "epoch": 54.0,
133
- "grad_norm": 3.277851104736328,
134
- "learning_rate": 1.0046202389001578e-06,
135
- "loss": 1.9479,
136
  "step": 79866
137
  },
138
  {
139
  "epoch": 54.0,
140
- "eval_loss": 3.5619990825653076,
141
- "eval_runtime": 31.6245,
142
- "eval_samples_per_second": 307.199,
143
- "eval_steps_per_second": 6.988,
144
  "step": 79866
145
  },
146
  {
147
  "epoch": 60.0,
148
- "grad_norm": 2.524662733078003,
149
  "learning_rate": 5.070993914807302e-09,
150
- "loss": 1.9411,
151
  "step": 88740
152
  },
153
  {
154
  "epoch": 60.0,
155
- "eval_loss": 3.5839552879333496,
156
- "eval_runtime": 31.4889,
157
- "eval_samples_per_second": 308.522,
158
- "eval_steps_per_second": 7.018,
159
  "step": 88740
160
  },
161
  {
162
  "epoch": 60.0,
163
  "step": 88740,
164
  "total_flos": 1.4015777943683174e+18,
165
- "train_loss": 2.0418326404735745,
166
- "train_runtime": 30807.4628,
167
- "train_samples_per_second": 115.158,
168
- "train_steps_per_second": 2.88
169
  }
170
  ],
171
  "logging_steps": 8874,
 
10
  "log_history": [
11
  {
12
  "epoch": 6.0,
13
+ "grad_norm": 2.8650519847869873,
14
+ "learning_rate": 9.001014198782962e-06,
15
+ "loss": 2.4122,
16
  "step": 8874
17
  },
18
  {
19
  "epoch": 6.0,
20
+ "eval_loss": 3.9393680095672607,
21
+ "eval_runtime": 31.1663,
22
+ "eval_samples_per_second": 311.715,
23
+ "eval_steps_per_second": 7.091,
24
  "step": 8874
25
  },
26
  {
27
  "epoch": 12.0,
28
+ "grad_norm": 11.994268417358398,
29
  "learning_rate": 8.001239576290288e-06,
30
+ "loss": 2.2167,
31
  "step": 17748
32
  },
33
  {
34
  "epoch": 12.0,
35
+ "eval_loss": 4.1646857261657715,
36
+ "eval_runtime": 31.5415,
37
+ "eval_samples_per_second": 308.007,
38
+ "eval_steps_per_second": 7.007,
39
  "step": 17748
40
  },
41
  {
42
  "epoch": 18.0,
43
+ "grad_norm": 20.35555076599121,
44
  "learning_rate": 7.001690331304937e-06,
45
+ "loss": 2.0965,
46
  "step": 26622
47
  },
48
  {
49
  "epoch": 18.0,
50
+ "eval_loss": 4.430016994476318,
51
+ "eval_runtime": 31.5349,
52
+ "eval_samples_per_second": 308.071,
53
+ "eval_steps_per_second": 7.008,
54
  "step": 26622
55
  },
56
  {
57
  "epoch": 24.0,
58
+ "grad_norm": 3.9490978717803955,
59
  "learning_rate": 6.002141086319586e-06,
60
+ "loss": 2.0238,
61
  "step": 35496
62
  },
63
  {
64
  "epoch": 24.0,
65
+ "eval_loss": 4.574044227600098,
66
+ "eval_runtime": 31.6749,
67
+ "eval_samples_per_second": 306.709,
68
+ "eval_steps_per_second": 6.977,
69
  "step": 35496
70
  },
71
  {
72
  "epoch": 30.0,
73
+ "grad_norm": 9.400227546691895,
74
  "learning_rate": 5.0025918413342355e-06,
75
+ "loss": 1.9938,
76
  "step": 44370
77
  },
78
  {
79
  "epoch": 30.0,
80
+ "eval_loss": 4.62649393081665,
81
+ "eval_runtime": 31.7226,
82
+ "eval_samples_per_second": 306.249,
83
+ "eval_steps_per_second": 6.967,
84
  "step": 44370
85
  },
86
  {
87
  "epoch": 36.0,
88
+ "grad_norm": 5.468742370605469,
89
+ "learning_rate": 4.003042596348885e-06,
90
+ "loss": 1.973,
91
  "step": 53244
92
  },
93
  {
94
  "epoch": 36.0,
95
+ "eval_loss": 4.671382427215576,
96
+ "eval_runtime": 31.6041,
97
+ "eval_samples_per_second": 307.396,
98
+ "eval_steps_per_second": 6.993,
99
  "step": 53244
100
  },
101
  {
102
  "epoch": 42.0,
103
+ "grad_norm": 0.6607534289360046,
104
  "learning_rate": 3.003606040117197e-06,
105
+ "loss": 1.9583,
106
  "step": 62118
107
  },
108
  {
109
  "epoch": 42.0,
110
+ "eval_loss": 4.793060302734375,
111
+ "eval_runtime": 31.7099,
112
+ "eval_samples_per_second": 306.372,
113
+ "eval_steps_per_second": 6.969,
114
  "step": 62118
115
  },
116
  {
117
  "epoch": 48.0,
118
+ "grad_norm": 1.4975688457489014,
119
+ "learning_rate": 2.004056795131846e-06,
120
+ "loss": 1.9466,
121
  "step": 70992
122
  },
123
  {
124
  "epoch": 48.0,
125
+ "eval_loss": 4.7913103103637695,
126
+ "eval_runtime": 33.0493,
127
+ "eval_samples_per_second": 293.955,
128
+ "eval_steps_per_second": 6.687,
129
  "step": 70992
130
  },
131
  {
132
  "epoch": 54.0,
133
+ "grad_norm": 2.9576919078826904,
134
+ "learning_rate": 1.0045075501464953e-06,
135
+ "loss": 1.9415,
136
  "step": 79866
137
  },
138
  {
139
  "epoch": 54.0,
140
+ "eval_loss": 4.844839572906494,
141
+ "eval_runtime": 31.7934,
142
+ "eval_samples_per_second": 305.566,
143
+ "eval_steps_per_second": 6.951,
144
  "step": 79866
145
  },
146
  {
147
  "epoch": 60.0,
148
+ "grad_norm": 0.5801959037780762,
149
  "learning_rate": 5.070993914807302e-09,
150
+ "loss": 1.9369,
151
  "step": 88740
152
  },
153
  {
154
  "epoch": 60.0,
155
+ "eval_loss": 4.886190891265869,
156
+ "eval_runtime": 31.6305,
157
+ "eval_samples_per_second": 307.141,
158
+ "eval_steps_per_second": 6.987,
159
  "step": 88740
160
  },
161
  {
162
  "epoch": 60.0,
163
  "step": 88740,
164
  "total_flos": 1.4015777943683174e+18,
165
+ "train_loss": 2.04994026615675,
166
+ "train_runtime": 30910.218,
167
+ "train_samples_per_second": 114.776,
168
+ "train_steps_per_second": 2.871
169
  }
170
  ],
171
  "logging_steps": 8874,