jflotz committed on
Commit
0cbf4ed
1 Parent(s): 3e23b11

Training in progress, step 110000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2756bd704606cd6c5c35ea5f45a21e975a94f7ac54bd0802ebe8750dfbd1eba8
3
  size 50044689
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9bb8f4ee9932432a287ea513e3af3b078520ad61de1f8f7e44c83962a189c305
3
  size 50044689
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0fc703713a0fad50a78f7cc73423f660e122486f1451ea9412d49c8df9646af6
3
  size 25761253
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:692f0849f73d31d0d7ef6f2f2af3ccd645c1956f15cf41f9debc0106e2cea5be
3
  size 25761253
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f94f403b594a29ecb6816cee93c65f5e3a0566d5747151b3697716adac4e9951
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0b2eacfeedd0bf9408bd36cab2e1c6eae31897175ab967af2fed6efe328f2b5f
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f94f403b594a29ecb6816cee93c65f5e3a0566d5747151b3697716adac4e9951
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0b2eacfeedd0bf9408bd36cab2e1c6eae31897175ab967af2fed6efe328f2b5f
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f94f403b594a29ecb6816cee93c65f5e3a0566d5747151b3697716adac4e9951
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0b2eacfeedd0bf9408bd36cab2e1c6eae31897175ab967af2fed6efe328f2b5f
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f94f403b594a29ecb6816cee93c65f5e3a0566d5747151b3697716adac4e9951
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0b2eacfeedd0bf9408bd36cab2e1c6eae31897175ab967af2fed6efe328f2b5f
3
  size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f94f403b594a29ecb6816cee93c65f5e3a0566d5747151b3697716adac4e9951
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0b2eacfeedd0bf9408bd36cab2e1c6eae31897175ab967af2fed6efe328f2b5f
3
  size 14503
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f94f403b594a29ecb6816cee93c65f5e3a0566d5747151b3697716adac4e9951
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0b2eacfeedd0bf9408bd36cab2e1c6eae31897175ab967af2fed6efe328f2b5f
3
  size 14503
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f94f403b594a29ecb6816cee93c65f5e3a0566d5747151b3697716adac4e9951
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0b2eacfeedd0bf9408bd36cab2e1c6eae31897175ab967af2fed6efe328f2b5f
3
  size 14503
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f94f403b594a29ecb6816cee93c65f5e3a0566d5747151b3697716adac4e9951
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0b2eacfeedd0bf9408bd36cab2e1c6eae31897175ab967af2fed6efe328f2b5f
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0d85ea74361bfabc4dca40ed2a4dec24f25124d91f625a1176acad7044d70175
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fe4bb2f202e1c5bfac6f58d7d7aff54991c6919cce0ee2976f31297f1718992f
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 4.461099214846538,
5
- "global_step": 100000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -2006,11 +2006,211 @@
2006
  "eval_samples_per_second": 1041.22,
2007
  "eval_steps_per_second": 16.319,
2008
  "step": 100000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2009
  }
2010
  ],
2011
  "max_steps": 250000,
2012
  "num_train_epochs": 12,
2013
- "total_flos": 1.6016515007293466e+21,
2014
  "trial_name": null,
2015
  "trial_params": null
2016
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 4.907209136331192,
5
+ "global_step": 110000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
2006
  "eval_samples_per_second": 1041.22,
2007
  "eval_steps_per_second": 16.319,
2008
  "step": 100000
2009
+ },
2010
+ {
2011
+ "epoch": 4.48,
2012
+ "learning_rate": 0.00042171081131341917,
2013
+ "loss": 0.3996,
2014
+ "step": 100500
2015
+ },
2016
+ {
2017
+ "epoch": 4.51,
2018
+ "learning_rate": 0.00041991636704523497,
2019
+ "loss": 0.3995,
2020
+ "step": 101000
2021
+ },
2022
+ {
2023
+ "epoch": 4.51,
2024
+ "eval_loss": 0.37412840127944946,
2025
+ "eval_runtime": 2.2118,
2026
+ "eval_samples_per_second": 1038.514,
2027
+ "eval_steps_per_second": 16.276,
2028
+ "step": 101000
2029
+ },
2030
+ {
2031
+ "epoch": 4.53,
2032
+ "learning_rate": 0.00041811689596287893,
2033
+ "loss": 0.3989,
2034
+ "step": 101500
2035
+ },
2036
+ {
2037
+ "epoch": 4.55,
2038
+ "learning_rate": 0.0004163124767810454,
2039
+ "loss": 0.3985,
2040
+ "step": 102000
2041
+ },
2042
+ {
2043
+ "epoch": 4.55,
2044
+ "eval_loss": 0.37019699811935425,
2045
+ "eval_runtime": 2.207,
2046
+ "eval_samples_per_second": 1040.78,
2047
+ "eval_steps_per_second": 16.312,
2048
+ "step": 102000
2049
+ },
2050
+ {
2051
+ "epoch": 4.57,
2052
+ "learning_rate": 0.00041450318843087506,
2053
+ "loss": 0.3983,
2054
+ "step": 102500
2055
+ },
2056
+ {
2057
+ "epoch": 4.59,
2058
+ "learning_rate": 0.00041268911005650166,
2059
+ "loss": 0.3981,
2060
+ "step": 103000
2061
+ },
2062
+ {
2063
+ "epoch": 4.59,
2064
+ "eval_loss": 0.3799527883529663,
2065
+ "eval_runtime": 2.7041,
2066
+ "eval_samples_per_second": 849.448,
2067
+ "eval_steps_per_second": 13.313,
2068
+ "step": 103000
2069
+ },
2070
+ {
2071
+ "epoch": 4.62,
2072
+ "learning_rate": 0.00041087032101159006,
2073
+ "loss": 0.3978,
2074
+ "step": 103500
2075
+ },
2076
+ {
2077
+ "epoch": 4.64,
2078
+ "learning_rate": 0.00040904690085586515,
2079
+ "loss": 0.3986,
2080
+ "step": 104000
2081
+ },
2082
+ {
2083
+ "epoch": 4.64,
2084
+ "eval_loss": 0.3734039068222046,
2085
+ "eval_runtime": 2.2569,
2086
+ "eval_samples_per_second": 1017.749,
2087
+ "eval_steps_per_second": 15.951,
2088
+ "step": 104000
2089
+ },
2090
+ {
2091
+ "epoch": 4.66,
2092
+ "learning_rate": 0.0004072189293516316,
2093
+ "loss": 0.3969,
2094
+ "step": 104500
2095
+ },
2096
+ {
2097
+ "epoch": 4.68,
2098
+ "learning_rate": 0.0004053864864602847,
2099
+ "loss": 0.3966,
2100
+ "step": 105000
2101
+ },
2102
+ {
2103
+ "epoch": 4.68,
2104
+ "eval_loss": 0.37050846219062805,
2105
+ "eval_runtime": 2.2224,
2106
+ "eval_samples_per_second": 1033.581,
2107
+ "eval_steps_per_second": 16.199,
2108
+ "step": 105000
2109
+ },
2110
+ {
2111
+ "epoch": 4.71,
2112
+ "learning_rate": 0.00040354965233881297,
2113
+ "loss": 0.3961,
2114
+ "step": 105500
2115
+ },
2116
+ {
2117
+ "epoch": 4.73,
2118
+ "learning_rate": 0.0004017085073362913,
2119
+ "loss": 0.3957,
2120
+ "step": 106000
2121
+ },
2122
+ {
2123
+ "epoch": 4.73,
2124
+ "eval_loss": 0.36802881956100464,
2125
+ "eval_runtime": 2.2556,
2126
+ "eval_samples_per_second": 1018.364,
2127
+ "eval_steps_per_second": 15.96,
2128
+ "step": 106000
2129
+ },
2130
+ {
2131
+ "epoch": 4.75,
2132
+ "learning_rate": 0.00039986313199036664,
2133
+ "loss": 0.3956,
2134
+ "step": 106500
2135
+ },
2136
+ {
2137
+ "epoch": 4.77,
2138
+ "learning_rate": 0.00039801360702373484,
2139
+ "loss": 0.3957,
2140
+ "step": 107000
2141
+ },
2142
+ {
2143
+ "epoch": 4.77,
2144
+ "eval_loss": 0.3662741482257843,
2145
+ "eval_runtime": 2.5144,
2146
+ "eval_samples_per_second": 913.525,
2147
+ "eval_steps_per_second": 14.317,
2148
+ "step": 107000
2149
+ },
2150
+ {
2151
+ "epoch": 4.8,
2152
+ "learning_rate": 0.00039616001334060954,
2153
+ "loss": 0.3951,
2154
+ "step": 107500
2155
+ },
2156
+ {
2157
+ "epoch": 4.82,
2158
+ "learning_rate": 0.00039430243202318314,
2159
+ "loss": 0.3948,
2160
+ "step": 108000
2161
+ },
2162
+ {
2163
+ "epoch": 4.82,
2164
+ "eval_loss": 0.36831599473953247,
2165
+ "eval_runtime": 2.2146,
2166
+ "eval_samples_per_second": 1037.206,
2167
+ "eval_steps_per_second": 16.256,
2168
+ "step": 108000
2169
+ },
2170
+ {
2171
+ "epoch": 4.84,
2172
+ "learning_rate": 0.00039244094432808034,
2173
+ "loss": 0.3945,
2174
+ "step": 108500
2175
+ },
2176
+ {
2177
+ "epoch": 4.86,
2178
+ "learning_rate": 0.0003905756316828033,
2179
+ "loss": 0.3943,
2180
+ "step": 109000
2181
+ },
2182
+ {
2183
+ "epoch": 4.86,
2184
+ "eval_loss": 0.36974549293518066,
2185
+ "eval_runtime": 2.1759,
2186
+ "eval_samples_per_second": 1055.667,
2187
+ "eval_steps_per_second": 16.545,
2188
+ "step": 109000
2189
+ },
2190
+ {
2191
+ "epoch": 4.88,
2192
+ "learning_rate": 0.00038870657568216963,
2193
+ "loss": 0.3939,
2194
+ "step": 109500
2195
+ },
2196
+ {
2197
+ "epoch": 4.91,
2198
+ "learning_rate": 0.00038683385808474416,
2199
+ "loss": 0.3936,
2200
+ "step": 110000
2201
+ },
2202
+ {
2203
+ "epoch": 4.91,
2204
+ "eval_loss": 0.3671797811985016,
2205
+ "eval_runtime": 2.2064,
2206
+ "eval_samples_per_second": 1041.083,
2207
+ "eval_steps_per_second": 16.316,
2208
+ "step": 110000
2209
  }
2210
  ],
2211
  "max_steps": 250000,
2212
  "num_train_epochs": 12,
2213
+ "total_flos": 1.7618195035951506e+21,
2214
  "trial_name": null,
2215
  "trial_params": null
2216
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0fc703713a0fad50a78f7cc73423f660e122486f1451ea9412d49c8df9646af6
3
  size 25761253
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:692f0849f73d31d0d7ef6f2f2af3ccd645c1956f15cf41f9debc0106e2cea5be
3
  size 25761253