xgampx commited on
Commit
fb74699
1 Parent(s): d0ba160

Upload folder using huggingface_hub

Browse files
config.json ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "EleutherAI/gpt-neo-125M",
3
+ "activation_function": "gelu_new",
4
+ "architectures": [
5
+ "GPTNeoForCausalLM"
6
+ ],
7
+ "attention_dropout": 0,
8
+ "attention_layers": [
9
+ "global",
10
+ "local",
11
+ "global",
12
+ "local",
13
+ "global",
14
+ "local",
15
+ "global",
16
+ "local",
17
+ "global",
18
+ "local",
19
+ "global",
20
+ "local"
21
+ ],
22
+ "attention_types": [
23
+ [
24
+ [
25
+ "global",
26
+ "local"
27
+ ],
28
+ 6
29
+ ]
30
+ ],
31
+ "bos_token_id": 50256,
32
+ "classifier_dropout": 0.1,
33
+ "embed_dropout": 0,
34
+ "eos_token_id": 50256,
35
+ "gradient_checkpointing": false,
36
+ "hidden_size": 768,
37
+ "initializer_range": 0.02,
38
+ "intermediate_size": null,
39
+ "layer_norm_epsilon": 1e-05,
40
+ "max_position_embeddings": 2048,
41
+ "model_type": "gpt_neo",
42
+ "num_heads": 12,
43
+ "num_layers": 12,
44
+ "resid_dropout": 0,
45
+ "summary_activation": null,
46
+ "summary_first_dropout": 0.1,
47
+ "summary_proj_to_labels": true,
48
+ "summary_type": "cls_index",
49
+ "summary_use_proj": true,
50
+ "torch_dtype": "float32",
51
+ "transformers_version": "4.40.2",
52
+ "use_cache": true,
53
+ "vocab_size": 50257,
54
+ "window_size": 256
55
+ }
eval_accuracies.json ADDED
@@ -0,0 +1 @@
 
 
1
+ [0.23608017817371937, 0.26280623608017817, 0.24498886414253898, 0.2873051224944321, 0.24053452115812918, 0.23608017817371937, 0.2071269487750557, 0.27839643652561247, 0.26948775055679286, 0.3051224944320713, 0.2583518930957684, 0.2650334075723831, 0.2828507795100223, 0.2561247216035635, 0.24498886414253898, 0.2717149220489978, 0.2650334075723831, 0.2583518930957684, 0.23608017817371937, 0.24276169265033407, 0.2182628062360802, 0.2583518930957684, 0.26057906458797325, 0.24721603563474387, 0.29175946547884185, 0.2583518930957684, 0.2561247216035635, 0.2561247216035635, 0.2071269487750557, 0.26948775055679286, 0.3051224944320713, 0.2561247216035635, 0.27616926503340755, 0.2739420935412027, 0.27616926503340755, 0.2984409799554566, 0.2561247216035635, 0.22271714922049, 0.267260579064588, 0.24276169265033407, 0.2873051224944321, 0.3051224944320713, 0.26948775055679286, 0.2650334075723831, 0.3028953229398664, 0.27839643652561247, 0.2739420935412027, 0.2383073496659243, 0.29398663697104677, 0.311804008908686, 0.2828507795100223, 0.27839643652561247, 0.23608017817371937, 0.2806236080178174, 0.2650334075723831, 0.2717149220489978, 0.2717149220489978, 0.24498886414253898, 0.26057906458797325, 0.2717149220489978, 0.2806236080178174, 0.2828507795100223, 0.29398663697104677, 0.2650334075723831, 0.24721603563474387, 0.26948775055679286, 0.2650334075723831, 0.289532293986637, 0.2650334075723831, 0.27616926503340755, 0.26280623608017817, 0.2739420935412027, 0.26057906458797325, 0.2516703786191537, 0.24721603563474387, 0.25389755011135856, 0.27616926503340755, 0.2561247216035635, 0.23385300668151449, 0.2828507795100223, 0.28507795100222716, 0.2828507795100223, 0.29175946547884185, 0.24498886414253898, 0.28507795100222716, 0.2806236080178174, 0.2962138084632517, 0.311804008908686, 0.28507795100222716, 0.28507795100222716, 0.311804008908686, 0.267260579064588, 0.27616926503340755, 0.289532293986637, 0.24944320712694878, 0.28507795100222716, 0.2806236080178174, 0.2962138084632517, 0.30957683741648107, 0.29398663697104677, 0.29398663697104677, 0.2583518930957684, 0.31403118040089084, 0.2717149220489978, 0.30066815144766146, 0.3207126948775056, 0.2984409799554566, 0.2962138084632517, 0.27839643652561247, 0.3028953229398664, 0.2806236080178174, 0.2806236080178174, 0.29175946547884185, 0.29175946547884185, 0.2717149220489978, 0.30734966592427615, 0.29175946547884185, 0.267260579064588, 0.32293986636971045, 0.2739420935412027, 0.2739420935412027, 0.28507795100222716, 0.311804008908686, 0.289532293986637, 0.289532293986637, 0.289532293986637, 0.29175946547884185, 0.29175946547884185, 0.30066815144766146, 0.26280623608017817, 0.25389755011135856, 0.27839643652561247, 0.2739420935412027, 0.2650334075723831, 0.29175946547884185, 0.29398663697104677, 0.30066815144766146, 0.2962138084632517, 0.29398663697104677, 0.29175946547884185, 0.28507795100222716, 0.289532293986637, 0.29175946547884185, 0.29175946547884185, 0.29175946547884185, 0.289532293986637, 0.289532293986637, 0.2873051224944321, 0.2873051224944321, 0.29398663697104677, 0.2873051224944321, 0.28507795100222716, 0.28507795100222716, 0.29175946547884185, 0.29175946547884185, 0.2873051224944321, 0.2873051224944321, 0.2828507795100223, 0.2873051224944321, 0.29175946547884185, 0.289532293986637, 0.289532293986637, 0.289532293986637, 0.29175946547884185, 0.289532293986637, 0.289532293986637, 0.29175946547884185, 0.29175946547884185, 0.289532293986637, 0.2873051224944321, 0.2873051224944321, 0.289532293986637, 0.29398663697104677, 0.289532293986637, 0.29175946547884185, 0.29175946547884185, 0.289532293986637, 0.29175946547884185, 0.29175946547884185, 0.289532293986637, 0.29175946547884185, 0.29398663697104677, 0.289532293986637, 0.29175946547884185, 0.29175946547884185, 0.289532293986637, 0.289532293986637, 0.29175946547884185, 0.29175946547884185, 0.29398663697104677, 0.289532293986637, 0.29175946547884185, 0.29175946547884185, 0.2873051224944321, 0.29175946547884185, 0.29175946547884185, 0.2873051224944321, 0.29398663697104677, 0.29175946547884185, 0.289532293986637, 0.29398663697104677, 0.29175946547884185, 0.289532293986637, 0.289532293986637, 0.29175946547884185, 0.29175946547884185, 0.289532293986637, 0.2873051224944321, 0.29175946547884185, 0.289532293986637, 0.289532293986637, 0.29175946547884185, 0.29175946547884185, 0.29175946547884185, 0.29398663697104677, 0.29175946547884185, 0.289532293986637, 0.29175946547884185, 0.29175946547884185, 0.29398663697104677, 0.289532293986637, 0.289532293986637, 0.289532293986637, 0.29398663697104677, 0.29398663697104677, 0.29398663697104677, 0.29398663697104677]
eval_losses.json ADDED
@@ -0,0 +1 @@
 
 
1
+ [60.52962667347328, 34.19122038439812, 23.74606390413569, 15.989179065604517, 20.57854715328174, 9.04792015377291, 12.390776731123637, 7.513786681510823, 4.633021069794296, 4.376709456964166, 5.483146080726504, 6.198753373925563, 3.922561490987085, 7.984975731452483, 6.980345236963047, 6.955710873306462, 3.194411796291581, 3.5164864549127084, 2.6394897388721628, 2.6668942425989095, 3.9236641036375595, 3.2074627334662695, 3.6513183244352616, 2.8088118379524927, 3.209888237887342, 4.289337501494019, 3.618241362157537, 2.8082248745092038, 3.881140016500562, 4.8546685474221585, 5.52738983647064, 3.34798063414134, 3.485520746235327, 2.983701882224306, 3.922460635945632, 3.8075172112619957, 5.363051050755388, 5.0378188656804825, 4.510919257633405, 3.7182637827433562, 4.633824242515394, 3.7800802823429915, 5.003411322234734, 3.7613537346068893, 3.25334558747128, 5.049749144732554, 4.0697823561114035, 5.640474530795634, 4.149524392157726, 5.1195949464704515, 3.9394889207088073, 4.483620117926651, 6.320176044126396, 5.843576809876746, 4.6944663633482495, 6.041127807047957, 6.041127807047957, 7.316565004913738, 6.287772149444954, 6.569959992289808, 5.745986137729976, 5.634041814867797, 5.860415079280369, 5.522538393005762, 7.080721566300084, 5.700946624666652, 5.9127477856149655, 5.832473086354993, 6.7934885603813395, 7.256036807805764, 7.605260503053134, 7.180168118402528, 7.7344492272969605, 9.874718165079045, 10.571295173237212, 10.36026762030969, 8.764358720428959, 7.790637519417999, 7.594722836745078, 7.224606541854501, 8.718016668258107, 8.531252806330047, 8.901704926533263, 9.751480447157455, 7.969998675093619, 8.017999834101024, 8.143946799509775, 8.149615380440098, 8.252461269862934, 7.913748271481231, 7.553692100839254, 8.042389469847647, 7.882082192828767, 8.321121543977734, 9.083245278732283, 8.388956957776722, 10.256710181257507, 8.229672977812838, 9.276915860069886, 9.939182188569305, 8.386310017719566, 9.967405012296409, 8.698384391438456, 9.509878800016203, 9.580370679199032, 9.111281065208079, 9.103026643362236, 9.12107621269396, 9.410857158408133, 10.188055311120166, 10.27910998005644, 10.225904667297822, 11.200445165612916, 11.200445165612916, 10.245226866683875, 10.807444794406338, 10.576647457938417, 10.836683603065847, 10.440909260099874, 10.571329548523527, 10.618614409973468, 10.869641676246456, 10.154033976567614, 10.82368308504865, 10.384950182485687, 10.193687921377492, 10.141651525794796, 10.575895998154026, 10.044374108845513, 10.456247582467467, 10.683493417461625, 10.813685673382341, 10.532923006533514, 10.573380004324202, 10.943000950101755, 10.49535890411428, 10.167846569770694, 10.32294571187289, 10.344909919662305, 10.346937850482746, 10.358354823095496, 10.405758455495262, 10.39418655222402, 10.35615080820691, 10.346692112081566, 10.360590923071438, 10.34382763285945, 10.350707177595465, 10.347361742255417, 10.348462748899227, 10.348547593787943, 10.340842965451008, 10.347406017488254, 10.350020480846243, 10.35179501758652, 10.352050505395987, 10.351541458896646, 10.358427210215206, 10.345337124072628, 10.352709721085223, 10.360429436167523, 10.347760676541148, 10.35342576296664, 10.352188705336012, 10.363568177732965, 10.356063167873629, 10.356763762195817, 10.347180699983525, 10.35826461325775, 10.352814334538042, 10.352814334538042, 10.355381966168206, 10.360799667033428, 10.355002749470666, 10.348384090946087, 10.358356245907483, 10.356281951434894, 10.351265369651047, 10.361950168630859, 10.366199752270246, 10.358269047630921, 10.362712784971054, 10.359818674408247, 10.35767705615751, 10.359826137600072, 10.359001074708118, 10.35296729492451, 10.362555071346478, 10.355844412455314, 10.361089165067353, 10.362927110795189, 10.369960203468136, 10.35872791976334, 10.364024426464514, 10.35911027466534, 10.368139328032665, 10.356590774117706, 10.359216305620155, 10.362541856638307, 10.361038155704405, 10.360817654892172, 10.368718033909532, 10.369256233053909, 10.360446997634021, 10.367601433150753, 10.366811474075828, 10.36838190805142, 10.368725807735808, 10.368054397122101, 10.370929356141717, 10.375884375221744, 10.364232564291072, 10.361336644879959, 10.369356065922165, 10.372793738719881, 10.36799365746683, 10.375184355440544, 10.373609079018998, 10.366479459743458, 10.371740277466635, 10.358641578519265, 10.370541626732706, 10.36719807006734, 10.363986671634665, 10.363764771374402, 10.366395955892873, 10.372516697923961]
final_eval_accuracies.json ADDED
@@ -0,0 +1 @@
 
 
1
+ [0.23608017817371937, 0.26280623608017817, 0.24498886414253898, 0.2873051224944321, 0.24053452115812918, 0.23608017817371937, 0.2071269487750557, 0.27839643652561247, 0.26948775055679286, 0.3051224944320713, 0.2583518930957684, 0.2650334075723831, 0.2828507795100223, 0.2561247216035635, 0.24498886414253898, 0.2717149220489978, 0.2650334075723831, 0.2583518930957684, 0.23608017817371937, 0.24276169265033407, 0.2182628062360802, 0.2583518930957684, 0.26057906458797325, 0.24721603563474387, 0.29175946547884185, 0.2583518930957684, 0.2561247216035635, 0.2561247216035635, 0.2071269487750557, 0.26948775055679286, 0.3051224944320713, 0.2561247216035635, 0.27616926503340755, 0.2739420935412027, 0.27616926503340755, 0.2984409799554566, 0.2561247216035635, 0.22271714922049, 0.267260579064588, 0.24276169265033407, 0.2873051224944321, 0.3051224944320713, 0.26948775055679286, 0.2650334075723831, 0.3028953229398664, 0.27839643652561247, 0.2739420935412027, 0.2383073496659243, 0.29398663697104677, 0.311804008908686, 0.2828507795100223, 0.27839643652561247, 0.23608017817371937, 0.2806236080178174, 0.2650334075723831, 0.2717149220489978, 0.2717149220489978, 0.24498886414253898, 0.26057906458797325, 0.2717149220489978, 0.2806236080178174, 0.2828507795100223, 0.29398663697104677, 0.2650334075723831, 0.24721603563474387, 0.26948775055679286, 0.2650334075723831, 0.289532293986637, 0.2650334075723831, 0.27616926503340755, 0.26280623608017817, 0.2739420935412027, 0.26057906458797325, 0.2516703786191537, 0.24721603563474387, 0.25389755011135856, 0.27616926503340755, 0.2561247216035635, 0.23385300668151449, 0.2828507795100223, 0.28507795100222716, 0.2828507795100223, 0.29175946547884185, 0.24498886414253898, 0.28507795100222716, 0.2806236080178174, 0.2962138084632517, 0.311804008908686, 0.28507795100222716, 0.28507795100222716, 0.311804008908686, 0.267260579064588, 0.27616926503340755, 0.289532293986637, 0.24944320712694878, 0.28507795100222716, 0.2806236080178174, 0.2962138084632517, 0.30957683741648107, 0.29398663697104677, 0.29398663697104677, 0.2583518930957684, 0.31403118040089084, 0.2717149220489978, 0.30066815144766146, 0.3207126948775056, 0.2984409799554566, 0.2962138084632517, 0.27839643652561247, 0.3028953229398664, 0.2806236080178174, 0.2806236080178174, 0.29175946547884185, 0.29175946547884185, 0.2717149220489978, 0.30734966592427615, 0.29175946547884185, 0.267260579064588, 0.32293986636971045, 0.2739420935412027, 0.2739420935412027, 0.28507795100222716, 0.311804008908686, 0.289532293986637, 0.289532293986637, 0.289532293986637, 0.29175946547884185, 0.29175946547884185, 0.30066815144766146, 0.26280623608017817, 0.25389755011135856, 0.27839643652561247, 0.2739420935412027, 0.2650334075723831, 0.29175946547884185, 0.29398663697104677, 0.30066815144766146, 0.2962138084632517, 0.29398663697104677, 0.29175946547884185, 0.28507795100222716, 0.289532293986637, 0.29175946547884185, 0.29175946547884185, 0.29175946547884185, 0.289532293986637, 0.289532293986637, 0.2873051224944321, 0.2873051224944321, 0.29398663697104677, 0.2873051224944321, 0.28507795100222716, 0.28507795100222716, 0.29175946547884185, 0.29175946547884185, 0.2873051224944321, 0.2873051224944321, 0.2828507795100223, 0.2873051224944321, 0.29175946547884185, 0.289532293986637, 0.289532293986637, 0.289532293986637, 0.29175946547884185, 0.289532293986637, 0.289532293986637, 0.29175946547884185, 0.29175946547884185, 0.289532293986637, 0.2873051224944321, 0.2873051224944321, 0.289532293986637, 0.29398663697104677, 0.289532293986637, 0.29175946547884185, 0.29175946547884185, 0.289532293986637, 0.29175946547884185, 0.29175946547884185, 0.289532293986637, 0.29175946547884185, 0.29398663697104677, 0.289532293986637, 0.29175946547884185, 0.29175946547884185, 0.289532293986637, 0.289532293986637, 0.29175946547884185, 0.29175946547884185, 0.29398663697104677, 0.289532293986637, 0.29175946547884185, 0.29175946547884185, 0.2873051224944321, 0.29175946547884185, 0.29175946547884185, 0.2873051224944321, 0.29398663697104677, 0.29175946547884185, 0.289532293986637, 0.29398663697104677, 0.29175946547884185, 0.289532293986637, 0.289532293986637, 0.29175946547884185, 0.29175946547884185, 0.289532293986637, 0.2873051224944321, 0.29175946547884185, 0.289532293986637, 0.289532293986637, 0.29175946547884185, 0.29175946547884185, 0.29175946547884185, 0.29398663697104677, 0.29175946547884185, 0.289532293986637, 0.29175946547884185, 0.29175946547884185, 0.29398663697104677, 0.289532293986637, 0.289532293986637, 0.289532293986637, 0.29398663697104677, 0.29398663697104677, 0.29398663697104677, 0.29398663697104677, 0.29398663697104677]
final_eval_losses.json ADDED
@@ -0,0 +1 @@
 
 
1
+ [60.52962667347328, 34.19122038439812, 23.74606390413569, 15.989179065604517, 20.57854715328174, 9.04792015377291, 12.390776731123637, 7.513786681510823, 4.633021069794296, 4.376709456964166, 5.483146080726504, 6.198753373925563, 3.922561490987085, 7.984975731452483, 6.980345236963047, 6.955710873306462, 3.194411796291581, 3.5164864549127084, 2.6394897388721628, 2.6668942425989095, 3.9236641036375595, 3.2074627334662695, 3.6513183244352616, 2.8088118379524927, 3.209888237887342, 4.289337501494019, 3.618241362157537, 2.8082248745092038, 3.881140016500562, 4.8546685474221585, 5.52738983647064, 3.34798063414134, 3.485520746235327, 2.983701882224306, 3.922460635945632, 3.8075172112619957, 5.363051050755388, 5.0378188656804825, 4.510919257633405, 3.7182637827433562, 4.633824242515394, 3.7800802823429915, 5.003411322234734, 3.7613537346068893, 3.25334558747128, 5.049749144732554, 4.0697823561114035, 5.640474530795634, 4.149524392157726, 5.1195949464704515, 3.9394889207088073, 4.483620117926651, 6.320176044126396, 5.843576809876746, 4.6944663633482495, 6.041127807047957, 6.041127807047957, 7.316565004913738, 6.287772149444954, 6.569959992289808, 5.745986137729976, 5.634041814867797, 5.860415079280369, 5.522538393005762, 7.080721566300084, 5.700946624666652, 5.9127477856149655, 5.832473086354993, 6.7934885603813395, 7.256036807805764, 7.605260503053134, 7.180168118402528, 7.7344492272969605, 9.874718165079045, 10.571295173237212, 10.36026762030969, 8.764358720428959, 7.790637519417999, 7.594722836745078, 7.224606541854501, 8.718016668258107, 8.531252806330047, 8.901704926533263, 9.751480447157455, 7.969998675093619, 8.017999834101024, 8.143946799509775, 8.149615380440098, 8.252461269862934, 7.913748271481231, 7.553692100839254, 8.042389469847647, 7.882082192828767, 8.321121543977734, 9.083245278732283, 8.388956957776722, 10.256710181257507, 8.229672977812838, 9.276915860069886, 9.939182188569305, 8.386310017719566, 9.967405012296409, 8.698384391438456, 9.509878800016203, 9.580370679199032, 9.111281065208079, 9.103026643362236, 9.12107621269396, 9.410857158408133, 10.188055311120166, 10.27910998005644, 10.225904667297822, 11.200445165612916, 11.200445165612916, 10.245226866683875, 10.807444794406338, 10.576647457938417, 10.836683603065847, 10.440909260099874, 10.571329548523527, 10.618614409973468, 10.869641676246456, 10.154033976567614, 10.82368308504865, 10.384950182485687, 10.193687921377492, 10.141651525794796, 10.575895998154026, 10.044374108845513, 10.456247582467467, 10.683493417461625, 10.813685673382341, 10.532923006533514, 10.573380004324202, 10.943000950101755, 10.49535890411428, 10.167846569770694, 10.32294571187289, 10.344909919662305, 10.346937850482746, 10.358354823095496, 10.405758455495262, 10.39418655222402, 10.35615080820691, 10.346692112081566, 10.360590923071438, 10.34382763285945, 10.350707177595465, 10.347361742255417, 10.348462748899227, 10.348547593787943, 10.340842965451008, 10.347406017488254, 10.350020480846243, 10.35179501758652, 10.352050505395987, 10.351541458896646, 10.358427210215206, 10.345337124072628, 10.352709721085223, 10.360429436167523, 10.347760676541148, 10.35342576296664, 10.352188705336012, 10.363568177732965, 10.356063167873629, 10.356763762195817, 10.347180699983525, 10.35826461325775, 10.352814334538042, 10.352814334538042, 10.355381966168206, 10.360799667033428, 10.355002749470666, 10.348384090946087, 10.358356245907483, 10.356281951434894, 10.351265369651047, 10.361950168630859, 10.366199752270246, 10.358269047630921, 10.362712784971054, 10.359818674408247, 10.35767705615751, 10.359826137600072, 10.359001074708118, 10.35296729492451, 10.362555071346478, 10.355844412455314, 10.361089165067353, 10.362927110795189, 10.369960203468136, 10.35872791976334, 10.364024426464514, 10.35911027466534, 10.368139328032665, 10.356590774117706, 10.359216305620155, 10.362541856638307, 10.361038155704405, 10.360817654892172, 10.368718033909532, 10.369256233053909, 10.360446997634021, 10.367601433150753, 10.366811474075828, 10.36838190805142, 10.368725807735808, 10.368054397122101, 10.370929356141717, 10.375884375221744, 10.364232564291072, 10.361336644879959, 10.369356065922165, 10.372793738719881, 10.36799365746683, 10.375184355440544, 10.373609079018998, 10.366479459743458, 10.371740277466635, 10.358641578519265, 10.370541626732706, 10.36719807006734, 10.363986671634665, 10.363764771374402, 10.366395955892873, 10.372516697923961, 10.372516697923961]
final_training_accuracies.json ADDED
@@ -0,0 +1 @@
 
 
1
+ []
final_training_losses.json ADDED
@@ -0,0 +1 @@
 
 
1
+ [69.8818, 63.8803, 39.2378, 40.1697, 29.6556, 18.9288, 20.4652, 14.9841, 13.1094, 18.3367, 14.6834, 11.4179, 12.1548, 9.2089, 12.6553, 8.9966, 6.6264, 5.425, 4.0108, 4.6446, 4.2037, 5.351, 3.5588, 3.3752, 3.9435, 2.9198, 2.5778, 4.9556, 3.6617, 3.3234, 2.322, 2.654, 3.9577, 3.7476, 4.3091, 3.4247, 3.4923, 2.0426, 2.7236, 3.1049, 2.4879, 3.166, 3.0878, 2.5788, 2.6609, 2.0838, 1.7099, 1.5804, 1.6998, 2.9089, 2.4612, 2.3049, 2.153, 1.7821, 2.2852, 2.6422, 1.9646, 2.8868, 2.3617, 2.5646, 2.2608, 3.5567, 2.4334, 2.2484, 1.8068, 1.6729, 1.5552, 1.5632, 1.0311, 1.3329, 1.1305, 1.4385, 1.2884, 1.2844, 1.6241, 1.586, 1.6943, 1.171, 2.9335, 1.3589, 0.9444, 1.1969, 1.1131, 1.4661, 2.1222, 1.7025, 1.867, 0.7293, 1.19, 0.7823, 0.4771, 0.9291, 1.3334, 1.0675, 1.2327, 1.6467, 0.8789, 0.7196, 1.139, 1.9294, 1.7003, 0.5862, 0.9022, 1.5321, 1.1659, 1.2519, 1.0113, 0.7846, 0.8392, 0.7273, 1.1548, 0.943, 0.7082, 0.5344, 0.244, 0.6174, 0.3389, 1.3124, 0.8551, 0.3376, 0.4604, 0.5414, 0.496, 0.674, 0.8145, 1.2361, 0.8749, 0.415, 0.5223, 0.4985, 0.5853, 0.5778, 0.3809, 0.6341, 0.236, 0.1651, 0.211, 0.0128, 0.1367, 0.3378, 0.2076, 0.0721, 0.2576, 0.4862, 0.5412, 0.128, 0.1909, 0.4571, 0.433, 0.5168, 0.4076, 0.155, 0.2624, 0.2507, 0.4074, 0.1483, 0.1183, 0.0069, 0.14, 0.0987, 0.0256, 0.1885, 0.2929, 0.2532, 0.8499, 0.2517, 0.0098, 0.1959, 0.3862, 0.107, 0.3689, 0.0116, 0.3245, 0.4243, 0.0063, 0.0352, 0.3627, 0.2202, 0.5729, 0.0793, 0.0164, 0.0013, 0.0004, 0.0162, 0.0008, 0.0012, 0.444, 0.0622, 0.0001, 0.0047, 0.0653, 0.2099, 0.0557, 0.3274, 0.0002, 0.1174, 0.0468, 0.0689, 0.1094, 0.1829, 0.1645, 0.0142, 0.0589, 0.1568, 0.1529, 0.1008, 0.0965, 0.0044, 0.0001, 0.0001, 0.0, 0.0069, 0.0002, 0.0061, 0.0108, 0.0308, 0.0325, 0.4928, 0.0014, 0.0184, 0.0001, 0.0009, 0.2344, 0.0089, 0.0012, 0.0003, 0.0314, 0.0002, 0.0002, 0.0, 0.0033, 0.0005, 0.0001, 0.0, 0.0001, 0.0, 0.0, 0.0, 0.1, 0.0617, 0.1157, 0.0005, 0.0001, 0.0003, 0.0243, 0.0457, 0.0001, 0.0001, 0.0059, 0.0066, 0.0, 0.0, 0.0549, 0.0178, 0.0, 0.0834, 0.0, 0.0001, 0.0829, 0.0047, 0.0, 0.0, 0.0, 0.2143, 0.0019, 0.0, 0.0034, 0.0001, 0.0, 0.0001, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
generation_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 50256,
4
+ "eos_token_id": 50256,
5
+ "transformers_version": "4.40.2"
6
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bfcbe4ceeb8c9186498c87b9e4aa3574333ed492e26791133634e7c2d691492b
3
+ size 500823808
special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<|endoftext|>",
4
+ "lstrip": false,
5
+ "normalized": true,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "<|endoftext|>",
11
+ "lstrip": false,
12
+ "normalized": true,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": "<|endoftext|>",
17
+ "unk_token": {
18
+ "content": "<|endoftext|>",
19
+ "lstrip": false,
20
+ "normalized": true,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ }
24
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": false,
3
+ "add_prefix_space": false,
4
+ "added_tokens_decoder": {
5
+ "50256": {
6
+ "content": "<|endoftext|>",
7
+ "lstrip": false,
8
+ "normalized": true,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ }
13
+ },
14
+ "bos_token": "<|endoftext|>",
15
+ "clean_up_tokenization_spaces": true,
16
+ "eos_token": "<|endoftext|>",
17
+ "errors": "replace",
18
+ "model_max_length": 2048,
19
+ "pad_token": "<|endoftext|>",
20
+ "tokenizer_class": "GPT2Tokenizer",
21
+ "unk_token": "<|endoftext|>"
22
+ }
training_accuracies.json ADDED
@@ -0,0 +1 @@
 
 
1
+ []
training_losses.json ADDED
@@ -0,0 +1 @@
 
 
1
+ [69.8818, 63.8803, 39.2378, 40.1697, 29.6556, 18.9288, 20.4652, 14.9841, 13.1094, 18.3367, 14.6834, 11.4179, 12.1548, 9.2089, 12.6553, 8.9966, 6.6264, 5.425, 4.0108, 4.6446, 4.2037, 5.351, 3.5588, 3.3752, 3.9435, 2.9198, 2.5778, 4.9556, 3.6617, 3.3234, 2.322, 2.654, 3.9577, 3.7476, 4.3091, 3.4247, 3.4923, 2.0426, 2.7236, 3.1049, 2.4879, 3.166, 3.0878, 2.5788, 2.6609, 2.0838, 1.7099, 1.5804, 1.6998, 2.9089, 2.4612, 2.3049, 2.153, 1.7821, 2.2852, 2.6422, 1.9646, 2.8868, 2.3617, 2.5646, 2.2608, 3.5567, 2.4334, 2.2484, 1.8068, 1.6729, 1.5552, 1.5632, 1.0311, 1.3329, 1.1305, 1.4385, 1.2884, 1.2844, 1.6241, 1.586, 1.6943, 1.171, 2.9335, 1.3589, 0.9444, 1.1969, 1.1131, 1.4661, 2.1222, 1.7025, 1.867, 0.7293, 1.19, 0.7823, 0.4771, 0.9291, 1.3334, 1.0675, 1.2327, 1.6467, 0.8789, 0.7196, 1.139, 1.9294, 1.7003, 0.5862, 0.9022, 1.5321, 1.1659, 1.2519, 1.0113, 0.7846, 0.8392, 0.7273, 1.1548, 0.943, 0.7082, 0.5344, 0.244, 0.6174, 0.3389, 1.3124, 0.8551, 0.3376, 0.4604, 0.5414, 0.496, 0.674, 0.8145, 1.2361, 0.8749, 0.415, 0.5223, 0.4985, 0.5853, 0.5778, 0.3809, 0.6341, 0.236, 0.1651, 0.211, 0.0128, 0.1367, 0.3378, 0.2076, 0.0721, 0.2576, 0.4862, 0.5412, 0.128, 0.1909, 0.4571, 0.433, 0.5168, 0.4076, 0.155, 0.2624, 0.2507, 0.4074, 0.1483, 0.1183, 0.0069, 0.14, 0.0987, 0.0256, 0.1885, 0.2929, 0.2532, 0.8499, 0.2517, 0.0098, 0.1959, 0.3862, 0.107, 0.3689, 0.0116, 0.3245, 0.4243, 0.0063, 0.0352, 0.3627, 0.2202, 0.5729, 0.0793, 0.0164, 0.0013, 0.0004, 0.0162, 0.0008, 0.0012, 0.444, 0.0622, 0.0001, 0.0047, 0.0653, 0.2099, 0.0557, 0.3274, 0.0002, 0.1174, 0.0468, 0.0689, 0.1094, 0.1829, 0.1645, 0.0142, 0.0589, 0.1568, 0.1529, 0.1008, 0.0965, 0.0044, 0.0001, 0.0001, 0.0, 0.0069, 0.0002, 0.0061, 0.0108, 0.0308, 0.0325, 0.4928, 0.0014, 0.0184, 0.0001, 0.0009, 0.2344, 0.0089, 0.0012, 0.0003, 0.0314, 0.0002, 0.0002, 0.0, 0.0033, 0.0005, 0.0001, 0.0, 0.0001, 0.0, 0.0, 0.0, 0.1, 0.0617, 0.1157, 0.0005, 0.0001, 0.0003, 0.0243, 0.0457, 0.0001, 0.0001, 0.0059, 0.0066, 0.0, 0.0, 0.0549, 0.0178, 0.0, 0.0834, 0.0, 0.0001, 0.0829, 0.0047, 0.0, 0.0, 0.0, 0.2143, 0.0019, 0.0, 0.0034, 0.0001, 0.0, 0.0001, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
vocab.json ADDED
The diff for this file is too large to render. See raw diff