kenthua commited on
Commit
5fff604
1 Parent(s): 383226f

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +91 -0
  2. checkpoint-1000/optimizer_0/.metadata +3 -0
  3. checkpoint-1000/optimizer_0/__0_0.distcp +3 -0
  4. checkpoint-1000/optimizer_0/__1_0.distcp +3 -0
  5. checkpoint-1000/optimizer_0/__2_0.distcp +3 -0
  6. checkpoint-1000/optimizer_0/__3_0.distcp +3 -0
  7. checkpoint-1000/optimizer_0/__4_0.distcp +3 -0
  8. checkpoint-1000/optimizer_0/__5_0.distcp +3 -0
  9. checkpoint-1000/optimizer_0/__6_0.distcp +3 -0
  10. checkpoint-1000/optimizer_0/__7_0.distcp +3 -0
  11. checkpoint-1000/pytorch_model_fsdp_0/.metadata +3 -0
  12. checkpoint-1000/pytorch_model_fsdp_0/__0_0.distcp +3 -0
  13. checkpoint-1000/pytorch_model_fsdp_0/__1_0.distcp +3 -0
  14. checkpoint-1000/pytorch_model_fsdp_0/__2_0.distcp +3 -0
  15. checkpoint-1000/pytorch_model_fsdp_0/__3_0.distcp +3 -0
  16. checkpoint-1000/pytorch_model_fsdp_0/__4_0.distcp +3 -0
  17. checkpoint-1000/pytorch_model_fsdp_0/__5_0.distcp +3 -0
  18. checkpoint-1000/pytorch_model_fsdp_0/__6_0.distcp +3 -0
  19. checkpoint-1000/pytorch_model_fsdp_0/__7_0.distcp +3 -0
  20. checkpoint-1000/rng_state_0.pth +3 -0
  21. checkpoint-1000/rng_state_1.pth +3 -0
  22. checkpoint-1000/rng_state_2.pth +3 -0
  23. checkpoint-1000/rng_state_3.pth +3 -0
  24. checkpoint-1000/rng_state_4.pth +3 -0
  25. checkpoint-1000/rng_state_5.pth +3 -0
  26. checkpoint-1000/rng_state_6.pth +3 -0
  27. checkpoint-1000/rng_state_7.pth +3 -0
  28. checkpoint-1000/scheduler.pt +3 -0
  29. checkpoint-1000/trainer_state.json +173 -0
  30. checkpoint-2000/optimizer_0/.metadata +3 -0
  31. checkpoint-2000/optimizer_0/__0_0.distcp +3 -0
  32. checkpoint-2000/optimizer_0/__1_0.distcp +3 -0
  33. checkpoint-2000/optimizer_0/__2_0.distcp +3 -0
  34. checkpoint-2000/optimizer_0/__3_0.distcp +3 -0
  35. checkpoint-2000/optimizer_0/__4_0.distcp +3 -0
  36. checkpoint-2000/optimizer_0/__5_0.distcp +3 -0
  37. checkpoint-2000/optimizer_0/__6_0.distcp +3 -0
  38. checkpoint-2000/optimizer_0/__7_0.distcp +3 -0
  39. checkpoint-2000/pytorch_model_fsdp_0/.metadata +3 -0
  40. checkpoint-2000/pytorch_model_fsdp_0/__0_0.distcp +3 -0
  41. checkpoint-2000/pytorch_model_fsdp_0/__1_0.distcp +3 -0
  42. checkpoint-2000/pytorch_model_fsdp_0/__2_0.distcp +3 -0
  43. checkpoint-2000/pytorch_model_fsdp_0/__3_0.distcp +3 -0
  44. checkpoint-2000/pytorch_model_fsdp_0/__4_0.distcp +3 -0
  45. checkpoint-2000/pytorch_model_fsdp_0/__5_0.distcp +3 -0
  46. checkpoint-2000/pytorch_model_fsdp_0/__6_0.distcp +3 -0
  47. checkpoint-2000/pytorch_model_fsdp_0/__7_0.distcp +3 -0
  48. checkpoint-2000/rng_state_0.pth +3 -0
  49. checkpoint-2000/rng_state_1.pth +3 -0
  50. checkpoint-2000/rng_state_2.pth +3 -0
.gitattributes CHANGED
@@ -33,3 +33,94 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ checkpoint-1000/optimizer_0/.metadata filter=lfs diff=lfs merge=lfs -text
37
+ checkpoint-1000/optimizer_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
38
+ checkpoint-1000/optimizer_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
39
+ checkpoint-1000/optimizer_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
40
+ checkpoint-1000/optimizer_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
41
+ checkpoint-1000/optimizer_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
42
+ checkpoint-1000/optimizer_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
43
+ checkpoint-1000/optimizer_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
44
+ checkpoint-1000/optimizer_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
45
+ checkpoint-1000/pytorch_model_fsdp_0/.metadata filter=lfs diff=lfs merge=lfs -text
46
+ checkpoint-1000/pytorch_model_fsdp_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
47
+ checkpoint-1000/pytorch_model_fsdp_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
48
+ checkpoint-1000/pytorch_model_fsdp_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
49
+ checkpoint-1000/pytorch_model_fsdp_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
50
+ checkpoint-1000/pytorch_model_fsdp_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
51
+ checkpoint-1000/pytorch_model_fsdp_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
52
+ checkpoint-1000/pytorch_model_fsdp_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
53
+ checkpoint-1000/pytorch_model_fsdp_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
54
+ checkpoint-2000/optimizer_0/.metadata filter=lfs diff=lfs merge=lfs -text
55
+ checkpoint-2000/optimizer_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
56
+ checkpoint-2000/optimizer_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
57
+ checkpoint-2000/optimizer_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
58
+ checkpoint-2000/optimizer_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
59
+ checkpoint-2000/optimizer_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
60
+ checkpoint-2000/optimizer_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
61
+ checkpoint-2000/optimizer_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
62
+ checkpoint-2000/optimizer_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
63
+ checkpoint-2000/pytorch_model_fsdp_0/.metadata filter=lfs diff=lfs merge=lfs -text
64
+ checkpoint-2000/pytorch_model_fsdp_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
65
+ checkpoint-2000/pytorch_model_fsdp_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
66
+ checkpoint-2000/pytorch_model_fsdp_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
67
+ checkpoint-2000/pytorch_model_fsdp_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
68
+ checkpoint-2000/pytorch_model_fsdp_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
69
+ checkpoint-2000/pytorch_model_fsdp_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
70
+ checkpoint-2000/pytorch_model_fsdp_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
71
+ checkpoint-2000/pytorch_model_fsdp_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
72
+ checkpoint-3000/optimizer_0/.metadata filter=lfs diff=lfs merge=lfs -text
73
+ checkpoint-3000/optimizer_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
74
+ checkpoint-3000/optimizer_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
75
+ checkpoint-3000/optimizer_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
76
+ checkpoint-3000/optimizer_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
77
+ checkpoint-3000/optimizer_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
78
+ checkpoint-3000/optimizer_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
79
+ checkpoint-3000/optimizer_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
80
+ checkpoint-3000/optimizer_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
81
+ checkpoint-3000/pytorch_model_fsdp_0/.metadata filter=lfs diff=lfs merge=lfs -text
82
+ checkpoint-3000/pytorch_model_fsdp_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
83
+ checkpoint-3000/pytorch_model_fsdp_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
84
+ checkpoint-3000/pytorch_model_fsdp_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
85
+ checkpoint-3000/pytorch_model_fsdp_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
86
+ checkpoint-3000/pytorch_model_fsdp_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
87
+ checkpoint-3000/pytorch_model_fsdp_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
88
+ checkpoint-3000/pytorch_model_fsdp_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
89
+ checkpoint-3000/pytorch_model_fsdp_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
90
+ checkpoint-4000/optimizer_0/.metadata filter=lfs diff=lfs merge=lfs -text
91
+ checkpoint-4000/optimizer_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
92
+ checkpoint-4000/optimizer_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
93
+ checkpoint-4000/optimizer_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
94
+ checkpoint-4000/optimizer_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
95
+ checkpoint-4000/optimizer_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
96
+ checkpoint-4000/optimizer_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
97
+ checkpoint-4000/optimizer_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
98
+ checkpoint-4000/optimizer_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
99
+ checkpoint-4000/pytorch_model_fsdp_0/.metadata filter=lfs diff=lfs merge=lfs -text
100
+ checkpoint-4000/pytorch_model_fsdp_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
101
+ checkpoint-4000/pytorch_model_fsdp_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
102
+ checkpoint-4000/pytorch_model_fsdp_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
103
+ checkpoint-4000/pytorch_model_fsdp_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
104
+ checkpoint-4000/pytorch_model_fsdp_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
105
+ checkpoint-4000/pytorch_model_fsdp_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
106
+ checkpoint-4000/pytorch_model_fsdp_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
107
+ checkpoint-4000/pytorch_model_fsdp_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
108
+ checkpoint-4863/optimizer_0/.metadata filter=lfs diff=lfs merge=lfs -text
109
+ checkpoint-4863/optimizer_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
110
+ checkpoint-4863/optimizer_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
111
+ checkpoint-4863/optimizer_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
112
+ checkpoint-4863/optimizer_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
113
+ checkpoint-4863/optimizer_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
114
+ checkpoint-4863/optimizer_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
115
+ checkpoint-4863/optimizer_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
116
+ checkpoint-4863/optimizer_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
117
+ checkpoint-4863/pytorch_model_fsdp_0/.metadata filter=lfs diff=lfs merge=lfs -text
118
+ checkpoint-4863/pytorch_model_fsdp_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
119
+ checkpoint-4863/pytorch_model_fsdp_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
120
+ checkpoint-4863/pytorch_model_fsdp_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
121
+ checkpoint-4863/pytorch_model_fsdp_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
122
+ checkpoint-4863/pytorch_model_fsdp_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
123
+ checkpoint-4863/pytorch_model_fsdp_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
124
+ checkpoint-4863/pytorch_model_fsdp_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
125
+ checkpoint-4863/pytorch_model_fsdp_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
126
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
checkpoint-1000/optimizer_0/.metadata ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c823bc431bf7d807ed32a5f978a1748f129e77a58aba3594daf3a2045d091648
3
+ size 2626018
checkpoint-1000/optimizer_0/__0_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e3c2153bf02b381c6ad2ffe0824dc5ba46c39423c40fb753a13b2b81d22c2d6d
3
+ size 55406592
checkpoint-1000/optimizer_0/__1_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:976f38d9d7d31749574bb1d3dc238341bd89db0ad56b4a89aca61daaedbd1dc8
3
+ size 55526656
checkpoint-1000/optimizer_0/__2_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ab8f4a691793cccd18d28711b97db5c3fffd047450c73cd1bc8c15d242a455e0
3
+ size 55480896
checkpoint-1000/optimizer_0/__3_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7794d7737fe9ab804c47265172c228f322c3ce0d8ae1f4b0da1c0961299c454b
3
+ size 55480896
checkpoint-1000/optimizer_0/__4_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1be0f25d5d1ef6a5da7241da8e0bfd32f5ea5acae0402cb65c0b237cdb85ba52
3
+ size 55480032
checkpoint-1000/optimizer_0/__5_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d508dfa9519b3b5007e08eb3cfcadb5e1af8f66b5cbb7c46433de1a0220b3fd0
3
+ size 55480032
checkpoint-1000/optimizer_0/__6_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ef388af3d62f8049bdd7aa4bd92686f81eba235bd699a8770d3d998bd7fc7201
3
+ size 55480032
checkpoint-1000/optimizer_0/__7_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:862e1e89ad8d6d45b9612642902fb8e7e24f3242c701559334e4a98718bd9930
3
+ size 55480032
checkpoint-1000/pytorch_model_fsdp_0/.metadata ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:19502f0f22e6a789d9430be30fd1319f8dd68afbb00a9bc001926d880d5042d1
3
+ size 1064888
checkpoint-1000/pytorch_model_fsdp_0/__0_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:87a8ae8d0d9b4c6d4906e101e591b6ff24ca8256fb349638bc2c9bafe125a6fb
3
+ size 27702864
checkpoint-1000/pytorch_model_fsdp_0/__1_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7fe1fb95d56970cff4dcbc83a83ca7f8ecac9bc641868cf8fde4b69f5b7b57f5
3
+ size 27702864
checkpoint-1000/pytorch_model_fsdp_0/__2_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:be0526cb870508e64adac4dba0b1cd510e90492d4a0c715b6f3824cd9a832ce6
3
+ size 27702864
checkpoint-1000/pytorch_model_fsdp_0/__3_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:548171e890776a3386a8114d5e24eb128c0035a60402484b03960f2e01651715
3
+ size 27702864
checkpoint-1000/pytorch_model_fsdp_0/__4_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:40528c0670a9dbde32993d03f21ac1cb232c25c596ab49daa179f3ebcf19bf8d
3
+ size 27702864
checkpoint-1000/pytorch_model_fsdp_0/__5_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a30cd894ac0476288fe9a1b47f5257a750b3f8b247ccaa46692096d393f54c7a
3
+ size 27702864
checkpoint-1000/pytorch_model_fsdp_0/__6_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b0e628ecd537a0d31f91287a8a7320005dcef7e164a9f8b11c199eeda8058c52
3
+ size 27702864
checkpoint-1000/pytorch_model_fsdp_0/__7_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd911f0a16f12715e3105bf4ac7d5735f4c9e2bb7b1e1fd50687fa706884fbd0
3
+ size 27702864
checkpoint-1000/rng_state_0.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:852a2229a3726ffedc43daf9b32d882ac09be192242bada110e4e27a158a4ad8
3
+ size 15984
checkpoint-1000/rng_state_1.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:607d5b9fe1b6ffab2c6ab8c0cca7a2dc074f35f0f539611c21febcb657cb9230
3
+ size 15984
checkpoint-1000/rng_state_2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:21bfb8fc3702bc02f20eecb02befd28c4d4a0cf2d6e241c7b306fedf67d74101
3
+ size 15984
checkpoint-1000/rng_state_3.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f680126569c803d74359f9aa642e22ede3c8af20715e7c162ef1abe0100375d8
3
+ size 15984
checkpoint-1000/rng_state_4.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0095abae33bd227b2ec0fdc9f9fe68be4e271a993b2a40b77676a7d46e7c0877
3
+ size 15984
checkpoint-1000/rng_state_5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb75e7ee126757b5c516ccdb6844f17b23593a1586dc161a8668186a81a17481
3
+ size 15984
checkpoint-1000/rng_state_6.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c2d0e74d163711a1f334aacbfe6cc80b35858c2f0be112236077c3cb5c8047a2
3
+ size 15984
checkpoint-1000/rng_state_7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba53e2865adec6eb39312aa9f9ea49a46316b7a8c07f97959fc55d8a4a61f463
3
+ size 15984
checkpoint-1000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d1252495abb6207a87314f27eb7a52b71b9170963ff2e044a7fac5b9a90ef861
3
+ size 1064
checkpoint-1000/trainer_state.json ADDED
@@ -0,0 +1,173 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 0.6169031462060457,
5
+ "eval_steps": 500,
6
+ "global_step": 1000,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.030845157310302282,
13
+ "grad_norm": 2.8206074237823486,
14
+ "learning_rate": 1.0277492291880782e-05,
15
+ "loss": 1.8082,
16
+ "step": 50
17
+ },
18
+ {
19
+ "epoch": 0.061690314620604564,
20
+ "grad_norm": 3.4183013439178467,
21
+ "learning_rate": 2.0554984583761563e-05,
22
+ "loss": 0.6538,
23
+ "step": 100
24
+ },
25
+ {
26
+ "epoch": 0.09253547193090685,
27
+ "grad_norm": 2.170591354370117,
28
+ "learning_rate": 3.083247687564235e-05,
29
+ "loss": 0.4563,
30
+ "step": 150
31
+ },
32
+ {
33
+ "epoch": 0.12338062924120913,
34
+ "grad_norm": 1.4687080383300781,
35
+ "learning_rate": 4.110996916752313e-05,
36
+ "loss": 0.4263,
37
+ "step": 200
38
+ },
39
+ {
40
+ "epoch": 0.15422578655151142,
41
+ "grad_norm": 1.836676836013794,
42
+ "learning_rate": 5.1387461459403907e-05,
43
+ "loss": 0.3994,
44
+ "step": 250
45
+ },
46
+ {
47
+ "epoch": 0.1850709438618137,
48
+ "grad_norm": 1.2718663215637207,
49
+ "learning_rate": 6.16649537512847e-05,
50
+ "loss": 0.3665,
51
+ "step": 300
52
+ },
53
+ {
54
+ "epoch": 0.215916101172116,
55
+ "grad_norm": 1.6945191621780396,
56
+ "learning_rate": 7.194244604316547e-05,
57
+ "loss": 0.3577,
58
+ "step": 350
59
+ },
60
+ {
61
+ "epoch": 0.24676125848241826,
62
+ "grad_norm": 1.2829898595809937,
63
+ "learning_rate": 8.221993833504625e-05,
64
+ "loss": 0.347,
65
+ "step": 400
66
+ },
67
+ {
68
+ "epoch": 0.27760641579272055,
69
+ "grad_norm": 1.01521635055542,
70
+ "learning_rate": 9.249743062692704e-05,
71
+ "loss": 0.3288,
72
+ "step": 450
73
+ },
74
+ {
75
+ "epoch": 0.30845157310302285,
76
+ "grad_norm": 1.522111415863037,
77
+ "learning_rate": 0.00010277492291880781,
78
+ "loss": 0.3267,
79
+ "step": 500
80
+ },
81
+ {
82
+ "epoch": 0.3392967304133251,
83
+ "grad_norm": 0.9678927659988403,
84
+ "learning_rate": 0.00011305241521068859,
85
+ "loss": 0.3198,
86
+ "step": 550
87
+ },
88
+ {
89
+ "epoch": 0.3701418877236274,
90
+ "grad_norm": 1.2144405841827393,
91
+ "learning_rate": 0.0001233299075025694,
92
+ "loss": 0.3099,
93
+ "step": 600
94
+ },
95
+ {
96
+ "epoch": 0.4009870450339297,
97
+ "grad_norm": 1.3122639656066895,
98
+ "learning_rate": 0.00013360739979445017,
99
+ "loss": 0.2929,
100
+ "step": 650
101
+ },
102
+ {
103
+ "epoch": 0.431832202344232,
104
+ "grad_norm": 1.0934101343154907,
105
+ "learning_rate": 0.00014388489208633093,
106
+ "loss": 0.3003,
107
+ "step": 700
108
+ },
109
+ {
110
+ "epoch": 0.4626773596545342,
111
+ "grad_norm": 0.7938969731330872,
112
+ "learning_rate": 0.00015416238437821172,
113
+ "loss": 0.2956,
114
+ "step": 750
115
+ },
116
+ {
117
+ "epoch": 0.4935225169648365,
118
+ "grad_norm": 0.6571168303489685,
119
+ "learning_rate": 0.0001644398766700925,
120
+ "loss": 0.2736,
121
+ "step": 800
122
+ },
123
+ {
124
+ "epoch": 0.5243676742751388,
125
+ "grad_norm": 1.0073938369750977,
126
+ "learning_rate": 0.0001747173689619733,
127
+ "loss": 0.2892,
128
+ "step": 850
129
+ },
130
+ {
131
+ "epoch": 0.5552128315854411,
132
+ "grad_norm": 0.9874083399772644,
133
+ "learning_rate": 0.00018499486125385408,
134
+ "loss": 0.2723,
135
+ "step": 900
136
+ },
137
+ {
138
+ "epoch": 0.5860579888957433,
139
+ "grad_norm": 1.1770968437194824,
140
+ "learning_rate": 0.00019527235354573487,
141
+ "loss": 0.2855,
142
+ "step": 950
143
+ },
144
+ {
145
+ "epoch": 0.6169031462060457,
146
+ "grad_norm": 1.00326669216156,
147
+ "learning_rate": 0.00019997622717095418,
148
+ "loss": 0.2587,
149
+ "step": 1000
150
+ }
151
+ ],
152
+ "logging_steps": 50,
153
+ "max_steps": 4863,
154
+ "num_input_tokens_seen": 0,
155
+ "num_train_epochs": 3,
156
+ "save_steps": 1000,
157
+ "stateful_callbacks": {
158
+ "TrainerControl": {
159
+ "args": {
160
+ "should_epoch_stop": false,
161
+ "should_evaluate": false,
162
+ "should_log": false,
163
+ "should_save": true,
164
+ "should_training_stop": false
165
+ },
166
+ "attributes": {}
167
+ }
168
+ },
169
+ "total_flos": 7124677004623872.0,
170
+ "train_batch_size": 1,
171
+ "trial_name": null,
172
+ "trial_params": null
173
+ }
checkpoint-2000/optimizer_0/.metadata ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5626ee28c1f24f09a7b7f3a240119b9d2db5082be6528657524ec153ab4bab40
3
+ size 2626018
checkpoint-2000/optimizer_0/__0_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e1ddac24aff4bfe008d75f42dabb23ed177efe5cefe9f7b5f29f71a0ebdb1c1
3
+ size 55406592
checkpoint-2000/optimizer_0/__1_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:644a8b57f5bf12acebd9a45311e38f079556731bd0cce4d9e4479c52fe290daf
3
+ size 55526656
checkpoint-2000/optimizer_0/__2_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:07e3ed8ae7c7f161c908da0ff18b57aeca951ebbd215de56c152243a70b4d230
3
+ size 55480896
checkpoint-2000/optimizer_0/__3_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e94e18382a97151194018cb781cf0fe22334ee35ac961ca0af3001abd6bc932
3
+ size 55480896
checkpoint-2000/optimizer_0/__4_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:63e6da3410ebcd43453c81d05a10eafb02c674cac7d1b8798b0933af17d3a218
3
+ size 55480032
checkpoint-2000/optimizer_0/__5_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed8c41c0e613f61c6d53924a953f451e70a74c66778418a43b07e0a72d4837fc
3
+ size 55480032
checkpoint-2000/optimizer_0/__6_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:75dd182d21f0d83d4f2e1d017b7ad1bc81d1cdd6b5f05c411eb7156db6af5b54
3
+ size 55480032
checkpoint-2000/optimizer_0/__7_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1887e9892bd4a005864e6de571616f9850350f98993f8ddf36fcfddca8d360e4
3
+ size 55480032
checkpoint-2000/pytorch_model_fsdp_0/.metadata ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a19ab021f1ba6fc33e10c18810aae15bcdcc44f398f902265ff6fd94a02d4df
3
+ size 1064888
checkpoint-2000/pytorch_model_fsdp_0/__0_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:275c922a12364010c488d511c3b5e5de1bfe494ef4dbb54b1892668b1d4bfaac
3
+ size 27702864
checkpoint-2000/pytorch_model_fsdp_0/__1_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b96915d94e47d988a4d4c1fd5a228abcba8bb73c05b155cff6c893a0ebf85b09
3
+ size 27702864
checkpoint-2000/pytorch_model_fsdp_0/__2_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c852e5098594f95316cb3a0b872de32d839d418dc744165865805834e452b2a6
3
+ size 27702864
checkpoint-2000/pytorch_model_fsdp_0/__3_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df3993232148d686149b082f51d8dbdc8c96c898b3d4b399724f87c614527e80
3
+ size 27702864
checkpoint-2000/pytorch_model_fsdp_0/__4_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b68262b2ed6d2b9c1b691f92a6ad19522c13477f38044762eccfd58b25d39963
3
+ size 27702864
checkpoint-2000/pytorch_model_fsdp_0/__5_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6f12638b582d1293d83d0d56e4ca8036a67a4bf47d1e68fb4260ce7492b4820d
3
+ size 27702864
checkpoint-2000/pytorch_model_fsdp_0/__6_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7e9745a4bdc41fb2deb28b003850b909d73fa094c02afef0ad14e955dfe9b153
3
+ size 27702864
checkpoint-2000/pytorch_model_fsdp_0/__7_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c5b433f984a2fbda86bfb4194be437e3ca37d25649ae5ce1ea1a70a13965cea7
3
+ size 27702864
checkpoint-2000/rng_state_0.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8204337523c9526120d9dbd77c1b85e83685a646168ef1aa5614cc1cc72b52f5
3
+ size 15984
checkpoint-2000/rng_state_1.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7fcac6b4bd212354a8c95c4fd130500378c409f8c520e7ec730ba272ddee284d
3
+ size 15984
checkpoint-2000/rng_state_2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1a9bb45dc4e4d1accd280a5304c4c6b48809c92185e462b3df1633b8c69321c1
3
+ size 15984