diff --git a/flax_model-00001-of-00072.msgpack b/flax_model-00001-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..13f5a1c07d46cb95fa5a0c3a260649c0755a6251 --- /dev/null +++ b/flax_model-00001-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a29f98d8415e2cf649b2504050dbb3e7559d9eec39cc2ab0d2e2a3d445b9207 +size 7193289083 diff --git a/flax_model-00002-of-00072.msgpack b/flax_model-00002-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..b929ede65f4f919500bbca3bbf0bb3c9e6b3476e --- /dev/null +++ b/flax_model-00002-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fde4ea226965e41c4ece2e627459ebda7dff05a016a4e8b58ffd27f66a761c4b +size 4932874985 diff --git a/flax_model-00003-of-00072.msgpack b/flax_model-00003-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..a3182ecfc180a3ffc977156e7f330a79cb76a44c --- /dev/null +++ b/flax_model-00003-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2f866bf9cdbdeceb970d1578fc48a3051dbb47b969d887bc2ffb24443fd400b +size 4932874985 diff --git a/flax_model-00004-of-00072.msgpack b/flax_model-00004-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..a25b33b9987fd4f87752a5ba93be543b0f936e08 --- /dev/null +++ b/flax_model-00004-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db15a88db13fd2f4c21e7c2c6cb4036cffeb26ae0d367f29b1b05e8a7ca4237f +size 4932874985 diff --git a/flax_model-00005-of-00072.msgpack b/flax_model-00005-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..deedfcdac2d5a182efe92443b89cff5721607779 --- /dev/null +++ b/flax_model-00005-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ea6c5ab4c28c1cb460822609bd6e012ccb32e27ea7a469783fedf868412eb37 +size 4932874985 diff --git a/flax_model-00006-of-00072.msgpack b/flax_model-00006-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..b5978a3b179bc7e0f696b5f40cfbf11580152644 --- /dev/null +++ b/flax_model-00006-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68d2df640e695769e25d3d4b383653f05f78483eb1705f180c0a988097479429 +size 4932874985 diff --git a/flax_model-00007-of-00072.msgpack b/flax_model-00007-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..50d0dc3e7fa0b8691581ea126864f54fea5d430c --- /dev/null +++ b/flax_model-00007-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64f6c1c11aad69a32e93f327662049eabf4e5b70de46f4e1c81542a4a1386fad +size 4932874985 diff --git a/flax_model-00008-of-00072.msgpack b/flax_model-00008-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..c09dc973989b37040cfd0326e64e599624613557 --- /dev/null +++ b/flax_model-00008-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82320e30d31136e05b0781a2d46e2e9967d9e90a903fb6cce9f9f1099ee4769a +size 4932874985 diff --git a/flax_model-00009-of-00072.msgpack b/flax_model-00009-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..7fa3a803f156ae70149159cccbf385047855979e --- /dev/null +++ b/flax_model-00009-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dcdf327ae027d77f2720d7c2231bb3a4f4d0cfd1729ddac311134b6ef8de72fb +size 4932874985 diff --git a/flax_model-00010-of-00072.msgpack b/flax_model-00010-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..1ce1dc4ce497ac7aa37ae63039b97946158020e3 --- /dev/null +++ b/flax_model-00010-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c2490afbd78380c7363e45cf007d7034cd56c52a55d3ac922343f9ccea85981 +size 4932874985 diff --git a/flax_model-00011-of-00072.msgpack b/flax_model-00011-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..d6f272c36a7ae1da229fe1b8d543f468ba1d43ea --- /dev/null +++ b/flax_model-00011-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9faa077705fbfcb1256df081c52117a7e3e6712c0e9417507eaa7a5ea4ab139e +size 4932874985 diff --git a/flax_model-00012-of-00072.msgpack b/flax_model-00012-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..5ae4b5ff75a3698036458020cd79ece08feacd43 --- /dev/null +++ b/flax_model-00012-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fde89d956f619e3862891f73d3f15082c1cd8e34b9bebfe38e8603bd5c1c125b +size 4932874986 diff --git a/flax_model-00013-of-00072.msgpack b/flax_model-00013-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..95b852f73ae2fe2a1a1f7b81c911866109f21743 --- /dev/null +++ b/flax_model-00013-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53c87e5fcf0e68b5a71e846f45fe8d8fb6e5c55160c1569e10b1829481972ef7 +size 4932874986 diff --git a/flax_model-00014-of-00072.msgpack b/flax_model-00014-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..d4913e1ec8862fd8541e4a19dd936c290b0cc5bf --- /dev/null +++ b/flax_model-00014-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8c728b7f64a0559a0f9f740cdbbf8c38fe67760c24401bdacb6fbc4ba68fc78 +size 4932874986 diff --git a/flax_model-00015-of-00072.msgpack b/flax_model-00015-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..9d5bce10027f5c334c9984ea3cf6c8d5d92bdec0 --- /dev/null +++ b/flax_model-00015-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b283461977491d678267cdd5163420e9b7a25d5f8e6051834a2f2604839aebf1 +size 4932874986 diff --git a/flax_model-00016-of-00072.msgpack b/flax_model-00016-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..6aea24639c9de01b67d249ca7976907f8be63896 --- /dev/null +++ b/flax_model-00016-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a5d4f863a2281f19f5a27d818b9f47d70e02c0b35f44f6703376997bd43d05ad +size 4932874986 diff --git a/flax_model-00017-of-00072.msgpack b/flax_model-00017-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..f9928370ba55f0541b72638629ee5751576695f9 --- /dev/null +++ b/flax_model-00017-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f50d0faee6b8786b5fb0ca486dc34df328796733789c73959466091bd4d5f7b5 +size 4932874986 diff --git a/flax_model-00018-of-00072.msgpack b/flax_model-00018-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..de39baa0397632bdd7d11661e99940dfda6c0363 --- /dev/null +++ b/flax_model-00018-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3de2b7409a3357d44000a2bbeb423c870a5f6064f2d1d02032ad69d49398672 +size 4932874986 diff --git a/flax_model-00019-of-00072.msgpack b/flax_model-00019-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..2b1bc91744cd22a021c682dd58019e22ebf135f7 --- /dev/null +++ b/flax_model-00019-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ceb3ce5765a7e2e6c81b2a9f98ca18be4cc623054f5288293dc76ef0c15d972 +size 4932874986 diff --git a/flax_model-00020-of-00072.msgpack b/flax_model-00020-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..efc69c07b1b61bc5f917240b6b915d21595635f1 --- /dev/null +++ b/flax_model-00020-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ef553cc45f622e6e83c3c6f11bc84927b95dac1a9f27156a6bc675f1ae26226 +size 4932874986 diff --git a/flax_model-00021-of-00072.msgpack b/flax_model-00021-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..e7d607946eba08695961c9de1787607570f767e6 --- /dev/null +++ b/flax_model-00021-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64bec4d8cd23126e6621dae157885a4971d21b91d15f9e733e45dfd94269c90d +size 4932874986 diff --git a/flax_model-00022-of-00072.msgpack b/flax_model-00022-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..3aa1042ffecfab6bb7097ff30939e0b1237dc0fc --- /dev/null +++ b/flax_model-00022-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd0fb09cb2a7cb3454180922580254572a3af588b32696e346e554d95c2b6dad +size 4932874986 diff --git a/flax_model-00023-of-00072.msgpack b/flax_model-00023-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..3b410cc5d166304bf2678a8c86ef6ca736486a14 --- /dev/null +++ b/flax_model-00023-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6fb4ca5aea29dda155cf5e7feab53838f1a4394a6214a0331c7b0c7733f1258a +size 4932874986 diff --git a/flax_model-00024-of-00072.msgpack b/flax_model-00024-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..2bf44277cdb41c3cf5bed68483ecf74818209277 --- /dev/null +++ b/flax_model-00024-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d75c471006871ffa8de8a7a08bd5c4ce3edb89f091dc8dc03efdc554e0c51f4 +size 4932874986 diff --git a/flax_model-00025-of-00072.msgpack b/flax_model-00025-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..607088677e6b971f9d40d995d492310a01a0e912 --- /dev/null +++ b/flax_model-00025-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef192c51ef218635873f66ea42b6a841185dac5761e2177b90b7d37ed5ce367f +size 4932874986 diff --git a/flax_model-00026-of-00072.msgpack b/flax_model-00026-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..17c1771f62ab0172d5816d80a1df2016d7823547 --- /dev/null +++ b/flax_model-00026-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a114427c0f3a5f023231dad7bbca85db8f7b2145a66b5d0bd7bdb0995106355 +size 4932874986 diff --git a/flax_model-00027-of-00072.msgpack b/flax_model-00027-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..76f9d12370e3d8e54dfff0292f4a89efc630236e --- /dev/null +++ b/flax_model-00027-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8821f61c70c7d167243b89af4f43f98d28b9a86be5523d735fdbc248585a6d07 +size 4932874986 diff --git a/flax_model-00028-of-00072.msgpack b/flax_model-00028-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..e821d92b0100e0c0db8a65d8c66ed74462521b6f --- /dev/null +++ b/flax_model-00028-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:98beb85d43fff4d497854a5389f5a65a67c149ee395b854acbe17cfcb5a3dd2e +size 4932874986 diff --git a/flax_model-00029-of-00072.msgpack b/flax_model-00029-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..6f9d03cf5c57f5911658f8865e496e993c75228e --- /dev/null +++ b/flax_model-00029-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12a7ec9dd288f425b0420629606c4fe4f262a8f2c4a77f4016223c36ceb6e0b8 +size 4932874986 diff --git a/flax_model-00030-of-00072.msgpack b/flax_model-00030-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..291eb4146ee0d5ddc6c5eaf2c22315828137121a --- /dev/null +++ b/flax_model-00030-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2160045939407248fb6d153acc9d6b8cace178443fac9dce3d3d1b50855ee9d +size 4932874986 diff --git a/flax_model-00031-of-00072.msgpack b/flax_model-00031-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..ee4ab5132b7d376e77bb3ba5cd85ffb4208d8d0e --- /dev/null +++ b/flax_model-00031-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:815b19ff88f513efb9c26c7bea41a4369218a1b2c90186a174449b3a39ebb906 +size 4932874986 diff --git a/flax_model-00032-of-00072.msgpack b/flax_model-00032-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..08cc4c0cc1261a092d63f338649b0f1dbb9b0182 --- /dev/null +++ b/flax_model-00032-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9671706b18e415fd1f725d9e0642122acc1ae5d8f34af57c61c701b18b7c7f05 +size 4932874986 diff --git a/flax_model-00033-of-00072.msgpack b/flax_model-00033-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..bd75ddb6510fc7abe6724f995c6203aefa557823 --- /dev/null +++ b/flax_model-00033-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:864dc217edc469402f1caa59d6f3e844126aa5e465304c5668bb955170a1d345 +size 4932874986 diff --git a/flax_model-00034-of-00072.msgpack b/flax_model-00034-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..973f0cf9955040fef7fc95eac7688eb44bd445ce --- /dev/null +++ b/flax_model-00034-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d6a5381d366b9d4652cfaeb193b783ab725f03360aa79025855f1d7a5476d63 +size 4932874986 diff --git a/flax_model-00035-of-00072.msgpack b/flax_model-00035-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..ecdce25c18037fb17ae6fbe3af925b70f88e5e5d --- /dev/null +++ b/flax_model-00035-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c8d1bbf3c3421d88ca33a027a822fb089c7cdcd7099fc6f4e42137abad9ae68 +size 4932874986 diff --git a/flax_model-00036-of-00072.msgpack b/flax_model-00036-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..580763630b10494bc8aaaa12538b9e5a5d167f5b --- /dev/null +++ b/flax_model-00036-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8a904a606d0cc379e9ff2e4426bcab5ada5f26e2cb52d6afc3ebd429cf8d03c +size 4932874986 diff --git a/flax_model-00037-of-00072.msgpack b/flax_model-00037-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..7cdbf2f89a24a2ebafe89fad7437111d66b056b7 --- /dev/null +++ b/flax_model-00037-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4657c1963c1c2c91f5ed170ce492213dd14b79384b9635987e9776c21e7c469d +size 4932874986 diff --git a/flax_model-00038-of-00072.msgpack b/flax_model-00038-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..b9662d54ab14fcc205d3589f6a6e756529099587 --- /dev/null +++ b/flax_model-00038-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:900c154df10b06850c6bab8b25b6c0c74e722fa7da737282f94c27ca03409a6b +size 4932874986 diff --git a/flax_model-00039-of-00072.msgpack b/flax_model-00039-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..3fbc9f8557300b0f1cc06ebf50e668f7fc4d7d1c --- /dev/null +++ b/flax_model-00039-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f51a70d465484b592a6dca9acce91ba2067ff86d7f1c03006474fb3a4feaedd5 +size 4932874986 diff --git a/flax_model-00040-of-00072.msgpack b/flax_model-00040-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..292bf7360612f28dba5e8669954e279748e47236 --- /dev/null +++ b/flax_model-00040-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a45c18aac9d3a1dad59994d179dc84d710033bd8b2cb652b97c7cdb9c708f83f +size 4932874986 diff --git a/flax_model-00041-of-00072.msgpack b/flax_model-00041-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..c4667f1f4e8f361aa8b9cc280b4bcf885ce85d71 --- /dev/null +++ b/flax_model-00041-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66e07c79dd7958e7392912f6402db56072027f5a74881e4287376f3cbaaa0c81 +size 4932874986 diff --git a/flax_model-00042-of-00072.msgpack b/flax_model-00042-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..8e009377c8d5a63293439f905eb192e362b3659c --- /dev/null +++ b/flax_model-00042-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a14ac4dc049834fc672df7faf876836050a62e50bae268016a42e25572ba33e +size 4932874986 diff --git a/flax_model-00043-of-00072.msgpack b/flax_model-00043-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..7785c869fb585ccc8e78338b677d05e92bbb26ad --- /dev/null +++ b/flax_model-00043-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6488804dedcf60f747f654b4b60d19f4ef39180f49e1e38b34bda63c376bbdc +size 4932874986 diff --git a/flax_model-00044-of-00072.msgpack b/flax_model-00044-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..f2b837db4731fb71a7fe480700d618dd9b44acb6 --- /dev/null +++ b/flax_model-00044-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5bc75fac3e1633d9da071a1caec7dfae930104d26f3ac028561658b48f25cb8b +size 4932874986 diff --git a/flax_model-00045-of-00072.msgpack b/flax_model-00045-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..15b15df38eaeabc9bba473d0bbc807e106b1833a --- /dev/null +++ b/flax_model-00045-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eef81ae257fea4e561cc9fc2adec2227639c7c081961eae22e00b7f4be1c46c2 +size 4932874986 diff --git a/flax_model-00046-of-00072.msgpack b/flax_model-00046-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..7bfef45b2b1cce867aadc65332e2f6059508d921 --- /dev/null +++ b/flax_model-00046-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d3046c3dd9e83a1f9c4df4f32dc895129386dfe887808316cc55b191db58745 +size 4932874986 diff --git a/flax_model-00047-of-00072.msgpack b/flax_model-00047-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..922a1015321c754a2517474dfb101ec0cba2dc0a --- /dev/null +++ b/flax_model-00047-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73412400bc08073c590887a2976a988e58b6ab52ba52adc63b26e0d1db2aeff9 +size 4932874986 diff --git a/flax_model-00048-of-00072.msgpack b/flax_model-00048-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..abb203dcb1dd80481c46342abf809f02e65e0d93 --- /dev/null +++ b/flax_model-00048-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5fd1db9fd19b3bb414927dc6bd50ae39a3d241348414c9fc4471feafe3726de +size 4932874986 diff --git a/flax_model-00049-of-00072.msgpack b/flax_model-00049-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..f92a625d3d6ee67b5d4dcf8479b86039a6a50e70 --- /dev/null +++ b/flax_model-00049-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75f767275bff82ec18d80352eb125a7cf41bb2fb9ba1297018c112ac7a2aa174 +size 4932874986 diff --git a/flax_model-00050-of-00072.msgpack b/flax_model-00050-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..8b63a9ec5ad5540c1cdbb9ab2fcb8500fc980989 --- /dev/null +++ b/flax_model-00050-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a794f57f5ee18ee682175319921ac778dc6dbfad1ce36dfee6aba2a20a3f5c3e +size 4932874986 diff --git a/flax_model-00051-of-00072.msgpack b/flax_model-00051-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..c3b61920e3ee9429c6749f04376f62bc6ed570a9 --- /dev/null +++ b/flax_model-00051-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:543cc4ac29b75f8ece3261860a8388875ed029763280e0deec30052f6879bc17 +size 4932874986 diff --git a/flax_model-00052-of-00072.msgpack b/flax_model-00052-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..054c3034b751a60e693235fb8fb53171c8eb1123 --- /dev/null +++ b/flax_model-00052-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2a633345f9742b76238793ad7be4de0b806361a10116945ea896b6cd67c98f8 +size 4932874986 diff --git a/flax_model-00053-of-00072.msgpack b/flax_model-00053-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..884e7d2f92fd5d061f03efa76ea1adb062374400 --- /dev/null +++ b/flax_model-00053-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:200899f8d5201c26d1996b6045f7b51c4ea70679a5eb28dd000d9e5bb3e2d979 +size 4932874986 diff --git a/flax_model-00054-of-00072.msgpack b/flax_model-00054-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..7445817dc6776e1b8bd690a16b1e4809ffd2f2fa --- /dev/null +++ b/flax_model-00054-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f43c50b62de6eb015f5b13d20dc124c07fb6eaf5de531ce2b25b7206758d341 +size 4932874986 diff --git a/flax_model-00055-of-00072.msgpack b/flax_model-00055-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..67072eba1940b1dd6b785e1b09f84a2f5a86d989 --- /dev/null +++ b/flax_model-00055-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d7b7b54602a79a3fd53728d612096ab3302fa1e0f4d33b22e326a7438f90e31 +size 4932874986 diff --git a/flax_model-00056-of-00072.msgpack b/flax_model-00056-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..a3c70dccbe4f25f236cd024eecc5ded4a3122375 --- /dev/null +++ b/flax_model-00056-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dcc76cd8ada2dfdd6633030576f1a23943b550c996014b6656ddf25a62ab5373 +size 4932874986 diff --git a/flax_model-00057-of-00072.msgpack b/flax_model-00057-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..b64c89a99bc1c5682e2e323005bee2f92d184bd5 --- /dev/null +++ b/flax_model-00057-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87553c976f99b223b19d7eaa28fd3cb8af9ea864f426e90ee3b06c222ff2b90e +size 4932874986 diff --git a/flax_model-00058-of-00072.msgpack b/flax_model-00058-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..2c81d35c7f69302bd2c51addd5300432603c31bd --- /dev/null +++ b/flax_model-00058-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3feeb9c53424dab1268217d85d0a0f4898db18065a90e8bbe4b113187e77b7b7 +size 4932874986 diff --git a/flax_model-00059-of-00072.msgpack b/flax_model-00059-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..a9e67099067caa4ca69bea287e1768d361bddcbb --- /dev/null +++ b/flax_model-00059-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:986f73c9a94e22ebecda56a1e22ddd186cb374f976f021fc3781d817ecd29452 +size 4932874986 diff --git a/flax_model-00060-of-00072.msgpack b/flax_model-00060-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..3e2768e6119ee1972c63cc1ede3a96eb66235c03 --- /dev/null +++ b/flax_model-00060-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a3e6d87338a9b2c497b7fd039f0a3a345e02ffb64164055c6f0eb9f6f46b566 +size 4932874986 diff --git a/flax_model-00061-of-00072.msgpack b/flax_model-00061-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..b0df8c92292fbb6927d891798d79829cd04e838b --- /dev/null +++ b/flax_model-00061-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4eeb6c4f85c2b6b69f4803a1f792fc9bf84657945642cfed32d6cf232713870 +size 4932874986 diff --git a/flax_model-00062-of-00072.msgpack b/flax_model-00062-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..efd3f860d939b526586d2e19aa798cab8f8f71cd --- /dev/null +++ b/flax_model-00062-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ba0fcffce8290ce61bee31eb532f44090269ecab579fc836b12b7507c65bbd3 +size 4932874986 diff --git a/flax_model-00063-of-00072.msgpack b/flax_model-00063-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..993475e2ec72ae3710975174fb0c823dc606b1fe --- /dev/null +++ b/flax_model-00063-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee69df8a3a7310eae7f32484de970fd4f399e3cc6e388766f47234860fbd73d5 +size 4932874986 diff --git a/flax_model-00064-of-00072.msgpack b/flax_model-00064-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..de6192febdd6cc09ae90cd18b89e471233f89fa1 --- /dev/null +++ b/flax_model-00064-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:37fc35c7ff8bd43a6306b7d881112a7b0ba7ca57237d2a197da91d192b390237 +size 4932874986 diff --git a/flax_model-00065-of-00072.msgpack b/flax_model-00065-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..01be1951681e2c84f7eaa2daef7a13127d5fd1d7 --- /dev/null +++ b/flax_model-00065-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8fa9be46f1a74e68e9aca9e0321d11431fe8d243359874749e9e67a5ec855c3 +size 4932874986 diff --git a/flax_model-00066-of-00072.msgpack b/flax_model-00066-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..e0463d4be81d243d95363a42d17d7ca5ff798963 --- /dev/null +++ b/flax_model-00066-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:41fe44cf0fe12c3184c43f0afe34a660dd77da4c5667f378b279ac2957ea4aab +size 4932874986 diff --git a/flax_model-00067-of-00072.msgpack b/flax_model-00067-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..dbb505414b5720a8538c8ca1bf15454706ae8058 --- /dev/null +++ b/flax_model-00067-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38a0be488d355ee7befa11ed6acffa8707a9c356d1d2839bafdb9731134ca957 +size 4932874986 diff --git a/flax_model-00068-of-00072.msgpack b/flax_model-00068-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..845c07d457d94b6e525a6a47e626dc80b4193d5a --- /dev/null +++ b/flax_model-00068-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:462ffe21b97f77f02d9e080e4b63028556a06bb198de8f12cc3e6fc1adce8f39 +size 4932874986 diff --git a/flax_model-00069-of-00072.msgpack b/flax_model-00069-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..b95efebc77c7438841399c44dc069b2e82495302 --- /dev/null +++ b/flax_model-00069-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2228d694a954f6b340d57131a6d950ab2a1c8ae35cddde7ad6958d02c26530c8 +size 4932874986 diff --git a/flax_model-00070-of-00072.msgpack b/flax_model-00070-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..3e6902b633bc5c8661e2a639d8329c15c8eecf7e --- /dev/null +++ b/flax_model-00070-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c0d6fcc89eb7916491aa6ba13b27c559e2ebe1b44a470282eb138b161dd774d +size 4932874986 diff --git a/flax_model-00071-of-00072.msgpack b/flax_model-00071-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..28d8288ac42dd1784f76816ae45cf2b3464f4e2a --- /dev/null +++ b/flax_model-00071-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1207ae1a9e06252aa3fc2b3e98e3887843fa58c2bbee8c47697feba988bd741 +size 4932874986 diff --git a/flax_model-00072-of-00072.msgpack b/flax_model-00072-of-00072.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..0e940b7377e4427283cca4004bc1c70441211076 --- /dev/null +++ b/flax_model-00072-of-00072.msgpack @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9019c8773fc07a73a53a5c808e883a4db17450acb5cb7a028a90d2c701b164d2 +size 57417 diff --git a/flax_model.msgpack.index.json b/flax_model.msgpack.index.json new file mode 100644 index 0000000000000000000000000000000000000000..4db528444c9b013c94de402884d4b9044ab7b815 --- /dev/null +++ b/flax_model.msgpack.index.json @@ -0,0 +1,852 @@ +{ + "metadata": { + "total_size": 352494542848 + }, + "weight_map": { + "transformer/h/0/input_layernorm/bias": "flax_model-00002-of-00072.msgpack", + "transformer/h/0/input_layernorm/scale": "flax_model-00002-of-00072.msgpack", + "transformer/h/0/mlp/dense_4h_to_h/bias": "flax_model-00002-of-00072.msgpack", + "transformer/h/0/mlp/dense_4h_to_h/kernel": "flax_model-00002-of-00072.msgpack", + "transformer/h/0/mlp/dense_h_to_4h/bias": "flax_model-00002-of-00072.msgpack", + "transformer/h/0/mlp/dense_h_to_4h/kernel": "flax_model-00002-of-00072.msgpack", + "transformer/h/0/post_attention_layernorm/bias": "flax_model-00002-of-00072.msgpack", + "transformer/h/0/post_attention_layernorm/scale": "flax_model-00002-of-00072.msgpack", + "transformer/h/0/self_attention/dense/bias": "flax_model-00002-of-00072.msgpack", + "transformer/h/0/self_attention/dense/kernel": "flax_model-00002-of-00072.msgpack", + "transformer/h/0/self_attention/query_key_value/bias": "flax_model-00002-of-00072.msgpack", + "transformer/h/0/self_attention/query_key_value/kernel": "flax_model-00002-of-00072.msgpack", + "transformer/h/1/input_layernorm/bias": "flax_model-00003-of-00072.msgpack", + "transformer/h/1/input_layernorm/scale": "flax_model-00003-of-00072.msgpack", + "transformer/h/1/mlp/dense_4h_to_h/bias": "flax_model-00003-of-00072.msgpack", + "transformer/h/1/mlp/dense_4h_to_h/kernel": "flax_model-00003-of-00072.msgpack", + "transformer/h/1/mlp/dense_h_to_4h/bias": "flax_model-00003-of-00072.msgpack", + "transformer/h/1/mlp/dense_h_to_4h/kernel": "flax_model-00003-of-00072.msgpack", + "transformer/h/1/post_attention_layernorm/bias": "flax_model-00003-of-00072.msgpack", + "transformer/h/1/post_attention_layernorm/scale": "flax_model-00003-of-00072.msgpack", + "transformer/h/1/self_attention/dense/bias": "flax_model-00003-of-00072.msgpack", + "transformer/h/1/self_attention/dense/kernel": "flax_model-00003-of-00072.msgpack", + "transformer/h/1/self_attention/query_key_value/bias": "flax_model-00003-of-00072.msgpack", + "transformer/h/1/self_attention/query_key_value/kernel": "flax_model-00003-of-00072.msgpack", + "transformer/h/10/input_layernorm/bias": "flax_model-00012-of-00072.msgpack", + "transformer/h/10/input_layernorm/scale": "flax_model-00012-of-00072.msgpack", + "transformer/h/10/mlp/dense_4h_to_h/bias": "flax_model-00012-of-00072.msgpack", + "transformer/h/10/mlp/dense_4h_to_h/kernel": "flax_model-00012-of-00072.msgpack", + "transformer/h/10/mlp/dense_h_to_4h/bias": "flax_model-00012-of-00072.msgpack", + "transformer/h/10/mlp/dense_h_to_4h/kernel": "flax_model-00012-of-00072.msgpack", + "transformer/h/10/post_attention_layernorm/bias": "flax_model-00012-of-00072.msgpack", + "transformer/h/10/post_attention_layernorm/scale": "flax_model-00012-of-00072.msgpack", + "transformer/h/10/self_attention/dense/bias": "flax_model-00012-of-00072.msgpack", + "transformer/h/10/self_attention/dense/kernel": "flax_model-00012-of-00072.msgpack", + "transformer/h/10/self_attention/query_key_value/bias": "flax_model-00012-of-00072.msgpack", + "transformer/h/10/self_attention/query_key_value/kernel": "flax_model-00012-of-00072.msgpack", + "transformer/h/11/input_layernorm/bias": "flax_model-00013-of-00072.msgpack", + "transformer/h/11/input_layernorm/scale": "flax_model-00013-of-00072.msgpack", + "transformer/h/11/mlp/dense_4h_to_h/bias": "flax_model-00013-of-00072.msgpack", + "transformer/h/11/mlp/dense_4h_to_h/kernel": "flax_model-00013-of-00072.msgpack", + "transformer/h/11/mlp/dense_h_to_4h/bias": "flax_model-00013-of-00072.msgpack", + "transformer/h/11/mlp/dense_h_to_4h/kernel": "flax_model-00013-of-00072.msgpack", + "transformer/h/11/post_attention_layernorm/bias": "flax_model-00013-of-00072.msgpack", + "transformer/h/11/post_attention_layernorm/scale": "flax_model-00013-of-00072.msgpack", + "transformer/h/11/self_attention/dense/bias": "flax_model-00013-of-00072.msgpack", + "transformer/h/11/self_attention/dense/kernel": "flax_model-00013-of-00072.msgpack", + "transformer/h/11/self_attention/query_key_value/bias": "flax_model-00013-of-00072.msgpack", + "transformer/h/11/self_attention/query_key_value/kernel": "flax_model-00013-of-00072.msgpack", + "transformer/h/12/input_layernorm/bias": "flax_model-00014-of-00072.msgpack", + "transformer/h/12/input_layernorm/scale": "flax_model-00014-of-00072.msgpack", + "transformer/h/12/mlp/dense_4h_to_h/bias": "flax_model-00014-of-00072.msgpack", + "transformer/h/12/mlp/dense_4h_to_h/kernel": "flax_model-00014-of-00072.msgpack", + "transformer/h/12/mlp/dense_h_to_4h/bias": "flax_model-00014-of-00072.msgpack", + "transformer/h/12/mlp/dense_h_to_4h/kernel": "flax_model-00014-of-00072.msgpack", + "transformer/h/12/post_attention_layernorm/bias": "flax_model-00014-of-00072.msgpack", + "transformer/h/12/post_attention_layernorm/scale": "flax_model-00014-of-00072.msgpack", + "transformer/h/12/self_attention/dense/bias": "flax_model-00014-of-00072.msgpack", + "transformer/h/12/self_attention/dense/kernel": "flax_model-00014-of-00072.msgpack", + "transformer/h/12/self_attention/query_key_value/bias": "flax_model-00014-of-00072.msgpack", + "transformer/h/12/self_attention/query_key_value/kernel": "flax_model-00014-of-00072.msgpack", + "transformer/h/13/input_layernorm/bias": "flax_model-00015-of-00072.msgpack", + "transformer/h/13/input_layernorm/scale": "flax_model-00015-of-00072.msgpack", + "transformer/h/13/mlp/dense_4h_to_h/bias": "flax_model-00015-of-00072.msgpack", + "transformer/h/13/mlp/dense_4h_to_h/kernel": "flax_model-00015-of-00072.msgpack", + "transformer/h/13/mlp/dense_h_to_4h/bias": "flax_model-00015-of-00072.msgpack", + "transformer/h/13/mlp/dense_h_to_4h/kernel": "flax_model-00015-of-00072.msgpack", + "transformer/h/13/post_attention_layernorm/bias": "flax_model-00015-of-00072.msgpack", + "transformer/h/13/post_attention_layernorm/scale": "flax_model-00015-of-00072.msgpack", + "transformer/h/13/self_attention/dense/bias": "flax_model-00015-of-00072.msgpack", + "transformer/h/13/self_attention/dense/kernel": "flax_model-00015-of-00072.msgpack", + "transformer/h/13/self_attention/query_key_value/bias": "flax_model-00015-of-00072.msgpack", + "transformer/h/13/self_attention/query_key_value/kernel": "flax_model-00015-of-00072.msgpack", + "transformer/h/14/input_layernorm/bias": "flax_model-00016-of-00072.msgpack", + "transformer/h/14/input_layernorm/scale": "flax_model-00016-of-00072.msgpack", + "transformer/h/14/mlp/dense_4h_to_h/bias": "flax_model-00016-of-00072.msgpack", + "transformer/h/14/mlp/dense_4h_to_h/kernel": "flax_model-00016-of-00072.msgpack", + "transformer/h/14/mlp/dense_h_to_4h/bias": "flax_model-00016-of-00072.msgpack", + "transformer/h/14/mlp/dense_h_to_4h/kernel": "flax_model-00016-of-00072.msgpack", + "transformer/h/14/post_attention_layernorm/bias": "flax_model-00016-of-00072.msgpack", + "transformer/h/14/post_attention_layernorm/scale": "flax_model-00016-of-00072.msgpack", + "transformer/h/14/self_attention/dense/bias": "flax_model-00016-of-00072.msgpack", + "transformer/h/14/self_attention/dense/kernel": "flax_model-00016-of-00072.msgpack", + "transformer/h/14/self_attention/query_key_value/bias": "flax_model-00016-of-00072.msgpack", + "transformer/h/14/self_attention/query_key_value/kernel": "flax_model-00016-of-00072.msgpack", + "transformer/h/15/input_layernorm/bias": "flax_model-00017-of-00072.msgpack", + "transformer/h/15/input_layernorm/scale": "flax_model-00017-of-00072.msgpack", + "transformer/h/15/mlp/dense_4h_to_h/bias": "flax_model-00017-of-00072.msgpack", + "transformer/h/15/mlp/dense_4h_to_h/kernel": "flax_model-00017-of-00072.msgpack", + "transformer/h/15/mlp/dense_h_to_4h/bias": "flax_model-00017-of-00072.msgpack", + "transformer/h/15/mlp/dense_h_to_4h/kernel": "flax_model-00017-of-00072.msgpack", + "transformer/h/15/post_attention_layernorm/bias": "flax_model-00017-of-00072.msgpack", + "transformer/h/15/post_attention_layernorm/scale": "flax_model-00017-of-00072.msgpack", + "transformer/h/15/self_attention/dense/bias": "flax_model-00017-of-00072.msgpack", + "transformer/h/15/self_attention/dense/kernel": "flax_model-00017-of-00072.msgpack", + "transformer/h/15/self_attention/query_key_value/bias": "flax_model-00017-of-00072.msgpack", + "transformer/h/15/self_attention/query_key_value/kernel": "flax_model-00017-of-00072.msgpack", + "transformer/h/16/input_layernorm/bias": "flax_model-00018-of-00072.msgpack", + "transformer/h/16/input_layernorm/scale": "flax_model-00018-of-00072.msgpack", + "transformer/h/16/mlp/dense_4h_to_h/bias": "flax_model-00018-of-00072.msgpack", + "transformer/h/16/mlp/dense_4h_to_h/kernel": "flax_model-00018-of-00072.msgpack", + "transformer/h/16/mlp/dense_h_to_4h/bias": "flax_model-00018-of-00072.msgpack", + "transformer/h/16/mlp/dense_h_to_4h/kernel": "flax_model-00018-of-00072.msgpack", + "transformer/h/16/post_attention_layernorm/bias": "flax_model-00018-of-00072.msgpack", + "transformer/h/16/post_attention_layernorm/scale": "flax_model-00018-of-00072.msgpack", + "transformer/h/16/self_attention/dense/bias": "flax_model-00018-of-00072.msgpack", + "transformer/h/16/self_attention/dense/kernel": "flax_model-00018-of-00072.msgpack", + "transformer/h/16/self_attention/query_key_value/bias": "flax_model-00018-of-00072.msgpack", + "transformer/h/16/self_attention/query_key_value/kernel": "flax_model-00018-of-00072.msgpack", + "transformer/h/17/input_layernorm/bias": "flax_model-00019-of-00072.msgpack", + "transformer/h/17/input_layernorm/scale": "flax_model-00019-of-00072.msgpack", + "transformer/h/17/mlp/dense_4h_to_h/bias": "flax_model-00019-of-00072.msgpack", + "transformer/h/17/mlp/dense_4h_to_h/kernel": "flax_model-00019-of-00072.msgpack", + "transformer/h/17/mlp/dense_h_to_4h/bias": "flax_model-00019-of-00072.msgpack", + "transformer/h/17/mlp/dense_h_to_4h/kernel": "flax_model-00019-of-00072.msgpack", + "transformer/h/17/post_attention_layernorm/bias": "flax_model-00019-of-00072.msgpack", + "transformer/h/17/post_attention_layernorm/scale": "flax_model-00019-of-00072.msgpack", + "transformer/h/17/self_attention/dense/bias": "flax_model-00019-of-00072.msgpack", + "transformer/h/17/self_attention/dense/kernel": "flax_model-00019-of-00072.msgpack", + "transformer/h/17/self_attention/query_key_value/bias": "flax_model-00019-of-00072.msgpack", + "transformer/h/17/self_attention/query_key_value/kernel": "flax_model-00019-of-00072.msgpack", + "transformer/h/18/input_layernorm/bias": "flax_model-00020-of-00072.msgpack", + "transformer/h/18/input_layernorm/scale": "flax_model-00020-of-00072.msgpack", + "transformer/h/18/mlp/dense_4h_to_h/bias": "flax_model-00020-of-00072.msgpack", + "transformer/h/18/mlp/dense_4h_to_h/kernel": "flax_model-00020-of-00072.msgpack", + "transformer/h/18/mlp/dense_h_to_4h/bias": "flax_model-00020-of-00072.msgpack", + "transformer/h/18/mlp/dense_h_to_4h/kernel": "flax_model-00020-of-00072.msgpack", + "transformer/h/18/post_attention_layernorm/bias": "flax_model-00020-of-00072.msgpack", + "transformer/h/18/post_attention_layernorm/scale": "flax_model-00020-of-00072.msgpack", + "transformer/h/18/self_attention/dense/bias": "flax_model-00020-of-00072.msgpack", + "transformer/h/18/self_attention/dense/kernel": "flax_model-00020-of-00072.msgpack", + "transformer/h/18/self_attention/query_key_value/bias": "flax_model-00020-of-00072.msgpack", + "transformer/h/18/self_attention/query_key_value/kernel": "flax_model-00020-of-00072.msgpack", + "transformer/h/19/input_layernorm/bias": "flax_model-00021-of-00072.msgpack", + "transformer/h/19/input_layernorm/scale": "flax_model-00021-of-00072.msgpack", + "transformer/h/19/mlp/dense_4h_to_h/bias": "flax_model-00021-of-00072.msgpack", + "transformer/h/19/mlp/dense_4h_to_h/kernel": "flax_model-00021-of-00072.msgpack", + "transformer/h/19/mlp/dense_h_to_4h/bias": "flax_model-00021-of-00072.msgpack", + "transformer/h/19/mlp/dense_h_to_4h/kernel": "flax_model-00021-of-00072.msgpack", + "transformer/h/19/post_attention_layernorm/bias": "flax_model-00021-of-00072.msgpack", + "transformer/h/19/post_attention_layernorm/scale": "flax_model-00021-of-00072.msgpack", + "transformer/h/19/self_attention/dense/bias": "flax_model-00021-of-00072.msgpack", + "transformer/h/19/self_attention/dense/kernel": "flax_model-00021-of-00072.msgpack", + "transformer/h/19/self_attention/query_key_value/bias": "flax_model-00021-of-00072.msgpack", + "transformer/h/19/self_attention/query_key_value/kernel": "flax_model-00021-of-00072.msgpack", + "transformer/h/2/input_layernorm/bias": "flax_model-00004-of-00072.msgpack", + "transformer/h/2/input_layernorm/scale": "flax_model-00004-of-00072.msgpack", + "transformer/h/2/mlp/dense_4h_to_h/bias": "flax_model-00004-of-00072.msgpack", + "transformer/h/2/mlp/dense_4h_to_h/kernel": "flax_model-00004-of-00072.msgpack", + "transformer/h/2/mlp/dense_h_to_4h/bias": "flax_model-00004-of-00072.msgpack", + "transformer/h/2/mlp/dense_h_to_4h/kernel": "flax_model-00004-of-00072.msgpack", + "transformer/h/2/post_attention_layernorm/bias": "flax_model-00004-of-00072.msgpack", + "transformer/h/2/post_attention_layernorm/scale": "flax_model-00004-of-00072.msgpack", + "transformer/h/2/self_attention/dense/bias": "flax_model-00004-of-00072.msgpack", + "transformer/h/2/self_attention/dense/kernel": "flax_model-00004-of-00072.msgpack", + "transformer/h/2/self_attention/query_key_value/bias": "flax_model-00004-of-00072.msgpack", + "transformer/h/2/self_attention/query_key_value/kernel": "flax_model-00004-of-00072.msgpack", + "transformer/h/20/input_layernorm/bias": "flax_model-00022-of-00072.msgpack", + "transformer/h/20/input_layernorm/scale": "flax_model-00022-of-00072.msgpack", + "transformer/h/20/mlp/dense_4h_to_h/bias": "flax_model-00022-of-00072.msgpack", + "transformer/h/20/mlp/dense_4h_to_h/kernel": "flax_model-00022-of-00072.msgpack", + "transformer/h/20/mlp/dense_h_to_4h/bias": "flax_model-00022-of-00072.msgpack", + "transformer/h/20/mlp/dense_h_to_4h/kernel": "flax_model-00022-of-00072.msgpack", + "transformer/h/20/post_attention_layernorm/bias": "flax_model-00022-of-00072.msgpack", + "transformer/h/20/post_attention_layernorm/scale": "flax_model-00022-of-00072.msgpack", + "transformer/h/20/self_attention/dense/bias": "flax_model-00022-of-00072.msgpack", + "transformer/h/20/self_attention/dense/kernel": "flax_model-00022-of-00072.msgpack", + "transformer/h/20/self_attention/query_key_value/bias": "flax_model-00022-of-00072.msgpack", + "transformer/h/20/self_attention/query_key_value/kernel": "flax_model-00022-of-00072.msgpack", + "transformer/h/21/input_layernorm/bias": "flax_model-00023-of-00072.msgpack", + "transformer/h/21/input_layernorm/scale": "flax_model-00023-of-00072.msgpack", + "transformer/h/21/mlp/dense_4h_to_h/bias": "flax_model-00023-of-00072.msgpack", + "transformer/h/21/mlp/dense_4h_to_h/kernel": "flax_model-00023-of-00072.msgpack", + "transformer/h/21/mlp/dense_h_to_4h/bias": "flax_model-00023-of-00072.msgpack", + "transformer/h/21/mlp/dense_h_to_4h/kernel": "flax_model-00023-of-00072.msgpack", + "transformer/h/21/post_attention_layernorm/bias": "flax_model-00023-of-00072.msgpack", + "transformer/h/21/post_attention_layernorm/scale": "flax_model-00023-of-00072.msgpack", + "transformer/h/21/self_attention/dense/bias": "flax_model-00023-of-00072.msgpack", + "transformer/h/21/self_attention/dense/kernel": "flax_model-00023-of-00072.msgpack", + "transformer/h/21/self_attention/query_key_value/bias": "flax_model-00023-of-00072.msgpack", + "transformer/h/21/self_attention/query_key_value/kernel": "flax_model-00023-of-00072.msgpack", + "transformer/h/22/input_layernorm/bias": "flax_model-00024-of-00072.msgpack", + "transformer/h/22/input_layernorm/scale": "flax_model-00024-of-00072.msgpack", + "transformer/h/22/mlp/dense_4h_to_h/bias": "flax_model-00024-of-00072.msgpack", + "transformer/h/22/mlp/dense_4h_to_h/kernel": "flax_model-00024-of-00072.msgpack", + "transformer/h/22/mlp/dense_h_to_4h/bias": "flax_model-00024-of-00072.msgpack", + "transformer/h/22/mlp/dense_h_to_4h/kernel": "flax_model-00024-of-00072.msgpack", + "transformer/h/22/post_attention_layernorm/bias": "flax_model-00024-of-00072.msgpack", + "transformer/h/22/post_attention_layernorm/scale": "flax_model-00024-of-00072.msgpack", + "transformer/h/22/self_attention/dense/bias": "flax_model-00024-of-00072.msgpack", + "transformer/h/22/self_attention/dense/kernel": "flax_model-00024-of-00072.msgpack", + "transformer/h/22/self_attention/query_key_value/bias": "flax_model-00024-of-00072.msgpack", + "transformer/h/22/self_attention/query_key_value/kernel": "flax_model-00024-of-00072.msgpack", + "transformer/h/23/input_layernorm/bias": "flax_model-00025-of-00072.msgpack", + "transformer/h/23/input_layernorm/scale": "flax_model-00025-of-00072.msgpack", + "transformer/h/23/mlp/dense_4h_to_h/bias": "flax_model-00025-of-00072.msgpack", + "transformer/h/23/mlp/dense_4h_to_h/kernel": "flax_model-00025-of-00072.msgpack", + "transformer/h/23/mlp/dense_h_to_4h/bias": "flax_model-00025-of-00072.msgpack", + "transformer/h/23/mlp/dense_h_to_4h/kernel": "flax_model-00025-of-00072.msgpack", + "transformer/h/23/post_attention_layernorm/bias": "flax_model-00025-of-00072.msgpack", + "transformer/h/23/post_attention_layernorm/scale": "flax_model-00025-of-00072.msgpack", + "transformer/h/23/self_attention/dense/bias": "flax_model-00025-of-00072.msgpack", + "transformer/h/23/self_attention/dense/kernel": "flax_model-00025-of-00072.msgpack", + "transformer/h/23/self_attention/query_key_value/bias": "flax_model-00025-of-00072.msgpack", + "transformer/h/23/self_attention/query_key_value/kernel": "flax_model-00025-of-00072.msgpack", + "transformer/h/24/input_layernorm/bias": "flax_model-00026-of-00072.msgpack", + "transformer/h/24/input_layernorm/scale": "flax_model-00026-of-00072.msgpack", + "transformer/h/24/mlp/dense_4h_to_h/bias": "flax_model-00026-of-00072.msgpack", + "transformer/h/24/mlp/dense_4h_to_h/kernel": "flax_model-00026-of-00072.msgpack", + "transformer/h/24/mlp/dense_h_to_4h/bias": "flax_model-00026-of-00072.msgpack", + "transformer/h/24/mlp/dense_h_to_4h/kernel": "flax_model-00026-of-00072.msgpack", + "transformer/h/24/post_attention_layernorm/bias": "flax_model-00026-of-00072.msgpack", + "transformer/h/24/post_attention_layernorm/scale": "flax_model-00026-of-00072.msgpack", + "transformer/h/24/self_attention/dense/bias": "flax_model-00026-of-00072.msgpack", + "transformer/h/24/self_attention/dense/kernel": "flax_model-00026-of-00072.msgpack", + "transformer/h/24/self_attention/query_key_value/bias": "flax_model-00026-of-00072.msgpack", + "transformer/h/24/self_attention/query_key_value/kernel": "flax_model-00026-of-00072.msgpack", + "transformer/h/25/input_layernorm/bias": "flax_model-00027-of-00072.msgpack", + "transformer/h/25/input_layernorm/scale": "flax_model-00027-of-00072.msgpack", + "transformer/h/25/mlp/dense_4h_to_h/bias": "flax_model-00027-of-00072.msgpack", + "transformer/h/25/mlp/dense_4h_to_h/kernel": "flax_model-00027-of-00072.msgpack", + "transformer/h/25/mlp/dense_h_to_4h/bias": "flax_model-00027-of-00072.msgpack", + "transformer/h/25/mlp/dense_h_to_4h/kernel": "flax_model-00027-of-00072.msgpack", + "transformer/h/25/post_attention_layernorm/bias": "flax_model-00027-of-00072.msgpack", + "transformer/h/25/post_attention_layernorm/scale": "flax_model-00027-of-00072.msgpack", + "transformer/h/25/self_attention/dense/bias": "flax_model-00027-of-00072.msgpack", + "transformer/h/25/self_attention/dense/kernel": "flax_model-00027-of-00072.msgpack", + "transformer/h/25/self_attention/query_key_value/bias": "flax_model-00027-of-00072.msgpack", + "transformer/h/25/self_attention/query_key_value/kernel": "flax_model-00027-of-00072.msgpack", + "transformer/h/26/input_layernorm/bias": "flax_model-00028-of-00072.msgpack", + "transformer/h/26/input_layernorm/scale": "flax_model-00028-of-00072.msgpack", + "transformer/h/26/mlp/dense_4h_to_h/bias": "flax_model-00028-of-00072.msgpack", + "transformer/h/26/mlp/dense_4h_to_h/kernel": "flax_model-00028-of-00072.msgpack", + "transformer/h/26/mlp/dense_h_to_4h/bias": "flax_model-00028-of-00072.msgpack", + "transformer/h/26/mlp/dense_h_to_4h/kernel": "flax_model-00028-of-00072.msgpack", + "transformer/h/26/post_attention_layernorm/bias": "flax_model-00028-of-00072.msgpack", + "transformer/h/26/post_attention_layernorm/scale": "flax_model-00028-of-00072.msgpack", + "transformer/h/26/self_attention/dense/bias": "flax_model-00028-of-00072.msgpack", + "transformer/h/26/self_attention/dense/kernel": "flax_model-00028-of-00072.msgpack", + "transformer/h/26/self_attention/query_key_value/bias": "flax_model-00028-of-00072.msgpack", + "transformer/h/26/self_attention/query_key_value/kernel": "flax_model-00028-of-00072.msgpack", + "transformer/h/27/input_layernorm/bias": "flax_model-00029-of-00072.msgpack", + "transformer/h/27/input_layernorm/scale": "flax_model-00029-of-00072.msgpack", + "transformer/h/27/mlp/dense_4h_to_h/bias": "flax_model-00029-of-00072.msgpack", + "transformer/h/27/mlp/dense_4h_to_h/kernel": "flax_model-00029-of-00072.msgpack", + "transformer/h/27/mlp/dense_h_to_4h/bias": "flax_model-00029-of-00072.msgpack", + "transformer/h/27/mlp/dense_h_to_4h/kernel": "flax_model-00029-of-00072.msgpack", + "transformer/h/27/post_attention_layernorm/bias": "flax_model-00029-of-00072.msgpack", + "transformer/h/27/post_attention_layernorm/scale": "flax_model-00029-of-00072.msgpack", + "transformer/h/27/self_attention/dense/bias": "flax_model-00029-of-00072.msgpack", + "transformer/h/27/self_attention/dense/kernel": "flax_model-00029-of-00072.msgpack", + "transformer/h/27/self_attention/query_key_value/bias": "flax_model-00029-of-00072.msgpack", + "transformer/h/27/self_attention/query_key_value/kernel": "flax_model-00029-of-00072.msgpack", + "transformer/h/28/input_layernorm/bias": "flax_model-00030-of-00072.msgpack", + "transformer/h/28/input_layernorm/scale": "flax_model-00030-of-00072.msgpack", + "transformer/h/28/mlp/dense_4h_to_h/bias": "flax_model-00030-of-00072.msgpack", + "transformer/h/28/mlp/dense_4h_to_h/kernel": "flax_model-00030-of-00072.msgpack", + "transformer/h/28/mlp/dense_h_to_4h/bias": "flax_model-00030-of-00072.msgpack", + "transformer/h/28/mlp/dense_h_to_4h/kernel": "flax_model-00030-of-00072.msgpack", + "transformer/h/28/post_attention_layernorm/bias": "flax_model-00030-of-00072.msgpack", + "transformer/h/28/post_attention_layernorm/scale": "flax_model-00030-of-00072.msgpack", + "transformer/h/28/self_attention/dense/bias": "flax_model-00030-of-00072.msgpack", + "transformer/h/28/self_attention/dense/kernel": "flax_model-00030-of-00072.msgpack", + "transformer/h/28/self_attention/query_key_value/bias": "flax_model-00030-of-00072.msgpack", + "transformer/h/28/self_attention/query_key_value/kernel": "flax_model-00030-of-00072.msgpack", + "transformer/h/29/input_layernorm/bias": "flax_model-00031-of-00072.msgpack", + "transformer/h/29/input_layernorm/scale": "flax_model-00031-of-00072.msgpack", + "transformer/h/29/mlp/dense_4h_to_h/bias": "flax_model-00031-of-00072.msgpack", + "transformer/h/29/mlp/dense_4h_to_h/kernel": "flax_model-00031-of-00072.msgpack", + "transformer/h/29/mlp/dense_h_to_4h/bias": "flax_model-00031-of-00072.msgpack", + "transformer/h/29/mlp/dense_h_to_4h/kernel": "flax_model-00031-of-00072.msgpack", + "transformer/h/29/post_attention_layernorm/bias": "flax_model-00031-of-00072.msgpack", + "transformer/h/29/post_attention_layernorm/scale": "flax_model-00031-of-00072.msgpack", + "transformer/h/29/self_attention/dense/bias": "flax_model-00031-of-00072.msgpack", + "transformer/h/29/self_attention/dense/kernel": "flax_model-00031-of-00072.msgpack", + "transformer/h/29/self_attention/query_key_value/bias": "flax_model-00031-of-00072.msgpack", + "transformer/h/29/self_attention/query_key_value/kernel": "flax_model-00031-of-00072.msgpack", + "transformer/h/3/input_layernorm/bias": "flax_model-00005-of-00072.msgpack", + "transformer/h/3/input_layernorm/scale": "flax_model-00005-of-00072.msgpack", + "transformer/h/3/mlp/dense_4h_to_h/bias": "flax_model-00005-of-00072.msgpack", + "transformer/h/3/mlp/dense_4h_to_h/kernel": "flax_model-00005-of-00072.msgpack", + "transformer/h/3/mlp/dense_h_to_4h/bias": "flax_model-00005-of-00072.msgpack", + "transformer/h/3/mlp/dense_h_to_4h/kernel": "flax_model-00005-of-00072.msgpack", + "transformer/h/3/post_attention_layernorm/bias": "flax_model-00005-of-00072.msgpack", + "transformer/h/3/post_attention_layernorm/scale": "flax_model-00005-of-00072.msgpack", + "transformer/h/3/self_attention/dense/bias": "flax_model-00005-of-00072.msgpack", + "transformer/h/3/self_attention/dense/kernel": "flax_model-00005-of-00072.msgpack", + "transformer/h/3/self_attention/query_key_value/bias": "flax_model-00005-of-00072.msgpack", + "transformer/h/3/self_attention/query_key_value/kernel": "flax_model-00005-of-00072.msgpack", + "transformer/h/30/input_layernorm/bias": "flax_model-00032-of-00072.msgpack", + "transformer/h/30/input_layernorm/scale": "flax_model-00032-of-00072.msgpack", + "transformer/h/30/mlp/dense_4h_to_h/bias": "flax_model-00032-of-00072.msgpack", + "transformer/h/30/mlp/dense_4h_to_h/kernel": "flax_model-00032-of-00072.msgpack", + "transformer/h/30/mlp/dense_h_to_4h/bias": "flax_model-00032-of-00072.msgpack", + "transformer/h/30/mlp/dense_h_to_4h/kernel": "flax_model-00032-of-00072.msgpack", + "transformer/h/30/post_attention_layernorm/bias": "flax_model-00032-of-00072.msgpack", + "transformer/h/30/post_attention_layernorm/scale": "flax_model-00032-of-00072.msgpack", + "transformer/h/30/self_attention/dense/bias": "flax_model-00032-of-00072.msgpack", + "transformer/h/30/self_attention/dense/kernel": "flax_model-00032-of-00072.msgpack", + "transformer/h/30/self_attention/query_key_value/bias": "flax_model-00032-of-00072.msgpack", + "transformer/h/30/self_attention/query_key_value/kernel": "flax_model-00032-of-00072.msgpack", + "transformer/h/31/input_layernorm/bias": "flax_model-00033-of-00072.msgpack", + "transformer/h/31/input_layernorm/scale": "flax_model-00033-of-00072.msgpack", + "transformer/h/31/mlp/dense_4h_to_h/bias": "flax_model-00033-of-00072.msgpack", + "transformer/h/31/mlp/dense_4h_to_h/kernel": "flax_model-00033-of-00072.msgpack", + "transformer/h/31/mlp/dense_h_to_4h/bias": "flax_model-00033-of-00072.msgpack", + "transformer/h/31/mlp/dense_h_to_4h/kernel": "flax_model-00033-of-00072.msgpack", + "transformer/h/31/post_attention_layernorm/bias": "flax_model-00033-of-00072.msgpack", + "transformer/h/31/post_attention_layernorm/scale": "flax_model-00033-of-00072.msgpack", + "transformer/h/31/self_attention/dense/bias": "flax_model-00033-of-00072.msgpack", + "transformer/h/31/self_attention/dense/kernel": "flax_model-00033-of-00072.msgpack", + "transformer/h/31/self_attention/query_key_value/bias": "flax_model-00033-of-00072.msgpack", + "transformer/h/31/self_attention/query_key_value/kernel": "flax_model-00033-of-00072.msgpack", + "transformer/h/32/input_layernorm/bias": "flax_model-00034-of-00072.msgpack", + "transformer/h/32/input_layernorm/scale": "flax_model-00034-of-00072.msgpack", + "transformer/h/32/mlp/dense_4h_to_h/bias": "flax_model-00034-of-00072.msgpack", + "transformer/h/32/mlp/dense_4h_to_h/kernel": "flax_model-00034-of-00072.msgpack", + "transformer/h/32/mlp/dense_h_to_4h/bias": "flax_model-00034-of-00072.msgpack", + "transformer/h/32/mlp/dense_h_to_4h/kernel": "flax_model-00034-of-00072.msgpack", + "transformer/h/32/post_attention_layernorm/bias": "flax_model-00034-of-00072.msgpack", + "transformer/h/32/post_attention_layernorm/scale": "flax_model-00034-of-00072.msgpack", + "transformer/h/32/self_attention/dense/bias": "flax_model-00034-of-00072.msgpack", + "transformer/h/32/self_attention/dense/kernel": "flax_model-00034-of-00072.msgpack", + "transformer/h/32/self_attention/query_key_value/bias": "flax_model-00034-of-00072.msgpack", + "transformer/h/32/self_attention/query_key_value/kernel": "flax_model-00034-of-00072.msgpack", + "transformer/h/33/input_layernorm/bias": "flax_model-00035-of-00072.msgpack", + "transformer/h/33/input_layernorm/scale": "flax_model-00035-of-00072.msgpack", + "transformer/h/33/mlp/dense_4h_to_h/bias": "flax_model-00035-of-00072.msgpack", + "transformer/h/33/mlp/dense_4h_to_h/kernel": "flax_model-00035-of-00072.msgpack", + "transformer/h/33/mlp/dense_h_to_4h/bias": "flax_model-00035-of-00072.msgpack", + "transformer/h/33/mlp/dense_h_to_4h/kernel": "flax_model-00035-of-00072.msgpack", + "transformer/h/33/post_attention_layernorm/bias": "flax_model-00035-of-00072.msgpack", + "transformer/h/33/post_attention_layernorm/scale": "flax_model-00035-of-00072.msgpack", + "transformer/h/33/self_attention/dense/bias": "flax_model-00035-of-00072.msgpack", + "transformer/h/33/self_attention/dense/kernel": "flax_model-00035-of-00072.msgpack", + "transformer/h/33/self_attention/query_key_value/bias": "flax_model-00035-of-00072.msgpack", + "transformer/h/33/self_attention/query_key_value/kernel": "flax_model-00035-of-00072.msgpack", + "transformer/h/34/input_layernorm/bias": "flax_model-00036-of-00072.msgpack", + "transformer/h/34/input_layernorm/scale": "flax_model-00036-of-00072.msgpack", + "transformer/h/34/mlp/dense_4h_to_h/bias": "flax_model-00036-of-00072.msgpack", + "transformer/h/34/mlp/dense_4h_to_h/kernel": "flax_model-00036-of-00072.msgpack", + "transformer/h/34/mlp/dense_h_to_4h/bias": "flax_model-00036-of-00072.msgpack", + "transformer/h/34/mlp/dense_h_to_4h/kernel": "flax_model-00036-of-00072.msgpack", + "transformer/h/34/post_attention_layernorm/bias": "flax_model-00036-of-00072.msgpack", + "transformer/h/34/post_attention_layernorm/scale": "flax_model-00036-of-00072.msgpack", + "transformer/h/34/self_attention/dense/bias": "flax_model-00036-of-00072.msgpack", + "transformer/h/34/self_attention/dense/kernel": "flax_model-00036-of-00072.msgpack", + "transformer/h/34/self_attention/query_key_value/bias": "flax_model-00036-of-00072.msgpack", + "transformer/h/34/self_attention/query_key_value/kernel": "flax_model-00036-of-00072.msgpack", + "transformer/h/35/input_layernorm/bias": "flax_model-00037-of-00072.msgpack", + "transformer/h/35/input_layernorm/scale": "flax_model-00037-of-00072.msgpack", + "transformer/h/35/mlp/dense_4h_to_h/bias": "flax_model-00037-of-00072.msgpack", + "transformer/h/35/mlp/dense_4h_to_h/kernel": "flax_model-00037-of-00072.msgpack", + "transformer/h/35/mlp/dense_h_to_4h/bias": "flax_model-00037-of-00072.msgpack", + "transformer/h/35/mlp/dense_h_to_4h/kernel": "flax_model-00037-of-00072.msgpack", + "transformer/h/35/post_attention_layernorm/bias": "flax_model-00037-of-00072.msgpack", + "transformer/h/35/post_attention_layernorm/scale": "flax_model-00037-of-00072.msgpack", + "transformer/h/35/self_attention/dense/bias": "flax_model-00037-of-00072.msgpack", + "transformer/h/35/self_attention/dense/kernel": "flax_model-00037-of-00072.msgpack", + "transformer/h/35/self_attention/query_key_value/bias": "flax_model-00037-of-00072.msgpack", + "transformer/h/35/self_attention/query_key_value/kernel": "flax_model-00037-of-00072.msgpack", + "transformer/h/36/input_layernorm/bias": "flax_model-00038-of-00072.msgpack", + "transformer/h/36/input_layernorm/scale": "flax_model-00038-of-00072.msgpack", + "transformer/h/36/mlp/dense_4h_to_h/bias": "flax_model-00038-of-00072.msgpack", + "transformer/h/36/mlp/dense_4h_to_h/kernel": "flax_model-00038-of-00072.msgpack", + "transformer/h/36/mlp/dense_h_to_4h/bias": "flax_model-00038-of-00072.msgpack", + "transformer/h/36/mlp/dense_h_to_4h/kernel": "flax_model-00038-of-00072.msgpack", + "transformer/h/36/post_attention_layernorm/bias": "flax_model-00038-of-00072.msgpack", + "transformer/h/36/post_attention_layernorm/scale": "flax_model-00038-of-00072.msgpack", + "transformer/h/36/self_attention/dense/bias": "flax_model-00038-of-00072.msgpack", + "transformer/h/36/self_attention/dense/kernel": "flax_model-00038-of-00072.msgpack", + "transformer/h/36/self_attention/query_key_value/bias": "flax_model-00038-of-00072.msgpack", + "transformer/h/36/self_attention/query_key_value/kernel": "flax_model-00038-of-00072.msgpack", + "transformer/h/37/input_layernorm/bias": "flax_model-00039-of-00072.msgpack", + "transformer/h/37/input_layernorm/scale": "flax_model-00039-of-00072.msgpack", + "transformer/h/37/mlp/dense_4h_to_h/bias": "flax_model-00039-of-00072.msgpack", + "transformer/h/37/mlp/dense_4h_to_h/kernel": "flax_model-00039-of-00072.msgpack", + "transformer/h/37/mlp/dense_h_to_4h/bias": "flax_model-00039-of-00072.msgpack", + "transformer/h/37/mlp/dense_h_to_4h/kernel": "flax_model-00039-of-00072.msgpack", + "transformer/h/37/post_attention_layernorm/bias": "flax_model-00039-of-00072.msgpack", + "transformer/h/37/post_attention_layernorm/scale": "flax_model-00039-of-00072.msgpack", + "transformer/h/37/self_attention/dense/bias": "flax_model-00039-of-00072.msgpack", + "transformer/h/37/self_attention/dense/kernel": "flax_model-00039-of-00072.msgpack", + "transformer/h/37/self_attention/query_key_value/bias": "flax_model-00039-of-00072.msgpack", + "transformer/h/37/self_attention/query_key_value/kernel": "flax_model-00039-of-00072.msgpack", + "transformer/h/38/input_layernorm/bias": "flax_model-00040-of-00072.msgpack", + "transformer/h/38/input_layernorm/scale": "flax_model-00040-of-00072.msgpack", + "transformer/h/38/mlp/dense_4h_to_h/bias": "flax_model-00040-of-00072.msgpack", + "transformer/h/38/mlp/dense_4h_to_h/kernel": "flax_model-00040-of-00072.msgpack", + "transformer/h/38/mlp/dense_h_to_4h/bias": "flax_model-00040-of-00072.msgpack", + "transformer/h/38/mlp/dense_h_to_4h/kernel": "flax_model-00040-of-00072.msgpack", + "transformer/h/38/post_attention_layernorm/bias": "flax_model-00040-of-00072.msgpack", + "transformer/h/38/post_attention_layernorm/scale": "flax_model-00040-of-00072.msgpack", + "transformer/h/38/self_attention/dense/bias": "flax_model-00040-of-00072.msgpack", + "transformer/h/38/self_attention/dense/kernel": "flax_model-00040-of-00072.msgpack", + "transformer/h/38/self_attention/query_key_value/bias": "flax_model-00040-of-00072.msgpack", + "transformer/h/38/self_attention/query_key_value/kernel": "flax_model-00040-of-00072.msgpack", + "transformer/h/39/input_layernorm/bias": "flax_model-00041-of-00072.msgpack", + "transformer/h/39/input_layernorm/scale": "flax_model-00041-of-00072.msgpack", + "transformer/h/39/mlp/dense_4h_to_h/bias": "flax_model-00041-of-00072.msgpack", + "transformer/h/39/mlp/dense_4h_to_h/kernel": "flax_model-00041-of-00072.msgpack", + "transformer/h/39/mlp/dense_h_to_4h/bias": "flax_model-00041-of-00072.msgpack", + "transformer/h/39/mlp/dense_h_to_4h/kernel": "flax_model-00041-of-00072.msgpack", + "transformer/h/39/post_attention_layernorm/bias": "flax_model-00041-of-00072.msgpack", + "transformer/h/39/post_attention_layernorm/scale": "flax_model-00041-of-00072.msgpack", + "transformer/h/39/self_attention/dense/bias": "flax_model-00041-of-00072.msgpack", + "transformer/h/39/self_attention/dense/kernel": "flax_model-00041-of-00072.msgpack", + "transformer/h/39/self_attention/query_key_value/bias": "flax_model-00041-of-00072.msgpack", + "transformer/h/39/self_attention/query_key_value/kernel": "flax_model-00041-of-00072.msgpack", + "transformer/h/4/input_layernorm/bias": "flax_model-00006-of-00072.msgpack", + "transformer/h/4/input_layernorm/scale": "flax_model-00006-of-00072.msgpack", + "transformer/h/4/mlp/dense_4h_to_h/bias": "flax_model-00006-of-00072.msgpack", + "transformer/h/4/mlp/dense_4h_to_h/kernel": "flax_model-00006-of-00072.msgpack", + "transformer/h/4/mlp/dense_h_to_4h/bias": "flax_model-00006-of-00072.msgpack", + "transformer/h/4/mlp/dense_h_to_4h/kernel": "flax_model-00006-of-00072.msgpack", + "transformer/h/4/post_attention_layernorm/bias": "flax_model-00006-of-00072.msgpack", + "transformer/h/4/post_attention_layernorm/scale": "flax_model-00006-of-00072.msgpack", + "transformer/h/4/self_attention/dense/bias": "flax_model-00006-of-00072.msgpack", + "transformer/h/4/self_attention/dense/kernel": "flax_model-00006-of-00072.msgpack", + "transformer/h/4/self_attention/query_key_value/bias": "flax_model-00006-of-00072.msgpack", + "transformer/h/4/self_attention/query_key_value/kernel": "flax_model-00006-of-00072.msgpack", + "transformer/h/40/input_layernorm/bias": "flax_model-00042-of-00072.msgpack", + "transformer/h/40/input_layernorm/scale": "flax_model-00042-of-00072.msgpack", + "transformer/h/40/mlp/dense_4h_to_h/bias": "flax_model-00042-of-00072.msgpack", + "transformer/h/40/mlp/dense_4h_to_h/kernel": "flax_model-00042-of-00072.msgpack", + "transformer/h/40/mlp/dense_h_to_4h/bias": "flax_model-00042-of-00072.msgpack", + "transformer/h/40/mlp/dense_h_to_4h/kernel": "flax_model-00042-of-00072.msgpack", + "transformer/h/40/post_attention_layernorm/bias": "flax_model-00042-of-00072.msgpack", + "transformer/h/40/post_attention_layernorm/scale": "flax_model-00042-of-00072.msgpack", + "transformer/h/40/self_attention/dense/bias": "flax_model-00042-of-00072.msgpack", + "transformer/h/40/self_attention/dense/kernel": "flax_model-00042-of-00072.msgpack", + "transformer/h/40/self_attention/query_key_value/bias": "flax_model-00042-of-00072.msgpack", + "transformer/h/40/self_attention/query_key_value/kernel": "flax_model-00042-of-00072.msgpack", + "transformer/h/41/input_layernorm/bias": "flax_model-00043-of-00072.msgpack", + "transformer/h/41/input_layernorm/scale": "flax_model-00043-of-00072.msgpack", + "transformer/h/41/mlp/dense_4h_to_h/bias": "flax_model-00043-of-00072.msgpack", + "transformer/h/41/mlp/dense_4h_to_h/kernel": "flax_model-00043-of-00072.msgpack", + "transformer/h/41/mlp/dense_h_to_4h/bias": "flax_model-00043-of-00072.msgpack", + "transformer/h/41/mlp/dense_h_to_4h/kernel": "flax_model-00043-of-00072.msgpack", + "transformer/h/41/post_attention_layernorm/bias": "flax_model-00043-of-00072.msgpack", + "transformer/h/41/post_attention_layernorm/scale": "flax_model-00043-of-00072.msgpack", + "transformer/h/41/self_attention/dense/bias": "flax_model-00043-of-00072.msgpack", + "transformer/h/41/self_attention/dense/kernel": "flax_model-00043-of-00072.msgpack", + "transformer/h/41/self_attention/query_key_value/bias": "flax_model-00043-of-00072.msgpack", + "transformer/h/41/self_attention/query_key_value/kernel": "flax_model-00043-of-00072.msgpack", + "transformer/h/42/input_layernorm/bias": "flax_model-00044-of-00072.msgpack", + "transformer/h/42/input_layernorm/scale": "flax_model-00044-of-00072.msgpack", + "transformer/h/42/mlp/dense_4h_to_h/bias": "flax_model-00044-of-00072.msgpack", + "transformer/h/42/mlp/dense_4h_to_h/kernel": "flax_model-00044-of-00072.msgpack", + "transformer/h/42/mlp/dense_h_to_4h/bias": "flax_model-00044-of-00072.msgpack", + "transformer/h/42/mlp/dense_h_to_4h/kernel": "flax_model-00044-of-00072.msgpack", + "transformer/h/42/post_attention_layernorm/bias": "flax_model-00044-of-00072.msgpack", + "transformer/h/42/post_attention_layernorm/scale": "flax_model-00044-of-00072.msgpack", + "transformer/h/42/self_attention/dense/bias": "flax_model-00044-of-00072.msgpack", + "transformer/h/42/self_attention/dense/kernel": "flax_model-00044-of-00072.msgpack", + "transformer/h/42/self_attention/query_key_value/bias": "flax_model-00044-of-00072.msgpack", + "transformer/h/42/self_attention/query_key_value/kernel": "flax_model-00044-of-00072.msgpack", + "transformer/h/43/input_layernorm/bias": "flax_model-00045-of-00072.msgpack", + "transformer/h/43/input_layernorm/scale": "flax_model-00045-of-00072.msgpack", + "transformer/h/43/mlp/dense_4h_to_h/bias": "flax_model-00045-of-00072.msgpack", + "transformer/h/43/mlp/dense_4h_to_h/kernel": "flax_model-00045-of-00072.msgpack", + "transformer/h/43/mlp/dense_h_to_4h/bias": "flax_model-00045-of-00072.msgpack", + "transformer/h/43/mlp/dense_h_to_4h/kernel": "flax_model-00045-of-00072.msgpack", + "transformer/h/43/post_attention_layernorm/bias": "flax_model-00045-of-00072.msgpack", + "transformer/h/43/post_attention_layernorm/scale": "flax_model-00045-of-00072.msgpack", + "transformer/h/43/self_attention/dense/bias": "flax_model-00045-of-00072.msgpack", + "transformer/h/43/self_attention/dense/kernel": "flax_model-00045-of-00072.msgpack", + "transformer/h/43/self_attention/query_key_value/bias": "flax_model-00045-of-00072.msgpack", + "transformer/h/43/self_attention/query_key_value/kernel": "flax_model-00045-of-00072.msgpack", + "transformer/h/44/input_layernorm/bias": "flax_model-00046-of-00072.msgpack", + "transformer/h/44/input_layernorm/scale": "flax_model-00046-of-00072.msgpack", + "transformer/h/44/mlp/dense_4h_to_h/bias": "flax_model-00046-of-00072.msgpack", + "transformer/h/44/mlp/dense_4h_to_h/kernel": "flax_model-00046-of-00072.msgpack", + "transformer/h/44/mlp/dense_h_to_4h/bias": "flax_model-00046-of-00072.msgpack", + "transformer/h/44/mlp/dense_h_to_4h/kernel": "flax_model-00046-of-00072.msgpack", + "transformer/h/44/post_attention_layernorm/bias": "flax_model-00046-of-00072.msgpack", + "transformer/h/44/post_attention_layernorm/scale": "flax_model-00046-of-00072.msgpack", + "transformer/h/44/self_attention/dense/bias": "flax_model-00046-of-00072.msgpack", + "transformer/h/44/self_attention/dense/kernel": "flax_model-00046-of-00072.msgpack", + "transformer/h/44/self_attention/query_key_value/bias": "flax_model-00046-of-00072.msgpack", + "transformer/h/44/self_attention/query_key_value/kernel": "flax_model-00046-of-00072.msgpack", + "transformer/h/45/input_layernorm/bias": "flax_model-00047-of-00072.msgpack", + "transformer/h/45/input_layernorm/scale": "flax_model-00047-of-00072.msgpack", + "transformer/h/45/mlp/dense_4h_to_h/bias": "flax_model-00047-of-00072.msgpack", + "transformer/h/45/mlp/dense_4h_to_h/kernel": "flax_model-00047-of-00072.msgpack", + "transformer/h/45/mlp/dense_h_to_4h/bias": "flax_model-00047-of-00072.msgpack", + "transformer/h/45/mlp/dense_h_to_4h/kernel": "flax_model-00047-of-00072.msgpack", + "transformer/h/45/post_attention_layernorm/bias": "flax_model-00047-of-00072.msgpack", + "transformer/h/45/post_attention_layernorm/scale": "flax_model-00047-of-00072.msgpack", + "transformer/h/45/self_attention/dense/bias": "flax_model-00047-of-00072.msgpack", + "transformer/h/45/self_attention/dense/kernel": "flax_model-00047-of-00072.msgpack", + "transformer/h/45/self_attention/query_key_value/bias": "flax_model-00047-of-00072.msgpack", + "transformer/h/45/self_attention/query_key_value/kernel": "flax_model-00047-of-00072.msgpack", + "transformer/h/46/input_layernorm/bias": "flax_model-00048-of-00072.msgpack", + "transformer/h/46/input_layernorm/scale": "flax_model-00048-of-00072.msgpack", + "transformer/h/46/mlp/dense_4h_to_h/bias": "flax_model-00048-of-00072.msgpack", + "transformer/h/46/mlp/dense_4h_to_h/kernel": "flax_model-00048-of-00072.msgpack", + "transformer/h/46/mlp/dense_h_to_4h/bias": "flax_model-00048-of-00072.msgpack", + "transformer/h/46/mlp/dense_h_to_4h/kernel": "flax_model-00048-of-00072.msgpack", + "transformer/h/46/post_attention_layernorm/bias": "flax_model-00048-of-00072.msgpack", + "transformer/h/46/post_attention_layernorm/scale": "flax_model-00048-of-00072.msgpack", + "transformer/h/46/self_attention/dense/bias": "flax_model-00048-of-00072.msgpack", + "transformer/h/46/self_attention/dense/kernel": "flax_model-00048-of-00072.msgpack", + "transformer/h/46/self_attention/query_key_value/bias": "flax_model-00048-of-00072.msgpack", + "transformer/h/46/self_attention/query_key_value/kernel": "flax_model-00048-of-00072.msgpack", + "transformer/h/47/input_layernorm/bias": "flax_model-00049-of-00072.msgpack", + "transformer/h/47/input_layernorm/scale": "flax_model-00049-of-00072.msgpack", + "transformer/h/47/mlp/dense_4h_to_h/bias": "flax_model-00049-of-00072.msgpack", + "transformer/h/47/mlp/dense_4h_to_h/kernel": "flax_model-00049-of-00072.msgpack", + "transformer/h/47/mlp/dense_h_to_4h/bias": "flax_model-00049-of-00072.msgpack", + "transformer/h/47/mlp/dense_h_to_4h/kernel": "flax_model-00049-of-00072.msgpack", + "transformer/h/47/post_attention_layernorm/bias": "flax_model-00049-of-00072.msgpack", + "transformer/h/47/post_attention_layernorm/scale": "flax_model-00049-of-00072.msgpack", + "transformer/h/47/self_attention/dense/bias": "flax_model-00049-of-00072.msgpack", + "transformer/h/47/self_attention/dense/kernel": "flax_model-00049-of-00072.msgpack", + "transformer/h/47/self_attention/query_key_value/bias": "flax_model-00049-of-00072.msgpack", + "transformer/h/47/self_attention/query_key_value/kernel": "flax_model-00049-of-00072.msgpack", + "transformer/h/48/input_layernorm/bias": "flax_model-00050-of-00072.msgpack", + "transformer/h/48/input_layernorm/scale": "flax_model-00050-of-00072.msgpack", + "transformer/h/48/mlp/dense_4h_to_h/bias": "flax_model-00050-of-00072.msgpack", + "transformer/h/48/mlp/dense_4h_to_h/kernel": "flax_model-00050-of-00072.msgpack", + "transformer/h/48/mlp/dense_h_to_4h/bias": "flax_model-00050-of-00072.msgpack", + "transformer/h/48/mlp/dense_h_to_4h/kernel": "flax_model-00050-of-00072.msgpack", + "transformer/h/48/post_attention_layernorm/bias": "flax_model-00050-of-00072.msgpack", + "transformer/h/48/post_attention_layernorm/scale": "flax_model-00050-of-00072.msgpack", + "transformer/h/48/self_attention/dense/bias": "flax_model-00050-of-00072.msgpack", + "transformer/h/48/self_attention/dense/kernel": "flax_model-00050-of-00072.msgpack", + "transformer/h/48/self_attention/query_key_value/bias": "flax_model-00050-of-00072.msgpack", + "transformer/h/48/self_attention/query_key_value/kernel": "flax_model-00050-of-00072.msgpack", + "transformer/h/49/input_layernorm/bias": "flax_model-00051-of-00072.msgpack", + "transformer/h/49/input_layernorm/scale": "flax_model-00051-of-00072.msgpack", + "transformer/h/49/mlp/dense_4h_to_h/bias": "flax_model-00051-of-00072.msgpack", + "transformer/h/49/mlp/dense_4h_to_h/kernel": "flax_model-00051-of-00072.msgpack", + "transformer/h/49/mlp/dense_h_to_4h/bias": "flax_model-00051-of-00072.msgpack", + "transformer/h/49/mlp/dense_h_to_4h/kernel": "flax_model-00051-of-00072.msgpack", + "transformer/h/49/post_attention_layernorm/bias": "flax_model-00051-of-00072.msgpack", + "transformer/h/49/post_attention_layernorm/scale": "flax_model-00051-of-00072.msgpack", + "transformer/h/49/self_attention/dense/bias": "flax_model-00051-of-00072.msgpack", + "transformer/h/49/self_attention/dense/kernel": "flax_model-00051-of-00072.msgpack", + "transformer/h/49/self_attention/query_key_value/bias": "flax_model-00051-of-00072.msgpack", + "transformer/h/49/self_attention/query_key_value/kernel": "flax_model-00051-of-00072.msgpack", + "transformer/h/5/input_layernorm/bias": "flax_model-00007-of-00072.msgpack", + "transformer/h/5/input_layernorm/scale": "flax_model-00007-of-00072.msgpack", + "transformer/h/5/mlp/dense_4h_to_h/bias": "flax_model-00007-of-00072.msgpack", + "transformer/h/5/mlp/dense_4h_to_h/kernel": "flax_model-00007-of-00072.msgpack", + "transformer/h/5/mlp/dense_h_to_4h/bias": "flax_model-00007-of-00072.msgpack", + "transformer/h/5/mlp/dense_h_to_4h/kernel": "flax_model-00007-of-00072.msgpack", + "transformer/h/5/post_attention_layernorm/bias": "flax_model-00007-of-00072.msgpack", + "transformer/h/5/post_attention_layernorm/scale": "flax_model-00007-of-00072.msgpack", + "transformer/h/5/self_attention/dense/bias": "flax_model-00007-of-00072.msgpack", + "transformer/h/5/self_attention/dense/kernel": "flax_model-00007-of-00072.msgpack", + "transformer/h/5/self_attention/query_key_value/bias": "flax_model-00007-of-00072.msgpack", + "transformer/h/5/self_attention/query_key_value/kernel": "flax_model-00007-of-00072.msgpack", + "transformer/h/50/input_layernorm/bias": "flax_model-00052-of-00072.msgpack", + "transformer/h/50/input_layernorm/scale": "flax_model-00052-of-00072.msgpack", + "transformer/h/50/mlp/dense_4h_to_h/bias": "flax_model-00052-of-00072.msgpack", + "transformer/h/50/mlp/dense_4h_to_h/kernel": "flax_model-00052-of-00072.msgpack", + "transformer/h/50/mlp/dense_h_to_4h/bias": "flax_model-00052-of-00072.msgpack", + "transformer/h/50/mlp/dense_h_to_4h/kernel": "flax_model-00052-of-00072.msgpack", + "transformer/h/50/post_attention_layernorm/bias": "flax_model-00052-of-00072.msgpack", + "transformer/h/50/post_attention_layernorm/scale": "flax_model-00052-of-00072.msgpack", + "transformer/h/50/self_attention/dense/bias": "flax_model-00052-of-00072.msgpack", + "transformer/h/50/self_attention/dense/kernel": "flax_model-00052-of-00072.msgpack", + "transformer/h/50/self_attention/query_key_value/bias": "flax_model-00052-of-00072.msgpack", + "transformer/h/50/self_attention/query_key_value/kernel": "flax_model-00052-of-00072.msgpack", + "transformer/h/51/input_layernorm/bias": "flax_model-00053-of-00072.msgpack", + "transformer/h/51/input_layernorm/scale": "flax_model-00053-of-00072.msgpack", + "transformer/h/51/mlp/dense_4h_to_h/bias": "flax_model-00053-of-00072.msgpack", + "transformer/h/51/mlp/dense_4h_to_h/kernel": "flax_model-00053-of-00072.msgpack", + "transformer/h/51/mlp/dense_h_to_4h/bias": "flax_model-00053-of-00072.msgpack", + "transformer/h/51/mlp/dense_h_to_4h/kernel": "flax_model-00053-of-00072.msgpack", + "transformer/h/51/post_attention_layernorm/bias": "flax_model-00053-of-00072.msgpack", + "transformer/h/51/post_attention_layernorm/scale": "flax_model-00053-of-00072.msgpack", + "transformer/h/51/self_attention/dense/bias": "flax_model-00053-of-00072.msgpack", + "transformer/h/51/self_attention/dense/kernel": "flax_model-00053-of-00072.msgpack", + "transformer/h/51/self_attention/query_key_value/bias": "flax_model-00053-of-00072.msgpack", + "transformer/h/51/self_attention/query_key_value/kernel": "flax_model-00053-of-00072.msgpack", + "transformer/h/52/input_layernorm/bias": "flax_model-00054-of-00072.msgpack", + "transformer/h/52/input_layernorm/scale": "flax_model-00054-of-00072.msgpack", + "transformer/h/52/mlp/dense_4h_to_h/bias": "flax_model-00054-of-00072.msgpack", + "transformer/h/52/mlp/dense_4h_to_h/kernel": "flax_model-00054-of-00072.msgpack", + "transformer/h/52/mlp/dense_h_to_4h/bias": "flax_model-00054-of-00072.msgpack", + "transformer/h/52/mlp/dense_h_to_4h/kernel": "flax_model-00054-of-00072.msgpack", + "transformer/h/52/post_attention_layernorm/bias": "flax_model-00054-of-00072.msgpack", + "transformer/h/52/post_attention_layernorm/scale": "flax_model-00054-of-00072.msgpack", + "transformer/h/52/self_attention/dense/bias": "flax_model-00054-of-00072.msgpack", + "transformer/h/52/self_attention/dense/kernel": "flax_model-00054-of-00072.msgpack", + "transformer/h/52/self_attention/query_key_value/bias": "flax_model-00054-of-00072.msgpack", + "transformer/h/52/self_attention/query_key_value/kernel": "flax_model-00054-of-00072.msgpack", + "transformer/h/53/input_layernorm/bias": "flax_model-00055-of-00072.msgpack", + "transformer/h/53/input_layernorm/scale": "flax_model-00055-of-00072.msgpack", + "transformer/h/53/mlp/dense_4h_to_h/bias": "flax_model-00055-of-00072.msgpack", + "transformer/h/53/mlp/dense_4h_to_h/kernel": "flax_model-00055-of-00072.msgpack", + "transformer/h/53/mlp/dense_h_to_4h/bias": "flax_model-00055-of-00072.msgpack", + "transformer/h/53/mlp/dense_h_to_4h/kernel": "flax_model-00055-of-00072.msgpack", + "transformer/h/53/post_attention_layernorm/bias": "flax_model-00055-of-00072.msgpack", + "transformer/h/53/post_attention_layernorm/scale": "flax_model-00055-of-00072.msgpack", + "transformer/h/53/self_attention/dense/bias": "flax_model-00055-of-00072.msgpack", + "transformer/h/53/self_attention/dense/kernel": "flax_model-00055-of-00072.msgpack", + "transformer/h/53/self_attention/query_key_value/bias": "flax_model-00055-of-00072.msgpack", + "transformer/h/53/self_attention/query_key_value/kernel": "flax_model-00055-of-00072.msgpack", + "transformer/h/54/input_layernorm/bias": "flax_model-00056-of-00072.msgpack", + "transformer/h/54/input_layernorm/scale": "flax_model-00056-of-00072.msgpack", + "transformer/h/54/mlp/dense_4h_to_h/bias": "flax_model-00056-of-00072.msgpack", + "transformer/h/54/mlp/dense_4h_to_h/kernel": "flax_model-00056-of-00072.msgpack", + "transformer/h/54/mlp/dense_h_to_4h/bias": "flax_model-00056-of-00072.msgpack", + "transformer/h/54/mlp/dense_h_to_4h/kernel": "flax_model-00056-of-00072.msgpack", + "transformer/h/54/post_attention_layernorm/bias": "flax_model-00056-of-00072.msgpack", + "transformer/h/54/post_attention_layernorm/scale": "flax_model-00056-of-00072.msgpack", + "transformer/h/54/self_attention/dense/bias": "flax_model-00056-of-00072.msgpack", + "transformer/h/54/self_attention/dense/kernel": "flax_model-00056-of-00072.msgpack", + "transformer/h/54/self_attention/query_key_value/bias": "flax_model-00056-of-00072.msgpack", + "transformer/h/54/self_attention/query_key_value/kernel": "flax_model-00056-of-00072.msgpack", + "transformer/h/55/input_layernorm/bias": "flax_model-00057-of-00072.msgpack", + "transformer/h/55/input_layernorm/scale": "flax_model-00057-of-00072.msgpack", + "transformer/h/55/mlp/dense_4h_to_h/bias": "flax_model-00057-of-00072.msgpack", + "transformer/h/55/mlp/dense_4h_to_h/kernel": "flax_model-00057-of-00072.msgpack", + "transformer/h/55/mlp/dense_h_to_4h/bias": "flax_model-00057-of-00072.msgpack", + "transformer/h/55/mlp/dense_h_to_4h/kernel": "flax_model-00057-of-00072.msgpack", + "transformer/h/55/post_attention_layernorm/bias": "flax_model-00057-of-00072.msgpack", + "transformer/h/55/post_attention_layernorm/scale": "flax_model-00057-of-00072.msgpack", + "transformer/h/55/self_attention/dense/bias": "flax_model-00057-of-00072.msgpack", + "transformer/h/55/self_attention/dense/kernel": "flax_model-00057-of-00072.msgpack", + "transformer/h/55/self_attention/query_key_value/bias": "flax_model-00057-of-00072.msgpack", + "transformer/h/55/self_attention/query_key_value/kernel": "flax_model-00057-of-00072.msgpack", + "transformer/h/56/input_layernorm/bias": "flax_model-00058-of-00072.msgpack", + "transformer/h/56/input_layernorm/scale": "flax_model-00058-of-00072.msgpack", + "transformer/h/56/mlp/dense_4h_to_h/bias": "flax_model-00058-of-00072.msgpack", + "transformer/h/56/mlp/dense_4h_to_h/kernel": "flax_model-00058-of-00072.msgpack", + "transformer/h/56/mlp/dense_h_to_4h/bias": "flax_model-00058-of-00072.msgpack", + "transformer/h/56/mlp/dense_h_to_4h/kernel": "flax_model-00058-of-00072.msgpack", + "transformer/h/56/post_attention_layernorm/bias": "flax_model-00058-of-00072.msgpack", + "transformer/h/56/post_attention_layernorm/scale": "flax_model-00058-of-00072.msgpack", + "transformer/h/56/self_attention/dense/bias": "flax_model-00058-of-00072.msgpack", + "transformer/h/56/self_attention/dense/kernel": "flax_model-00058-of-00072.msgpack", + "transformer/h/56/self_attention/query_key_value/bias": "flax_model-00058-of-00072.msgpack", + "transformer/h/56/self_attention/query_key_value/kernel": "flax_model-00058-of-00072.msgpack", + "transformer/h/57/input_layernorm/bias": "flax_model-00059-of-00072.msgpack", + "transformer/h/57/input_layernorm/scale": "flax_model-00059-of-00072.msgpack", + "transformer/h/57/mlp/dense_4h_to_h/bias": "flax_model-00059-of-00072.msgpack", + "transformer/h/57/mlp/dense_4h_to_h/kernel": "flax_model-00059-of-00072.msgpack", + "transformer/h/57/mlp/dense_h_to_4h/bias": "flax_model-00059-of-00072.msgpack", + "transformer/h/57/mlp/dense_h_to_4h/kernel": "flax_model-00059-of-00072.msgpack", + "transformer/h/57/post_attention_layernorm/bias": "flax_model-00059-of-00072.msgpack", + "transformer/h/57/post_attention_layernorm/scale": "flax_model-00059-of-00072.msgpack", + "transformer/h/57/self_attention/dense/bias": "flax_model-00059-of-00072.msgpack", + "transformer/h/57/self_attention/dense/kernel": "flax_model-00059-of-00072.msgpack", + "transformer/h/57/self_attention/query_key_value/bias": "flax_model-00059-of-00072.msgpack", + "transformer/h/57/self_attention/query_key_value/kernel": "flax_model-00059-of-00072.msgpack", + "transformer/h/58/input_layernorm/bias": "flax_model-00060-of-00072.msgpack", + "transformer/h/58/input_layernorm/scale": "flax_model-00060-of-00072.msgpack", + "transformer/h/58/mlp/dense_4h_to_h/bias": "flax_model-00060-of-00072.msgpack", + "transformer/h/58/mlp/dense_4h_to_h/kernel": "flax_model-00060-of-00072.msgpack", + "transformer/h/58/mlp/dense_h_to_4h/bias": "flax_model-00060-of-00072.msgpack", + "transformer/h/58/mlp/dense_h_to_4h/kernel": "flax_model-00060-of-00072.msgpack", + "transformer/h/58/post_attention_layernorm/bias": "flax_model-00060-of-00072.msgpack", + "transformer/h/58/post_attention_layernorm/scale": "flax_model-00060-of-00072.msgpack", + "transformer/h/58/self_attention/dense/bias": "flax_model-00060-of-00072.msgpack", + "transformer/h/58/self_attention/dense/kernel": "flax_model-00060-of-00072.msgpack", + "transformer/h/58/self_attention/query_key_value/bias": "flax_model-00060-of-00072.msgpack", + "transformer/h/58/self_attention/query_key_value/kernel": "flax_model-00060-of-00072.msgpack", + "transformer/h/59/input_layernorm/bias": "flax_model-00061-of-00072.msgpack", + "transformer/h/59/input_layernorm/scale": "flax_model-00061-of-00072.msgpack", + "transformer/h/59/mlp/dense_4h_to_h/bias": "flax_model-00061-of-00072.msgpack", + "transformer/h/59/mlp/dense_4h_to_h/kernel": "flax_model-00061-of-00072.msgpack", + "transformer/h/59/mlp/dense_h_to_4h/bias": "flax_model-00061-of-00072.msgpack", + "transformer/h/59/mlp/dense_h_to_4h/kernel": "flax_model-00061-of-00072.msgpack", + "transformer/h/59/post_attention_layernorm/bias": "flax_model-00061-of-00072.msgpack", + "transformer/h/59/post_attention_layernorm/scale": "flax_model-00061-of-00072.msgpack", + "transformer/h/59/self_attention/dense/bias": "flax_model-00061-of-00072.msgpack", + "transformer/h/59/self_attention/dense/kernel": "flax_model-00061-of-00072.msgpack", + "transformer/h/59/self_attention/query_key_value/bias": "flax_model-00061-of-00072.msgpack", + "transformer/h/59/self_attention/query_key_value/kernel": "flax_model-00061-of-00072.msgpack", + "transformer/h/6/input_layernorm/bias": "flax_model-00008-of-00072.msgpack", + "transformer/h/6/input_layernorm/scale": "flax_model-00008-of-00072.msgpack", + "transformer/h/6/mlp/dense_4h_to_h/bias": "flax_model-00008-of-00072.msgpack", + "transformer/h/6/mlp/dense_4h_to_h/kernel": "flax_model-00008-of-00072.msgpack", + "transformer/h/6/mlp/dense_h_to_4h/bias": "flax_model-00008-of-00072.msgpack", + "transformer/h/6/mlp/dense_h_to_4h/kernel": "flax_model-00008-of-00072.msgpack", + "transformer/h/6/post_attention_layernorm/bias": "flax_model-00008-of-00072.msgpack", + "transformer/h/6/post_attention_layernorm/scale": "flax_model-00008-of-00072.msgpack", + "transformer/h/6/self_attention/dense/bias": "flax_model-00008-of-00072.msgpack", + "transformer/h/6/self_attention/dense/kernel": "flax_model-00008-of-00072.msgpack", + "transformer/h/6/self_attention/query_key_value/bias": "flax_model-00008-of-00072.msgpack", + "transformer/h/6/self_attention/query_key_value/kernel": "flax_model-00008-of-00072.msgpack", + "transformer/h/60/input_layernorm/bias": "flax_model-00062-of-00072.msgpack", + "transformer/h/60/input_layernorm/scale": "flax_model-00062-of-00072.msgpack", + "transformer/h/60/mlp/dense_4h_to_h/bias": "flax_model-00062-of-00072.msgpack", + "transformer/h/60/mlp/dense_4h_to_h/kernel": "flax_model-00062-of-00072.msgpack", + "transformer/h/60/mlp/dense_h_to_4h/bias": "flax_model-00062-of-00072.msgpack", + "transformer/h/60/mlp/dense_h_to_4h/kernel": "flax_model-00062-of-00072.msgpack", + "transformer/h/60/post_attention_layernorm/bias": "flax_model-00062-of-00072.msgpack", + "transformer/h/60/post_attention_layernorm/scale": "flax_model-00062-of-00072.msgpack", + "transformer/h/60/self_attention/dense/bias": "flax_model-00062-of-00072.msgpack", + "transformer/h/60/self_attention/dense/kernel": "flax_model-00062-of-00072.msgpack", + "transformer/h/60/self_attention/query_key_value/bias": "flax_model-00062-of-00072.msgpack", + "transformer/h/60/self_attention/query_key_value/kernel": "flax_model-00062-of-00072.msgpack", + "transformer/h/61/input_layernorm/bias": "flax_model-00063-of-00072.msgpack", + "transformer/h/61/input_layernorm/scale": "flax_model-00063-of-00072.msgpack", + "transformer/h/61/mlp/dense_4h_to_h/bias": "flax_model-00063-of-00072.msgpack", + "transformer/h/61/mlp/dense_4h_to_h/kernel": "flax_model-00063-of-00072.msgpack", + "transformer/h/61/mlp/dense_h_to_4h/bias": "flax_model-00063-of-00072.msgpack", + "transformer/h/61/mlp/dense_h_to_4h/kernel": "flax_model-00063-of-00072.msgpack", + "transformer/h/61/post_attention_layernorm/bias": "flax_model-00063-of-00072.msgpack", + "transformer/h/61/post_attention_layernorm/scale": "flax_model-00063-of-00072.msgpack", + "transformer/h/61/self_attention/dense/bias": "flax_model-00063-of-00072.msgpack", + "transformer/h/61/self_attention/dense/kernel": "flax_model-00063-of-00072.msgpack", + "transformer/h/61/self_attention/query_key_value/bias": "flax_model-00063-of-00072.msgpack", + "transformer/h/61/self_attention/query_key_value/kernel": "flax_model-00063-of-00072.msgpack", + "transformer/h/62/input_layernorm/bias": "flax_model-00064-of-00072.msgpack", + "transformer/h/62/input_layernorm/scale": "flax_model-00064-of-00072.msgpack", + "transformer/h/62/mlp/dense_4h_to_h/bias": "flax_model-00064-of-00072.msgpack", + "transformer/h/62/mlp/dense_4h_to_h/kernel": "flax_model-00064-of-00072.msgpack", + "transformer/h/62/mlp/dense_h_to_4h/bias": "flax_model-00064-of-00072.msgpack", + "transformer/h/62/mlp/dense_h_to_4h/kernel": "flax_model-00064-of-00072.msgpack", + "transformer/h/62/post_attention_layernorm/bias": "flax_model-00064-of-00072.msgpack", + "transformer/h/62/post_attention_layernorm/scale": "flax_model-00064-of-00072.msgpack", + "transformer/h/62/self_attention/dense/bias": "flax_model-00064-of-00072.msgpack", + "transformer/h/62/self_attention/dense/kernel": "flax_model-00064-of-00072.msgpack", + "transformer/h/62/self_attention/query_key_value/bias": "flax_model-00064-of-00072.msgpack", + "transformer/h/62/self_attention/query_key_value/kernel": "flax_model-00064-of-00072.msgpack", + "transformer/h/63/input_layernorm/bias": "flax_model-00065-of-00072.msgpack", + "transformer/h/63/input_layernorm/scale": "flax_model-00065-of-00072.msgpack", + "transformer/h/63/mlp/dense_4h_to_h/bias": "flax_model-00065-of-00072.msgpack", + "transformer/h/63/mlp/dense_4h_to_h/kernel": "flax_model-00065-of-00072.msgpack", + "transformer/h/63/mlp/dense_h_to_4h/bias": "flax_model-00065-of-00072.msgpack", + "transformer/h/63/mlp/dense_h_to_4h/kernel": "flax_model-00065-of-00072.msgpack", + "transformer/h/63/post_attention_layernorm/bias": "flax_model-00065-of-00072.msgpack", + "transformer/h/63/post_attention_layernorm/scale": "flax_model-00065-of-00072.msgpack", + "transformer/h/63/self_attention/dense/bias": "flax_model-00065-of-00072.msgpack", + "transformer/h/63/self_attention/dense/kernel": "flax_model-00065-of-00072.msgpack", + "transformer/h/63/self_attention/query_key_value/bias": "flax_model-00065-of-00072.msgpack", + "transformer/h/63/self_attention/query_key_value/kernel": "flax_model-00065-of-00072.msgpack", + "transformer/h/64/input_layernorm/bias": "flax_model-00066-of-00072.msgpack", + "transformer/h/64/input_layernorm/scale": "flax_model-00066-of-00072.msgpack", + "transformer/h/64/mlp/dense_4h_to_h/bias": "flax_model-00066-of-00072.msgpack", + "transformer/h/64/mlp/dense_4h_to_h/kernel": "flax_model-00066-of-00072.msgpack", + "transformer/h/64/mlp/dense_h_to_4h/bias": "flax_model-00066-of-00072.msgpack", + "transformer/h/64/mlp/dense_h_to_4h/kernel": "flax_model-00066-of-00072.msgpack", + "transformer/h/64/post_attention_layernorm/bias": "flax_model-00066-of-00072.msgpack", + "transformer/h/64/post_attention_layernorm/scale": "flax_model-00066-of-00072.msgpack", + "transformer/h/64/self_attention/dense/bias": "flax_model-00066-of-00072.msgpack", + "transformer/h/64/self_attention/dense/kernel": "flax_model-00066-of-00072.msgpack", + "transformer/h/64/self_attention/query_key_value/bias": "flax_model-00066-of-00072.msgpack", + "transformer/h/64/self_attention/query_key_value/kernel": "flax_model-00066-of-00072.msgpack", + "transformer/h/65/input_layernorm/bias": "flax_model-00067-of-00072.msgpack", + "transformer/h/65/input_layernorm/scale": "flax_model-00067-of-00072.msgpack", + "transformer/h/65/mlp/dense_4h_to_h/bias": "flax_model-00067-of-00072.msgpack", + "transformer/h/65/mlp/dense_4h_to_h/kernel": "flax_model-00067-of-00072.msgpack", + "transformer/h/65/mlp/dense_h_to_4h/bias": "flax_model-00067-of-00072.msgpack", + "transformer/h/65/mlp/dense_h_to_4h/kernel": "flax_model-00067-of-00072.msgpack", + "transformer/h/65/post_attention_layernorm/bias": "flax_model-00067-of-00072.msgpack", + "transformer/h/65/post_attention_layernorm/scale": "flax_model-00067-of-00072.msgpack", + "transformer/h/65/self_attention/dense/bias": "flax_model-00067-of-00072.msgpack", + "transformer/h/65/self_attention/dense/kernel": "flax_model-00067-of-00072.msgpack", + "transformer/h/65/self_attention/query_key_value/bias": "flax_model-00067-of-00072.msgpack", + "transformer/h/65/self_attention/query_key_value/kernel": "flax_model-00067-of-00072.msgpack", + "transformer/h/66/input_layernorm/bias": "flax_model-00068-of-00072.msgpack", + "transformer/h/66/input_layernorm/scale": "flax_model-00068-of-00072.msgpack", + "transformer/h/66/mlp/dense_4h_to_h/bias": "flax_model-00068-of-00072.msgpack", + "transformer/h/66/mlp/dense_4h_to_h/kernel": "flax_model-00068-of-00072.msgpack", + "transformer/h/66/mlp/dense_h_to_4h/bias": "flax_model-00068-of-00072.msgpack", + "transformer/h/66/mlp/dense_h_to_4h/kernel": "flax_model-00068-of-00072.msgpack", + "transformer/h/66/post_attention_layernorm/bias": "flax_model-00068-of-00072.msgpack", + "transformer/h/66/post_attention_layernorm/scale": "flax_model-00068-of-00072.msgpack", + "transformer/h/66/self_attention/dense/bias": "flax_model-00068-of-00072.msgpack", + "transformer/h/66/self_attention/dense/kernel": "flax_model-00068-of-00072.msgpack", + "transformer/h/66/self_attention/query_key_value/bias": "flax_model-00068-of-00072.msgpack", + "transformer/h/66/self_attention/query_key_value/kernel": "flax_model-00068-of-00072.msgpack", + "transformer/h/67/input_layernorm/bias": "flax_model-00069-of-00072.msgpack", + "transformer/h/67/input_layernorm/scale": "flax_model-00069-of-00072.msgpack", + "transformer/h/67/mlp/dense_4h_to_h/bias": "flax_model-00069-of-00072.msgpack", + "transformer/h/67/mlp/dense_4h_to_h/kernel": "flax_model-00069-of-00072.msgpack", + "transformer/h/67/mlp/dense_h_to_4h/bias": "flax_model-00069-of-00072.msgpack", + "transformer/h/67/mlp/dense_h_to_4h/kernel": "flax_model-00069-of-00072.msgpack", + "transformer/h/67/post_attention_layernorm/bias": "flax_model-00069-of-00072.msgpack", + "transformer/h/67/post_attention_layernorm/scale": "flax_model-00069-of-00072.msgpack", + "transformer/h/67/self_attention/dense/bias": "flax_model-00069-of-00072.msgpack", + "transformer/h/67/self_attention/dense/kernel": "flax_model-00069-of-00072.msgpack", + "transformer/h/67/self_attention/query_key_value/bias": "flax_model-00069-of-00072.msgpack", + "transformer/h/67/self_attention/query_key_value/kernel": "flax_model-00069-of-00072.msgpack", + "transformer/h/68/input_layernorm/bias": "flax_model-00070-of-00072.msgpack", + "transformer/h/68/input_layernorm/scale": "flax_model-00070-of-00072.msgpack", + "transformer/h/68/mlp/dense_4h_to_h/bias": "flax_model-00070-of-00072.msgpack", + "transformer/h/68/mlp/dense_4h_to_h/kernel": "flax_model-00070-of-00072.msgpack", + "transformer/h/68/mlp/dense_h_to_4h/bias": "flax_model-00070-of-00072.msgpack", + "transformer/h/68/mlp/dense_h_to_4h/kernel": "flax_model-00070-of-00072.msgpack", + "transformer/h/68/post_attention_layernorm/bias": "flax_model-00070-of-00072.msgpack", + "transformer/h/68/post_attention_layernorm/scale": "flax_model-00070-of-00072.msgpack", + "transformer/h/68/self_attention/dense/bias": "flax_model-00070-of-00072.msgpack", + "transformer/h/68/self_attention/dense/kernel": "flax_model-00070-of-00072.msgpack", + "transformer/h/68/self_attention/query_key_value/bias": "flax_model-00070-of-00072.msgpack", + "transformer/h/68/self_attention/query_key_value/kernel": "flax_model-00070-of-00072.msgpack", + "transformer/h/69/input_layernorm/bias": "flax_model-00071-of-00072.msgpack", + "transformer/h/69/input_layernorm/scale": "flax_model-00071-of-00072.msgpack", + "transformer/h/69/mlp/dense_4h_to_h/bias": "flax_model-00071-of-00072.msgpack", + "transformer/h/69/mlp/dense_4h_to_h/kernel": "flax_model-00071-of-00072.msgpack", + "transformer/h/69/mlp/dense_h_to_4h/bias": "flax_model-00071-of-00072.msgpack", + "transformer/h/69/mlp/dense_h_to_4h/kernel": "flax_model-00071-of-00072.msgpack", + "transformer/h/69/post_attention_layernorm/bias": "flax_model-00071-of-00072.msgpack", + "transformer/h/69/post_attention_layernorm/scale": "flax_model-00071-of-00072.msgpack", + "transformer/h/69/self_attention/dense/bias": "flax_model-00071-of-00072.msgpack", + "transformer/h/69/self_attention/dense/kernel": "flax_model-00071-of-00072.msgpack", + "transformer/h/69/self_attention/query_key_value/bias": "flax_model-00071-of-00072.msgpack", + "transformer/h/69/self_attention/query_key_value/kernel": "flax_model-00071-of-00072.msgpack", + "transformer/h/7/input_layernorm/bias": "flax_model-00009-of-00072.msgpack", + "transformer/h/7/input_layernorm/scale": "flax_model-00009-of-00072.msgpack", + "transformer/h/7/mlp/dense_4h_to_h/bias": "flax_model-00009-of-00072.msgpack", + "transformer/h/7/mlp/dense_4h_to_h/kernel": "flax_model-00009-of-00072.msgpack", + "transformer/h/7/mlp/dense_h_to_4h/bias": "flax_model-00009-of-00072.msgpack", + "transformer/h/7/mlp/dense_h_to_4h/kernel": "flax_model-00009-of-00072.msgpack", + "transformer/h/7/post_attention_layernorm/bias": "flax_model-00009-of-00072.msgpack", + "transformer/h/7/post_attention_layernorm/scale": "flax_model-00009-of-00072.msgpack", + "transformer/h/7/self_attention/dense/bias": "flax_model-00009-of-00072.msgpack", + "transformer/h/7/self_attention/dense/kernel": "flax_model-00009-of-00072.msgpack", + "transformer/h/7/self_attention/query_key_value/bias": "flax_model-00009-of-00072.msgpack", + "transformer/h/7/self_attention/query_key_value/kernel": "flax_model-00009-of-00072.msgpack", + "transformer/h/8/input_layernorm/bias": "flax_model-00010-of-00072.msgpack", + "transformer/h/8/input_layernorm/scale": "flax_model-00010-of-00072.msgpack", + "transformer/h/8/mlp/dense_4h_to_h/bias": "flax_model-00010-of-00072.msgpack", + "transformer/h/8/mlp/dense_4h_to_h/kernel": "flax_model-00010-of-00072.msgpack", + "transformer/h/8/mlp/dense_h_to_4h/bias": "flax_model-00010-of-00072.msgpack", + "transformer/h/8/mlp/dense_h_to_4h/kernel": "flax_model-00010-of-00072.msgpack", + "transformer/h/8/post_attention_layernorm/bias": "flax_model-00010-of-00072.msgpack", + "transformer/h/8/post_attention_layernorm/scale": "flax_model-00010-of-00072.msgpack", + "transformer/h/8/self_attention/dense/bias": "flax_model-00010-of-00072.msgpack", + "transformer/h/8/self_attention/dense/kernel": "flax_model-00010-of-00072.msgpack", + "transformer/h/8/self_attention/query_key_value/bias": "flax_model-00010-of-00072.msgpack", + "transformer/h/8/self_attention/query_key_value/kernel": "flax_model-00010-of-00072.msgpack", + "transformer/h/9/input_layernorm/bias": "flax_model-00011-of-00072.msgpack", + "transformer/h/9/input_layernorm/scale": "flax_model-00011-of-00072.msgpack", + "transformer/h/9/mlp/dense_4h_to_h/bias": "flax_model-00011-of-00072.msgpack", + "transformer/h/9/mlp/dense_4h_to_h/kernel": "flax_model-00011-of-00072.msgpack", + "transformer/h/9/mlp/dense_h_to_4h/bias": "flax_model-00011-of-00072.msgpack", + "transformer/h/9/mlp/dense_h_to_4h/kernel": "flax_model-00011-of-00072.msgpack", + "transformer/h/9/post_attention_layernorm/bias": "flax_model-00011-of-00072.msgpack", + "transformer/h/9/post_attention_layernorm/scale": "flax_model-00011-of-00072.msgpack", + "transformer/h/9/self_attention/dense/bias": "flax_model-00011-of-00072.msgpack", + "transformer/h/9/self_attention/dense/kernel": "flax_model-00011-of-00072.msgpack", + "transformer/h/9/self_attention/query_key_value/bias": "flax_model-00011-of-00072.msgpack", + "transformer/h/9/self_attention/query_key_value/kernel": "flax_model-00011-of-00072.msgpack", + "transformer/ln_f/bias": "flax_model-00072-of-00072.msgpack", + "transformer/ln_f/scale": "flax_model-00072-of-00072.msgpack", + "transformer/word_embeddings/embedding": "flax_model-00001-of-00072.msgpack", + "transformer/word_embeddings_layernorm/bias": "flax_model-00001-of-00072.msgpack", + "transformer/word_embeddings_layernorm/scale": "flax_model-00001-of-00072.msgpack" + } +}