diff --git a/added_tokens.json b/added_tokens.json new file mode 100644 index 0000000000000000000000000000000000000000..f6093c86014c94348f18bf42c2e404e6fbde995d --- /dev/null +++ b/added_tokens.json @@ -0,0 +1,7 @@ +{ + "": 2, + "": 1, + "": 0, + "<|im_end|>": 32000, + "<|im_start|>": 32001 +} diff --git a/ndarray-cache.json b/ndarray-cache.json new file mode 100644 index 0000000000000000000000000000000000000000..e7fb4d685ff586fba25578e236b8eda85d7ff0eb --- /dev/null +++ b/ndarray-cache.json @@ -0,0 +1,5611 @@ +{ + "metadata": { + "ParamSize": 453 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 262160384, + "records": [ + { + "name": "param_0", + "shape": [ + 32002, + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262160384, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "param_7", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 58720256, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "param_10", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 27803648, + "records": [ + { + "name": "param_1", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "param_2", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "param_3", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12976128 + }, + { + "name": "param_4", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 13369344 + }, + { + "name": "param_5", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 21757952 + }, + { + "name": "param_6", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 22020096 + }, + { + "name": "param_8", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 22282240 + }, + { + "name": "param_9", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 24117248 + }, + { + "name": "param_11", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 25952256 + }, + { + "name": "param_12", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 26869760 + }, + { + "name": "param_13", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27787264 + }, + { + "name": "param_14", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27795456 + } + ] + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "param_21", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 58720256, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "param_24", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 27803648, + "records": [ + { + "name": "param_15", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "param_16", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "param_17", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12976128 + }, + { + "name": "param_18", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 13369344 + }, + { + "name": "param_19", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 21757952 + }, + { + "name": "param_20", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 22020096 + }, + { + "name": "param_22", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 22282240 + }, + { + "name": "param_23", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 24117248 + }, + { + "name": "param_25", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 25952256 + }, + { + "name": "param_26", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 26869760 + }, + { + "name": "param_27", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27787264 + }, + { + "name": "param_28", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27795456 + } + ] + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "param_35", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 58720256, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "param_38", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 27803648, + "records": [ + { + "name": "param_29", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "param_30", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "param_31", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12976128 + }, + { + "name": "param_32", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 13369344 + }, + { + "name": "param_33", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 21757952 + }, + { + "name": "param_34", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 22020096 + }, + { + "name": "param_36", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 22282240 + }, + { + "name": "param_37", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 24117248 + }, + { + "name": "param_39", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 25952256 + }, + { + "name": "param_40", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 26869760 + }, + { + "name": "param_41", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27787264 + }, + { + "name": "param_42", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27795456 + } + ] + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "param_49", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 58720256, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "param_52", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 27803648, + "records": [ + { + "name": "param_43", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "param_44", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "param_45", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12976128 + }, + { + "name": "param_46", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 13369344 + }, + { + "name": "param_47", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 21757952 + }, + { + "name": "param_48", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 22020096 + }, + { + "name": "param_50", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 22282240 + }, + { + "name": "param_51", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 24117248 + }, + { + "name": "param_53", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 25952256 + }, + { + "name": "param_54", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 26869760 + }, + { + "name": "param_55", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27787264 + }, + { + "name": "param_56", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27795456 + } + ] + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "param_63", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 58720256, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "param_66", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 27803648, + "records": [ + { + "name": "param_57", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "param_58", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "param_59", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12976128 + }, + { + "name": "param_60", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 13369344 + }, + { + "name": "param_61", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 21757952 + }, + { + "name": "param_62", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 22020096 + }, + { + "name": "param_64", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 22282240 + }, + { + "name": "param_65", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 24117248 + }, + { + "name": "param_67", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 25952256 + }, + { + "name": "param_68", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 26869760 + }, + { + "name": "param_69", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27787264 + }, + { + "name": "param_70", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27795456 + } + ] + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "param_77", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 58720256, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "param_80", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 27803648, + "records": [ + { + "name": "param_71", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "param_72", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "param_73", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12976128 + }, + { + "name": "param_74", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 13369344 + }, + { + "name": "param_75", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 21757952 + }, + { + "name": "param_76", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 22020096 + }, + { + "name": "param_78", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 22282240 + }, + { + "name": "param_79", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 24117248 + }, + { + "name": "param_81", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 25952256 + }, + { + "name": "param_82", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 26869760 + }, + { + "name": "param_83", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27787264 + }, + { + "name": "param_84", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27795456 + } + ] + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "param_91", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 58720256, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "param_94", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 27803648, + "records": [ + { + "name": "param_85", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "param_86", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "param_87", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12976128 + }, + { + "name": "param_88", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 13369344 + }, + { + "name": "param_89", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 21757952 + }, + { + "name": "param_90", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 22020096 + }, + { + "name": "param_92", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 22282240 + }, + { + "name": "param_93", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 24117248 + }, + { + "name": "param_95", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 25952256 + }, + { + "name": "param_96", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 26869760 + }, + { + "name": "param_97", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27787264 + }, + { + "name": "param_98", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27795456 + } + ] + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "param_105", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 58720256, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "param_108", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 27803648, + "records": [ + { + "name": "param_99", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "param_100", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "param_101", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12976128 + }, + { + "name": "param_102", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 13369344 + }, + { + "name": "param_103", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 21757952 + }, + { + "name": "param_104", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 22020096 + }, + { + "name": "param_106", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 22282240 + }, + { + "name": "param_107", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 24117248 + }, + { + "name": "param_109", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 25952256 + }, + { + "name": "param_110", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 26869760 + }, + { + "name": "param_111", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27787264 + }, + { + "name": "param_112", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27795456 + } + ] + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "param_119", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 58720256, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "param_122", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 27803648, + "records": [ + { + "name": "param_113", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "param_114", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "param_115", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12976128 + }, + { + "name": "param_116", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 13369344 + }, + { + "name": "param_117", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 21757952 + }, + { + "name": "param_118", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 22020096 + }, + { + "name": "param_120", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 22282240 + }, + { + "name": "param_121", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 24117248 + }, + { + "name": "param_123", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 25952256 + }, + { + "name": "param_124", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 26869760 + }, + { + "name": "param_125", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27787264 + }, + { + "name": "param_126", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27795456 + } + ] + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "param_133", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 58720256, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "param_136", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 27803648, + "records": [ + { + "name": "param_127", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "param_128", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "param_129", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12976128 + }, + { + "name": "param_130", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 13369344 + }, + { + "name": "param_131", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 21757952 + }, + { + "name": "param_132", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 22020096 + }, + { + "name": "param_134", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 22282240 + }, + { + "name": "param_135", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 24117248 + }, + { + "name": "param_137", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 25952256 + }, + { + "name": "param_138", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 26869760 + }, + { + "name": "param_139", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27787264 + }, + { + "name": "param_140", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27795456 + } + ] + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "param_147", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 58720256, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "param_150", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 27803648, + "records": [ + { + "name": "param_141", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "param_142", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "param_143", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12976128 + }, + { + "name": "param_144", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 13369344 + }, + { + "name": "param_145", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 21757952 + }, + { + "name": "param_146", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 22020096 + }, + { + "name": "param_148", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 22282240 + }, + { + "name": "param_149", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 24117248 + }, + { + "name": "param_151", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 25952256 + }, + { + "name": "param_152", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 26869760 + }, + { + "name": "param_153", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27787264 + }, + { + "name": "param_154", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27795456 + } + ] + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "param_161", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 58720256, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "param_164", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 27803648, + "records": [ + { + "name": "param_155", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "param_156", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "param_157", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12976128 + }, + { + "name": "param_158", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 13369344 + }, + { + "name": "param_159", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 21757952 + }, + { + "name": "param_160", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 22020096 + }, + { + "name": "param_162", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 22282240 + }, + { + "name": "param_163", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 24117248 + }, + { + "name": "param_165", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 25952256 + }, + { + "name": "param_166", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 26869760 + }, + { + "name": "param_167", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27787264 + }, + { + "name": "param_168", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27795456 + } + ] + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "param_175", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 58720256, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "param_178", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 27803648, + "records": [ + { + "name": "param_169", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "param_170", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "param_171", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12976128 + }, + { + "name": "param_172", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 13369344 + }, + { + "name": "param_173", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 21757952 + }, + { + "name": "param_174", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 22020096 + }, + { + "name": "param_176", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 22282240 + }, + { + "name": "param_177", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 24117248 + }, + { + "name": "param_179", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 25952256 + }, + { + "name": "param_180", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 26869760 + }, + { + "name": "param_181", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27787264 + }, + { + "name": "param_182", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27795456 + } + ] + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "param_189", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 58720256, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "param_192", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 27803648, + "records": [ + { + "name": "param_183", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "param_184", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "param_185", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12976128 + }, + { + "name": "param_186", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 13369344 + }, + { + "name": "param_187", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 21757952 + }, + { + "name": "param_188", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 22020096 + }, + { + "name": "param_190", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 22282240 + }, + { + "name": "param_191", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 24117248 + }, + { + "name": "param_193", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 25952256 + }, + { + "name": "param_194", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 26869760 + }, + { + "name": "param_195", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27787264 + }, + { + "name": "param_196", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27795456 + } + ] + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "param_203", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 58720256, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "param_206", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 27803648, + "records": [ + { + "name": "param_197", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "param_198", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "param_199", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12976128 + }, + { + "name": "param_200", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 13369344 + }, + { + "name": "param_201", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 21757952 + }, + { + "name": "param_202", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 22020096 + }, + { + "name": "param_204", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 22282240 + }, + { + "name": "param_205", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 24117248 + }, + { + "name": "param_207", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 25952256 + }, + { + "name": "param_208", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 26869760 + }, + { + "name": "param_209", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27787264 + }, + { + "name": "param_210", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27795456 + } + ] + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "param_217", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 58720256, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "param_220", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 27803648, + "records": [ + { + "name": "param_211", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "param_212", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "param_213", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12976128 + }, + { + "name": "param_214", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 13369344 + }, + { + "name": "param_215", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 21757952 + }, + { + "name": "param_216", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 22020096 + }, + { + "name": "param_218", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 22282240 + }, + { + "name": "param_219", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 24117248 + }, + { + "name": "param_221", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 25952256 + }, + { + "name": "param_222", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 26869760 + }, + { + "name": "param_223", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27787264 + }, + { + "name": "param_224", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27795456 + } + ] + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "param_231", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 58720256, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "param_234", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 27803648, + "records": [ + { + "name": "param_225", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "param_226", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "param_227", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12976128 + }, + { + "name": "param_228", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 13369344 + }, + { + "name": "param_229", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 21757952 + }, + { + "name": "param_230", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 22020096 + }, + { + "name": "param_232", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 22282240 + }, + { + "name": "param_233", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 24117248 + }, + { + "name": "param_235", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 25952256 + }, + { + "name": "param_236", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 26869760 + }, + { + "name": "param_237", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27787264 + }, + { + "name": "param_238", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27795456 + } + ] + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "param_245", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 58720256, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "param_248", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 27803648, + "records": [ + { + "name": "param_239", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "param_240", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "param_241", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12976128 + }, + { + "name": "param_242", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 13369344 + }, + { + "name": "param_243", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 21757952 + }, + { + "name": "param_244", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 22020096 + }, + { + "name": "param_246", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 22282240 + }, + { + "name": "param_247", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 24117248 + }, + { + "name": "param_249", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 25952256 + }, + { + "name": "param_250", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 26869760 + }, + { + "name": "param_251", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27787264 + }, + { + "name": "param_252", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27795456 + } + ] + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "param_259", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 58720256, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "param_262", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 27803648, + "records": [ + { + "name": "param_253", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "param_254", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "param_255", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12976128 + }, + { + "name": "param_256", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 13369344 + }, + { + "name": "param_257", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 21757952 + }, + { + "name": "param_258", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 22020096 + }, + { + "name": "param_260", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 22282240 + }, + { + "name": "param_261", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 24117248 + }, + { + "name": "param_263", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 25952256 + }, + { + "name": "param_264", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 26869760 + }, + { + "name": "param_265", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27787264 + }, + { + "name": "param_266", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27795456 + } + ] + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "param_273", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 58720256, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_59.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "param_276", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 27803648, + "records": [ + { + "name": "param_267", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "param_268", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "param_269", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12976128 + }, + { + "name": "param_270", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 13369344 + }, + { + "name": "param_271", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 21757952 + }, + { + "name": "param_272", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 22020096 + }, + { + "name": "param_274", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 22282240 + }, + { + "name": "param_275", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 24117248 + }, + { + "name": "param_277", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 25952256 + }, + { + "name": "param_278", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 26869760 + }, + { + "name": "param_279", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27787264 + }, + { + "name": "param_280", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27795456 + } + ] + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "param_287", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 58720256, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_62.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "param_290", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_63.bin", + "format": "raw-shard", + "nbytes": 27803648, + "records": [ + { + "name": "param_281", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "param_282", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "param_283", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12976128 + }, + { + "name": "param_284", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 13369344 + }, + { + "name": "param_285", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 21757952 + }, + { + "name": "param_286", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 22020096 + }, + { + "name": "param_288", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 22282240 + }, + { + "name": "param_289", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 24117248 + }, + { + "name": "param_291", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 25952256 + }, + { + "name": "param_292", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 26869760 + }, + { + "name": "param_293", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27787264 + }, + { + "name": "param_294", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27795456 + } + ] + }, + { + "dataPath": "params_shard_64.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "param_301", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 58720256, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_65.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "param_304", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_66.bin", + "format": "raw-shard", + "nbytes": 27803648, + "records": [ + { + "name": "param_295", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "param_296", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "param_297", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12976128 + }, + { + "name": "param_298", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 13369344 + }, + { + "name": "param_299", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 21757952 + }, + { + "name": "param_300", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 22020096 + }, + { + "name": "param_302", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 22282240 + }, + { + "name": "param_303", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 24117248 + }, + { + "name": "param_305", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 25952256 + }, + { + "name": "param_306", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 26869760 + }, + { + "name": "param_307", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27787264 + }, + { + "name": "param_308", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27795456 + } + ] + }, + { + "dataPath": "params_shard_67.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "param_315", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 58720256, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_68.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "param_318", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_69.bin", + "format": "raw-shard", + "nbytes": 27803648, + "records": [ + { + "name": "param_309", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "param_310", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "param_311", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12976128 + }, + { + "name": "param_312", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 13369344 + }, + { + "name": "param_313", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 21757952 + }, + { + "name": "param_314", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 22020096 + }, + { + "name": "param_316", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 22282240 + }, + { + "name": "param_317", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 24117248 + }, + { + "name": "param_319", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 25952256 + }, + { + "name": "param_320", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 26869760 + }, + { + "name": "param_321", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27787264 + }, + { + "name": "param_322", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27795456 + } + ] + }, + { + "dataPath": "params_shard_70.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "param_329", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 58720256, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_71.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "param_332", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_72.bin", + "format": "raw-shard", + "nbytes": 27803648, + "records": [ + { + "name": "param_323", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "param_324", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "param_325", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12976128 + }, + { + "name": "param_326", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 13369344 + }, + { + "name": "param_327", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 21757952 + }, + { + "name": "param_328", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 22020096 + }, + { + "name": "param_330", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 22282240 + }, + { + "name": "param_331", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 24117248 + }, + { + "name": "param_333", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 25952256 + }, + { + "name": "param_334", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 26869760 + }, + { + "name": "param_335", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27787264 + }, + { + "name": "param_336", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27795456 + } + ] + }, + { + "dataPath": "params_shard_73.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "param_343", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 58720256, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_74.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "param_346", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_75.bin", + "format": "raw-shard", + "nbytes": 27803648, + "records": [ + { + "name": "param_337", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "param_338", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "param_339", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12976128 + }, + { + "name": "param_340", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 13369344 + }, + { + "name": "param_341", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 21757952 + }, + { + "name": "param_342", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 22020096 + }, + { + "name": "param_344", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 22282240 + }, + { + "name": "param_345", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 24117248 + }, + { + "name": "param_347", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 25952256 + }, + { + "name": "param_348", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 26869760 + }, + { + "name": "param_349", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27787264 + }, + { + "name": "param_350", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27795456 + } + ] + }, + { + "dataPath": "params_shard_76.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "param_357", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 58720256, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_77.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "param_360", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_78.bin", + "format": "raw-shard", + "nbytes": 27803648, + "records": [ + { + "name": "param_351", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "param_352", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "param_353", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12976128 + }, + { + "name": "param_354", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 13369344 + }, + { + "name": "param_355", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 21757952 + }, + { + "name": "param_356", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 22020096 + }, + { + "name": "param_358", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 22282240 + }, + { + "name": "param_359", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 24117248 + }, + { + "name": "param_361", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 25952256 + }, + { + "name": "param_362", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 26869760 + }, + { + "name": "param_363", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27787264 + }, + { + "name": "param_364", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27795456 + } + ] + }, + { + "dataPath": "params_shard_79.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "param_371", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 58720256, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_80.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "param_374", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_81.bin", + "format": "raw-shard", + "nbytes": 27803648, + "records": [ + { + "name": "param_365", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "param_366", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "param_367", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12976128 + }, + { + "name": "param_368", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 13369344 + }, + { + "name": "param_369", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 21757952 + }, + { + "name": "param_370", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 22020096 + }, + { + "name": "param_372", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 22282240 + }, + { + "name": "param_373", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 24117248 + }, + { + "name": "param_375", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 25952256 + }, + { + "name": "param_376", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 26869760 + }, + { + "name": "param_377", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27787264 + }, + { + "name": "param_378", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27795456 + } + ] + }, + { + "dataPath": "params_shard_82.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "param_385", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 58720256, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_83.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "param_388", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_84.bin", + "format": "raw-shard", + "nbytes": 27803648, + "records": [ + { + "name": "param_379", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "param_380", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "param_381", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12976128 + }, + { + "name": "param_382", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 13369344 + }, + { + "name": "param_383", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 21757952 + }, + { + "name": "param_384", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 22020096 + }, + { + "name": "param_386", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 22282240 + }, + { + "name": "param_387", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 24117248 + }, + { + "name": "param_389", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 25952256 + }, + { + "name": "param_390", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 26869760 + }, + { + "name": "param_391", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27787264 + }, + { + "name": "param_392", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27795456 + } + ] + }, + { + "dataPath": "params_shard_85.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "param_399", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 58720256, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_86.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "param_402", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_87.bin", + "format": "raw-shard", + "nbytes": 27803648, + "records": [ + { + "name": "param_393", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "param_394", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "param_395", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12976128 + }, + { + "name": "param_396", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 13369344 + }, + { + "name": "param_397", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 21757952 + }, + { + "name": "param_398", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 22020096 + }, + { + "name": "param_400", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 22282240 + }, + { + "name": "param_401", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 24117248 + }, + { + "name": "param_403", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 25952256 + }, + { + "name": "param_404", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 26869760 + }, + { + "name": "param_405", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27787264 + }, + { + "name": "param_406", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27795456 + } + ] + }, + { + "dataPath": "params_shard_88.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "param_413", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 58720256, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_89.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "param_416", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_90.bin", + "format": "raw-shard", + "nbytes": 27803648, + "records": [ + { + "name": "param_407", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "param_408", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "param_409", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12976128 + }, + { + "name": "param_410", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 13369344 + }, + { + "name": "param_411", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 21757952 + }, + { + "name": "param_412", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 22020096 + }, + { + "name": "param_414", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 22282240 + }, + { + "name": "param_415", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 24117248 + }, + { + "name": "param_417", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 25952256 + }, + { + "name": "param_418", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 26869760 + }, + { + "name": "param_419", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27787264 + }, + { + "name": "param_420", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27795456 + } + ] + }, + { + "dataPath": "params_shard_91.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "param_427", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 58720256, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_92.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "param_430", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_93.bin", + "format": "raw-shard", + "nbytes": 27803648, + "records": [ + { + "name": "param_421", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "param_422", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "param_423", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12976128 + }, + { + "name": "param_424", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 13369344 + }, + { + "name": "param_425", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 21757952 + }, + { + "name": "param_426", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 22020096 + }, + { + "name": "param_428", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 22282240 + }, + { + "name": "param_429", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 24117248 + }, + { + "name": "param_431", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 25952256 + }, + { + "name": "param_432", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 26869760 + }, + { + "name": "param_433", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27787264 + }, + { + "name": "param_434", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27795456 + } + ] + }, + { + "dataPath": "params_shard_94.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "param_441", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 58720256, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_95.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "param_444", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 29360128, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_96.bin", + "format": "raw-shard", + "nbytes": 262160384, + "records": [ + { + "name": "param_450", + "shape": [ + 32002, + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262160384, + "byteOffset": 0 + } + ] + }, + { + "dataPath": "params_shard_97.bin", + "format": "raw-shard", + "nbytes": 28860416, + "records": [ + { + "name": "param_435", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "param_436", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "param_437", + "shape": [ + 6144, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 393216, + "byteOffset": 12976128 + }, + { + "name": "param_438", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 13369344 + }, + { + "name": "param_439", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 21757952 + }, + { + "name": "param_440", + "shape": [ + 4096, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 262144, + "byteOffset": 22020096 + }, + { + "name": "param_442", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 22282240 + }, + { + "name": "param_443", + "shape": [ + 28672, + 32 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1835008, + "byteOffset": 24117248 + }, + { + "name": "param_445", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 25952256 + }, + { + "name": "param_446", + "shape": [ + 4096, + 112 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 917504, + "byteOffset": 26869760 + }, + { + "name": "param_447", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27787264 + }, + { + "name": "param_448", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27795456 + }, + { + "name": "param_449", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27803648 + }, + { + "name": "param_451", + "shape": [ + 2048, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 524288, + "byteOffset": 27811840 + }, + { + "name": "param_452", + "shape": [ + 2048, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 524288, + "byteOffset": 28336128 + } + ] + } + ] +} \ No newline at end of file diff --git a/params_shard_0.bin b/params_shard_0.bin new file mode 100644 index 0000000000000000000000000000000000000000..f49e20cfc100bb6d0bc79f812a5b3a805f36bd79 --- /dev/null +++ b/params_shard_0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25de82c3f00339ca1e052ee48e55d726fae762c6aeb476caba8b200b7fbe36b9 +size 262160384 diff --git a/params_shard_1.bin b/params_shard_1.bin new file mode 100644 index 0000000000000000000000000000000000000000..3831dd984bf9da11399469868de0d8b6e28153a4 --- /dev/null +++ b/params_shard_1.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35887b0497bb638dd75a1355476ed43400a9769e619204ea528b3f9b68534667 +size 58720256 diff --git a/params_shard_10.bin b/params_shard_10.bin new file mode 100644 index 0000000000000000000000000000000000000000..d32b6cbfa2df835794536a97fffbb94c52d0a412 --- /dev/null +++ b/params_shard_10.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0f5f2b244bb19dd119bfad6a2c5996d98a970a84bf69043f599cff9215a1f70 +size 58720256 diff --git a/params_shard_11.bin b/params_shard_11.bin new file mode 100644 index 0000000000000000000000000000000000000000..a3cde003e4411e9b4d1fedc9870dd020f8837fff --- /dev/null +++ b/params_shard_11.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a24148bebc25f3cb29fd9fb227e51a4391956be7eea438a88864fa19b4d9aed +size 29360128 diff --git a/params_shard_12.bin b/params_shard_12.bin new file mode 100644 index 0000000000000000000000000000000000000000..9d8e1b9ff210037e2b05f30c0b31fea3cf4e529c --- /dev/null +++ b/params_shard_12.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4fd60c94eb1e86e873991ad688f8925849a0cba655a12df2176acf92b3d8f347 +size 27803648 diff --git a/params_shard_13.bin b/params_shard_13.bin new file mode 100644 index 0000000000000000000000000000000000000000..20a0499aa13e661299dd5a84b4eee412efda162f --- /dev/null +++ b/params_shard_13.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf18ef1fe5721bcd2d00ca5b2468c2a364a3cfad28672aa92df00be28914c7a3 +size 58720256 diff --git a/params_shard_14.bin b/params_shard_14.bin new file mode 100644 index 0000000000000000000000000000000000000000..1c5b3559d990089e5d2edd337e49df5dbd826d75 --- /dev/null +++ b/params_shard_14.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17204e3615eec1a8a1a65b208f76b751d3e61caf013b0c6a89bd884e81f67da5 +size 29360128 diff --git a/params_shard_15.bin b/params_shard_15.bin new file mode 100644 index 0000000000000000000000000000000000000000..90a990edafa7ed6f41dc5614c0938767bfce637a --- /dev/null +++ b/params_shard_15.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0d630b641e09aea8be9b262c7a92a7a660d4a72110d9b2b9033c060bec70407 +size 27803648 diff --git a/params_shard_16.bin b/params_shard_16.bin new file mode 100644 index 0000000000000000000000000000000000000000..333a65735bfd4425ee4c298af48f165ad5002a45 --- /dev/null +++ b/params_shard_16.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:588d65a4c94e8bfd097dff74d12d95445e09965967afbb0193db4b720fbe6e5b +size 58720256 diff --git a/params_shard_17.bin b/params_shard_17.bin new file mode 100644 index 0000000000000000000000000000000000000000..44b27d9887505e98a8b624a84844854c9830a520 --- /dev/null +++ b/params_shard_17.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9a95c7af6f306c5c31c64022c69ed06fef229830f304c9ff0748cacf62319ad +size 29360128 diff --git a/params_shard_18.bin b/params_shard_18.bin new file mode 100644 index 0000000000000000000000000000000000000000..94ea1cde0b2804a97f934b0e90b47bb51b885c47 --- /dev/null +++ b/params_shard_18.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd176c346f4fbf69527b67d9349e78f1693132f67301692464a23f45d80d518f +size 27803648 diff --git a/params_shard_19.bin b/params_shard_19.bin new file mode 100644 index 0000000000000000000000000000000000000000..f58bda06100a6508eff785c92765a0dace51e951 --- /dev/null +++ b/params_shard_19.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d84a1f91cbb45f5fee1e9590571e34cc5875d1234c66c106162eb27d16bc256 +size 58720256 diff --git a/params_shard_2.bin b/params_shard_2.bin new file mode 100644 index 0000000000000000000000000000000000000000..82ce524cbfa95b306ad793f8057be896a0c25677 --- /dev/null +++ b/params_shard_2.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e07859a66bb1a056fea513668503467913fe58b942d40fa5e2b13a9ec6337b21 +size 29360128 diff --git a/params_shard_20.bin b/params_shard_20.bin new file mode 100644 index 0000000000000000000000000000000000000000..bc6db020251d110950d6b4091cfa753f23d6f128 --- /dev/null +++ b/params_shard_20.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:808fe1f81a311051c1c333c11c2bab27ae2d371e60d7800e9d84dc239af3ddeb +size 29360128 diff --git a/params_shard_21.bin b/params_shard_21.bin new file mode 100644 index 0000000000000000000000000000000000000000..ff5f34a85d20cadc03836042522556a1129b990c --- /dev/null +++ b/params_shard_21.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a385ac7546c8eb228232cd09d1e97a571bbb4eed9d9165092288efe70b42c1c +size 27803648 diff --git a/params_shard_22.bin b/params_shard_22.bin new file mode 100644 index 0000000000000000000000000000000000000000..a3e95ab2d6cb1fcc838a2ac9e010135b96b46280 --- /dev/null +++ b/params_shard_22.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36dcc9236c8aeaee927869f7405cb3ee178377beb2839193061f2189fe787c1e +size 58720256 diff --git a/params_shard_23.bin b/params_shard_23.bin new file mode 100644 index 0000000000000000000000000000000000000000..5b97b0e6d11e22ac63f97e72b403d7b48e942941 --- /dev/null +++ b/params_shard_23.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ca7bcfd3c96082d9eb3e01ee1f7d2c3e38f03de08bb6eac9f8e596763f2dd93 +size 29360128 diff --git a/params_shard_24.bin b/params_shard_24.bin new file mode 100644 index 0000000000000000000000000000000000000000..d4cac1dea8152f5ed60c2fbbed9f5b63cfa08504 --- /dev/null +++ b/params_shard_24.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a284eba602eacf403938b3e6a9f5f57fbd330377cf5d9a2dab4c084a6284165 +size 27803648 diff --git a/params_shard_25.bin b/params_shard_25.bin new file mode 100644 index 0000000000000000000000000000000000000000..296b8ab484436c7724512040e6b4cc8258cc747e --- /dev/null +++ b/params_shard_25.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb43efe398c4e66d361c5abf33f7d60020ad3ee3ccaaa751bb6c9f6d656e827d +size 58720256 diff --git a/params_shard_26.bin b/params_shard_26.bin new file mode 100644 index 0000000000000000000000000000000000000000..2032b17469f3374c5e2dc1a3d86de23dec99648c --- /dev/null +++ b/params_shard_26.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3554e4101a21ef49c1ba5ffc02e4379c68eb4a727906b9d7737147788f0fccb +size 29360128 diff --git a/params_shard_27.bin b/params_shard_27.bin new file mode 100644 index 0000000000000000000000000000000000000000..15fbad164aa4fe834ac26719c7d6b96c4b128a82 --- /dev/null +++ b/params_shard_27.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6ddcfd36c4276112c6adea1887cc1ab07b13046fd0aa2a1144a6306449848cc +size 27803648 diff --git a/params_shard_28.bin b/params_shard_28.bin new file mode 100644 index 0000000000000000000000000000000000000000..8b5c899fd3a7305be848fa41fdddf11cf2fcb368 --- /dev/null +++ b/params_shard_28.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a9a8dcc9a5aac94171396f73749438ad21d883214599caa050b5feae185e01f +size 58720256 diff --git a/params_shard_29.bin b/params_shard_29.bin new file mode 100644 index 0000000000000000000000000000000000000000..6987c20ed3b1fcd4be609f3818e512e4c0b597ca --- /dev/null +++ b/params_shard_29.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bac1acdf53a01e9ee436b3cb77b738f61c9f68b10101918bea1b93dcee8b1ffa +size 29360128 diff --git a/params_shard_3.bin b/params_shard_3.bin new file mode 100644 index 0000000000000000000000000000000000000000..0b6b992c025a1dcb2ff2cef4073da4af8f7a6321 --- /dev/null +++ b/params_shard_3.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7962cc1923de979754c0951164fc952065d1e66c9c4d617e08a8371efc2fa1d +size 27803648 diff --git a/params_shard_30.bin b/params_shard_30.bin new file mode 100644 index 0000000000000000000000000000000000000000..bfab47bed73fa492b49c0700800039133cad55d6 --- /dev/null +++ b/params_shard_30.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b3f64a884e10d210e847884b5b5efac86836b8634f3a9afe7f3f9b3c90f5e4c +size 27803648 diff --git a/params_shard_31.bin b/params_shard_31.bin new file mode 100644 index 0000000000000000000000000000000000000000..c56534f67522034148e9b486ecac9249a6f377ff --- /dev/null +++ b/params_shard_31.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5651aa77cc69f04376e6a9b41ad1eaf8b34300d10a2ca714ee3603358ea9b699 +size 58720256 diff --git a/params_shard_32.bin b/params_shard_32.bin new file mode 100644 index 0000000000000000000000000000000000000000..e92d0bb61669c80c04a7c36ab8b7a11eba9ffc1c --- /dev/null +++ b/params_shard_32.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16aef2fbfbb242f0c8fa861d7f3ac0194498e273d780ba88065eb9ad80cdb49b +size 29360128 diff --git a/params_shard_33.bin b/params_shard_33.bin new file mode 100644 index 0000000000000000000000000000000000000000..02f40a52a2c8f0d01da9c69d9bda7b31918476ea --- /dev/null +++ b/params_shard_33.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f33b835aaed505bee0dd07e65085f1b578d00bf4621f84e9d7e3a73fec74769 +size 27803648 diff --git a/params_shard_34.bin b/params_shard_34.bin new file mode 100644 index 0000000000000000000000000000000000000000..1bf62879dbf39de11edc6ae8545627a41c4f8c58 --- /dev/null +++ b/params_shard_34.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9054e09ba642bfa7ea7b7b4c005b498be2a42b43e72fd2d1fd243a89b9f1e856 +size 58720256 diff --git a/params_shard_35.bin b/params_shard_35.bin new file mode 100644 index 0000000000000000000000000000000000000000..5b32779360fa6532364153dee811684d048cb253 --- /dev/null +++ b/params_shard_35.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6520628f333aa3809b427f0af2623601c1774e16f3117cf22c9aa1e396d230b7 +size 29360128 diff --git a/params_shard_36.bin b/params_shard_36.bin new file mode 100644 index 0000000000000000000000000000000000000000..46697430ff8d2bec579f85c281359ce8957e560c --- /dev/null +++ b/params_shard_36.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d7d0ab6f5a5cc02e7373db9790a4313193acb6854f8706b8198e58f7699432d +size 27803648 diff --git a/params_shard_37.bin b/params_shard_37.bin new file mode 100644 index 0000000000000000000000000000000000000000..613ee61113522a67e48bcc07b4785dc957992f04 --- /dev/null +++ b/params_shard_37.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:086e74490922d2a84f07422d2855df95e20e9cd8450b28e4b4c7243965447e11 +size 58720256 diff --git a/params_shard_38.bin b/params_shard_38.bin new file mode 100644 index 0000000000000000000000000000000000000000..47ca9e844ebb1b8099e689bb5f4f5732afda4659 --- /dev/null +++ b/params_shard_38.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:538bc80703740f2caf2c111ea08bba26890c094f1c6593f3ea641dd4dd074438 +size 29360128 diff --git a/params_shard_39.bin b/params_shard_39.bin new file mode 100644 index 0000000000000000000000000000000000000000..1a11643e09a4b1724080e320a37ea2f02e2b6278 --- /dev/null +++ b/params_shard_39.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99307126646e7a6cf4a0a5d80ed95d1816567365ecaf75631d4645adca001017 +size 27803648 diff --git a/params_shard_4.bin b/params_shard_4.bin new file mode 100644 index 0000000000000000000000000000000000000000..50efe615ce037b82d10717ff1320c2d259971dac --- /dev/null +++ b/params_shard_4.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39d275d21b28579269655e4ddd1d8c25a80ca13a5a38440ec0540ffa6d06e19b +size 58720256 diff --git a/params_shard_40.bin b/params_shard_40.bin new file mode 100644 index 0000000000000000000000000000000000000000..b00d02c2da1c287b9bde81bebe38a82343f9428d --- /dev/null +++ b/params_shard_40.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c2cb02380dde73bdb7af43002ba2e0d87ac06d5cf58655f600fbbdda3f39d33 +size 58720256 diff --git a/params_shard_41.bin b/params_shard_41.bin new file mode 100644 index 0000000000000000000000000000000000000000..aea0d501468b1085acd32d1c84a82e8809a0c90d --- /dev/null +++ b/params_shard_41.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ec8f945f11dec22f5650f555eaf85d26e03b067e845ddcf5655ca64b6b2118b +size 29360128 diff --git a/params_shard_42.bin b/params_shard_42.bin new file mode 100644 index 0000000000000000000000000000000000000000..263bd35dbe1ee993c0137d62005031125890e80f --- /dev/null +++ b/params_shard_42.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3657ce6e35d0c40ac5e472e65a507da89fc8386fe6773505f7ed7d3a7e840a8e +size 27803648 diff --git a/params_shard_43.bin b/params_shard_43.bin new file mode 100644 index 0000000000000000000000000000000000000000..21c821f91a3a3df0566df92d2f8e99321d9879ed --- /dev/null +++ b/params_shard_43.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7fcee09b75bdc1ed1a0c28ac368e5c2846fdd4a2f62a638a6693c8311386378d +size 58720256 diff --git a/params_shard_44.bin b/params_shard_44.bin new file mode 100644 index 0000000000000000000000000000000000000000..50ced6a98de52a8f0af05456307b057f0752b3e9 --- /dev/null +++ b/params_shard_44.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:607fcf461ada2afbe698766e415def4a6a765b105b641f8edfe4cfa448250e54 +size 29360128 diff --git a/params_shard_45.bin b/params_shard_45.bin new file mode 100644 index 0000000000000000000000000000000000000000..abe130a0aa48fd7c829e8e1ced2b71cf3163b730 --- /dev/null +++ b/params_shard_45.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b0b01a0a587cfc012d0e8b473124de493a653ac4ff768c1d36f8051ed92daee +size 27803648 diff --git a/params_shard_46.bin b/params_shard_46.bin new file mode 100644 index 0000000000000000000000000000000000000000..a5ca06b857b513770e2eb49dcedd16335c959913 --- /dev/null +++ b/params_shard_46.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6fce451d19e08fbf310cd9f581c52e5c35dd8c9b103f3844ad1d4196c86838b +size 58720256 diff --git a/params_shard_47.bin b/params_shard_47.bin new file mode 100644 index 0000000000000000000000000000000000000000..c4b3a96069681942b26b2ef60eb7da17db37f6da --- /dev/null +++ b/params_shard_47.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:611e5fcfc1a6b61b182edc93ddf54bd96163341aac9860e4fe35bf14a20960d4 +size 29360128 diff --git a/params_shard_48.bin b/params_shard_48.bin new file mode 100644 index 0000000000000000000000000000000000000000..b85c39520045163c6eb6d9f90bf10a3da0aa0946 --- /dev/null +++ b/params_shard_48.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:98670d34c594f931e3a660c6b063ab4685a6650686b40645324047d323b456da +size 27803648 diff --git a/params_shard_49.bin b/params_shard_49.bin new file mode 100644 index 0000000000000000000000000000000000000000..47a51754bb12446f689ab05472e193c7b3917253 --- /dev/null +++ b/params_shard_49.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe3a23c5ef4e82f9befa7d476e9bd69acb2793b9f955de66d703311ce7cd6d93 +size 58720256 diff --git a/params_shard_5.bin b/params_shard_5.bin new file mode 100644 index 0000000000000000000000000000000000000000..8a9de0ccff9984b4eb4fe21fee24d9fd4bc84332 --- /dev/null +++ b/params_shard_5.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec89be9bed3f23ecfbc092722bdcecfce8d8d901309ba40d5da76a745a1593c7 +size 29360128 diff --git a/params_shard_50.bin b/params_shard_50.bin new file mode 100644 index 0000000000000000000000000000000000000000..8d8ca95b9bacbfb4c28c9fdc32be6e62b6a89ad4 --- /dev/null +++ b/params_shard_50.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:250a6ee7f7f7685f0a9b6a900b3b7a0e41e9bb0b9db6cd28c00b8d0c4d546039 +size 29360128 diff --git a/params_shard_51.bin b/params_shard_51.bin new file mode 100644 index 0000000000000000000000000000000000000000..d117ecc3acffbfdea5919545acf90968c9a98b31 --- /dev/null +++ b/params_shard_51.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e859e8250f3abba64a9289be92da5412a0fa6d2505c5b411606ae4b7e5376119 +size 27803648 diff --git a/params_shard_52.bin b/params_shard_52.bin new file mode 100644 index 0000000000000000000000000000000000000000..902df56e84cd3893d656ffa9f514bb93eac550dc --- /dev/null +++ b/params_shard_52.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2594620e9800db64875db9e2ddf5891ba5533a55bbdbedcfcdbd04f5014d1edd +size 58720256 diff --git a/params_shard_53.bin b/params_shard_53.bin new file mode 100644 index 0000000000000000000000000000000000000000..3831590c7be7e31eaf0ee88085a3d7b866c9d539 --- /dev/null +++ b/params_shard_53.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee9eeb6667bf7a675ba95fa7ab2ab48f7d7d6599dd7faf63e0f1cd33ebe9e832 +size 29360128 diff --git a/params_shard_54.bin b/params_shard_54.bin new file mode 100644 index 0000000000000000000000000000000000000000..954c5af2cf19d04dbf64e8f9f2d403bb1f272416 --- /dev/null +++ b/params_shard_54.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc0929ae653acf1c9ef5d6d7d03f97e4031069ff551f8b79ff690508d8536ef0 +size 27803648 diff --git a/params_shard_55.bin b/params_shard_55.bin new file mode 100644 index 0000000000000000000000000000000000000000..55e8bb394d7de8603bb809e1769c175c002d5c35 --- /dev/null +++ b/params_shard_55.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ab282ada3a29e45e7978f48c09905bb2113911978093aa0bf422b1514190115 +size 58720256 diff --git a/params_shard_56.bin b/params_shard_56.bin new file mode 100644 index 0000000000000000000000000000000000000000..9b688259ccb0fe88a13cfe06ee4ca0d27b02019c --- /dev/null +++ b/params_shard_56.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5070ef002f8153ae3baecaa265e9e7a54627822e858d2dad426ebb45288a88ec +size 29360128 diff --git a/params_shard_57.bin b/params_shard_57.bin new file mode 100644 index 0000000000000000000000000000000000000000..fb61cce1256f50c4507142f0b895e1a816583310 --- /dev/null +++ b/params_shard_57.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5911611c4be40bde2b629695c16031ca3468d63b85a0de5d89199de3b4e39253 +size 27803648 diff --git a/params_shard_58.bin b/params_shard_58.bin new file mode 100644 index 0000000000000000000000000000000000000000..b3458b90352188ac4a641c42a1c701d14a6b0271 --- /dev/null +++ b/params_shard_58.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9c40209ce370488d4115176291578183dad7fb3461b35e2065e2180b7be40d2 +size 58720256 diff --git a/params_shard_59.bin b/params_shard_59.bin new file mode 100644 index 0000000000000000000000000000000000000000..8da8a69a7e4f1c6269ec400fbcad4cb7669849a8 --- /dev/null +++ b/params_shard_59.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae72a17ec48cbf0c201a779a52c0072ad2656270dbcc8d0d1b1beb4df5e00b6c +size 29360128 diff --git a/params_shard_6.bin b/params_shard_6.bin new file mode 100644 index 0000000000000000000000000000000000000000..76e6e5175d9658a9238b1019a2786d041087a7a7 --- /dev/null +++ b/params_shard_6.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b78f0cd680df2066ea5e7d2d78f978fcef2c717a0a449406acb4016eb1c6a5fb +size 27803648 diff --git a/params_shard_60.bin b/params_shard_60.bin new file mode 100644 index 0000000000000000000000000000000000000000..a9207912b1b3a8f4788e51c269daa020ecee7b41 --- /dev/null +++ b/params_shard_60.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9830917c811c456e7f8c374f86e05a100a7b7dae3072fd9b16819e7e4e2fef0 +size 27803648 diff --git a/params_shard_61.bin b/params_shard_61.bin new file mode 100644 index 0000000000000000000000000000000000000000..6fea31d428e218f9291e37fd58378bcc6603c33e --- /dev/null +++ b/params_shard_61.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0aa0305f8bee3b03daab7bdbe9177aa7294ec0c07a3a4c3d69ea893d2caa6bf +size 58720256 diff --git a/params_shard_62.bin b/params_shard_62.bin new file mode 100644 index 0000000000000000000000000000000000000000..2989e51ebdec5fba04d7f5ceff3f938c710d3e69 --- /dev/null +++ b/params_shard_62.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f1f54bbf99ae5e37756f9d8021f54ce89ca3a39e839a995bfefdea8d0287ba2 +size 29360128 diff --git a/params_shard_63.bin b/params_shard_63.bin new file mode 100644 index 0000000000000000000000000000000000000000..eeed8207205f273d30cfee8c9853a576ae68e8b8 --- /dev/null +++ b/params_shard_63.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c443949fb52f53dcb12c5f7334e6db6879d013cbd1d8daea441778b5bebd6c2 +size 27803648 diff --git a/params_shard_64.bin b/params_shard_64.bin new file mode 100644 index 0000000000000000000000000000000000000000..e50ad919ec48a513ef96003154213f7048bf1d87 --- /dev/null +++ b/params_shard_64.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d71873630d4b0757a74543d3ff75a1ab9398e6ee4ba26500e711e23cb9614134 +size 58720256 diff --git a/params_shard_65.bin b/params_shard_65.bin new file mode 100644 index 0000000000000000000000000000000000000000..cf3d6b15f903ae51451279e1b4c79a3688f2b666 --- /dev/null +++ b/params_shard_65.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86ea3378f14f5214809eb54d2d6d77022414717cd59ea2ca0a910b991d44e931 +size 29360128 diff --git a/params_shard_66.bin b/params_shard_66.bin new file mode 100644 index 0000000000000000000000000000000000000000..4d02c81526c90573912176a4908d79d0d2f45920 --- /dev/null +++ b/params_shard_66.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3499c6cd65da5f240c94fb3c4d24ab4b5cc856df223e327099fb76e8348e2e37 +size 27803648 diff --git a/params_shard_67.bin b/params_shard_67.bin new file mode 100644 index 0000000000000000000000000000000000000000..cb36af2a438e6b7631c9b07734c8365e92d35ce6 --- /dev/null +++ b/params_shard_67.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f135fee6f9142db641af81995ea8a0e24c9e9b57d4c252906c7da4283d3fc89 +size 58720256 diff --git a/params_shard_68.bin b/params_shard_68.bin new file mode 100644 index 0000000000000000000000000000000000000000..5368014acc2eb140b44e06e6cda7e833c9eb3810 --- /dev/null +++ b/params_shard_68.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:187522ee9590ed5b862d720b1f2a99dc8908cba30ee424b6d230933947841665 +size 29360128 diff --git a/params_shard_69.bin b/params_shard_69.bin new file mode 100644 index 0000000000000000000000000000000000000000..8957ddf5c5cd2f89e63431658d77883ddbba6b8b --- /dev/null +++ b/params_shard_69.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f36c3d412063bf1948b303ab3a9064e03a5a8a38c88dc035929d0b4955e9774 +size 27803648 diff --git a/params_shard_7.bin b/params_shard_7.bin new file mode 100644 index 0000000000000000000000000000000000000000..2b9ff091deb4a3877ae77693818a75e80b990458 --- /dev/null +++ b/params_shard_7.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a9881aa75a1f123f74a86177a1950d85e0a9159f9c85887b5d6e3f674542828 +size 58720256 diff --git a/params_shard_70.bin b/params_shard_70.bin new file mode 100644 index 0000000000000000000000000000000000000000..213a9eb5f207b6acbabc63739e77b17039f35772 --- /dev/null +++ b/params_shard_70.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:388dc800e039bb3ccbe1959dfa218c2e623977ee88b022e4459f1c9a5335ec07 +size 58720256 diff --git a/params_shard_71.bin b/params_shard_71.bin new file mode 100644 index 0000000000000000000000000000000000000000..23815235ce1d0554255966dc49b18a0c99754317 --- /dev/null +++ b/params_shard_71.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6823a1b62c0e77904359a8d20132703ec10fd97ffa7e38ca7152bd8cfc3b8c47 +size 29360128 diff --git a/params_shard_72.bin b/params_shard_72.bin new file mode 100644 index 0000000000000000000000000000000000000000..c7c55ff58563bf84164060586a5c5119a45f6cfb --- /dev/null +++ b/params_shard_72.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d68ff42e3c5b20371a72c91be863dcecac29faa4adf1b1c53357cc6cae9a0ba5 +size 27803648 diff --git a/params_shard_73.bin b/params_shard_73.bin new file mode 100644 index 0000000000000000000000000000000000000000..e5dc6e1663e423d71bac9403b161e27a18c3d558 --- /dev/null +++ b/params_shard_73.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85938b5046bd3c736659b57bf60135f8153223005574cdc12456e4fefe400aae +size 58720256 diff --git a/params_shard_74.bin b/params_shard_74.bin new file mode 100644 index 0000000000000000000000000000000000000000..3cb927aa656e483a94d7a84ffb51680857ccb6f7 --- /dev/null +++ b/params_shard_74.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26d1cee93c1a7cce7b5359e552413bd9b9106635e1943a60d4c7bf7cd89d3b95 +size 29360128 diff --git a/params_shard_75.bin b/params_shard_75.bin new file mode 100644 index 0000000000000000000000000000000000000000..997dfbd78318b05412e1610c0df68861c7f3c32c --- /dev/null +++ b/params_shard_75.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dbb3008d01f1c1e787538fda69ec965d1f7d2e2a70b6196c1d59290c8f47eaa8 +size 27803648 diff --git a/params_shard_76.bin b/params_shard_76.bin new file mode 100644 index 0000000000000000000000000000000000000000..613def664ee3c6309c76e3c492f88b918b3c81e8 --- /dev/null +++ b/params_shard_76.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1fc9d490db118b394062db2e6e1d057ff5f9011e3251c78eb53cc0ba450ce557 +size 58720256 diff --git a/params_shard_77.bin b/params_shard_77.bin new file mode 100644 index 0000000000000000000000000000000000000000..0f9e213d1aa05dcc5b7cefd50ee43a8879aba35c --- /dev/null +++ b/params_shard_77.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a88fa12bbbf8750d23a1999085d5032c016494964094edc0e2b662400fb296b8 +size 29360128 diff --git a/params_shard_78.bin b/params_shard_78.bin new file mode 100644 index 0000000000000000000000000000000000000000..52407b9f7a1bf70de46cfd91c96eba7ccca9ec08 --- /dev/null +++ b/params_shard_78.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2c2db4ea8dec8fd3ec76c738b8fee5308c2f8930906dbf187d995e89acbe858 +size 27803648 diff --git a/params_shard_79.bin b/params_shard_79.bin new file mode 100644 index 0000000000000000000000000000000000000000..8a436c6a5ed74e8d1afec6447f8bd6c924504869 --- /dev/null +++ b/params_shard_79.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b5b6394d01bcfa686744f707c472a91d7eaa7274d8631374c4c2a3d873d8476 +size 58720256 diff --git a/params_shard_8.bin b/params_shard_8.bin new file mode 100644 index 0000000000000000000000000000000000000000..44592a2eb2bd37e558f32375bed7db9d44cb381b --- /dev/null +++ b/params_shard_8.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75eb3b12651763dd767896cb3635e011ef76244ab5b240cae03caf4091346a01 +size 29360128 diff --git a/params_shard_80.bin b/params_shard_80.bin new file mode 100644 index 0000000000000000000000000000000000000000..b193ffe068f13c8225a111bb6c11add5eebc48ca --- /dev/null +++ b/params_shard_80.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:773c9fa8d696b62995df017a7a4c486764e95d378d418b67f7c02f146b5e0b62 +size 29360128 diff --git a/params_shard_81.bin b/params_shard_81.bin new file mode 100644 index 0000000000000000000000000000000000000000..a2977e5f321f88ea57bb4190a7401e09c77759d7 --- /dev/null +++ b/params_shard_81.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d12f14cd77848f4d3358e8c9825cb7bc107543afee3cf4e8839af82341c656e5 +size 27803648 diff --git a/params_shard_82.bin b/params_shard_82.bin new file mode 100644 index 0000000000000000000000000000000000000000..e0144187ba24d2b5b4c7880bfaeb7583c9a35396 --- /dev/null +++ b/params_shard_82.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bbc2aece0cd6ebecdd8235320ef0362f55ab78d9cfbfc378a1e0e4ad4c955f0 +size 58720256 diff --git a/params_shard_83.bin b/params_shard_83.bin new file mode 100644 index 0000000000000000000000000000000000000000..366d1b3371e9b4fa7dda83f1306d6b1a6dbbbf91 --- /dev/null +++ b/params_shard_83.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8eeae94b11e05b74d7fd5495c3a630ac44f7f868179fd69036aa7bf6f6a4cc7 +size 29360128 diff --git a/params_shard_84.bin b/params_shard_84.bin new file mode 100644 index 0000000000000000000000000000000000000000..04f570a88252fecf52fbdd9ac32f9493e10a27ca --- /dev/null +++ b/params_shard_84.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee0c8955cdee773abd3f9fbd22cc780e9c374716a627af0e469babfe29de853d +size 27803648 diff --git a/params_shard_85.bin b/params_shard_85.bin new file mode 100644 index 0000000000000000000000000000000000000000..f109c5941443d85502fde09a08184e5058dd94d8 --- /dev/null +++ b/params_shard_85.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:040a3a4c19cc293ec7f168c47935ceb10eea9c8854d4dd09711037754ef44f24 +size 58720256 diff --git a/params_shard_86.bin b/params_shard_86.bin new file mode 100644 index 0000000000000000000000000000000000000000..96218a96a85301b67a18eae614ea56ab5b4102b9 --- /dev/null +++ b/params_shard_86.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:799dc05455b751b03bb79ad79c8bb56db80c792f84460e4dadd81e762f982b6e +size 29360128 diff --git a/params_shard_87.bin b/params_shard_87.bin new file mode 100644 index 0000000000000000000000000000000000000000..550e7c9ba8937f5323f5a8806230eee98bcea550 --- /dev/null +++ b/params_shard_87.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e13b83445870b712aa178a953b08d816142f865e9c33f2ebffee48261d49c030 +size 27803648 diff --git a/params_shard_88.bin b/params_shard_88.bin new file mode 100644 index 0000000000000000000000000000000000000000..653a45ceb2fa1984dcac4ba55af7a46623fda912 --- /dev/null +++ b/params_shard_88.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4983d2f2d56452ed97e6ccb07a0c023dcd5959cbaecc6d6df33d717be0fede5f +size 58720256 diff --git a/params_shard_89.bin b/params_shard_89.bin new file mode 100644 index 0000000000000000000000000000000000000000..61d2c24382981c5a42764b721c326638fb44613e --- /dev/null +++ b/params_shard_89.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0900caa547b994c502b56302d94a87b9939920cc548c7388444be38246afd01 +size 29360128 diff --git a/params_shard_9.bin b/params_shard_9.bin new file mode 100644 index 0000000000000000000000000000000000000000..f56240a8b1d3e8fc42537b7dc29fa6481a4d2bf2 --- /dev/null +++ b/params_shard_9.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc7516ee81d38cdac58c8157c7d3c8fbd71840dc3f5048e1ef44983f9d69d408 +size 27803648 diff --git a/params_shard_90.bin b/params_shard_90.bin new file mode 100644 index 0000000000000000000000000000000000000000..2472bd5a5e7df0a3c3cefd1b4e4855b29c041f52 --- /dev/null +++ b/params_shard_90.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f33d99c04a34d68ed89073a09fdc3a0adad483ac19ace7837aabd4d4ff487bc +size 27803648 diff --git a/params_shard_91.bin b/params_shard_91.bin new file mode 100644 index 0000000000000000000000000000000000000000..68f52fafef127b687e7c475e8b833c4830753a43 --- /dev/null +++ b/params_shard_91.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2846c789a1747990656ea3d958bf99d290cb74504a235c6c79c81290fcb53525 +size 58720256 diff --git a/params_shard_92.bin b/params_shard_92.bin new file mode 100644 index 0000000000000000000000000000000000000000..1e7cdf53c77ba0b68111fc322c7b33d14a5ef344 --- /dev/null +++ b/params_shard_92.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de4e90cdefe3fa319d31f3942e6cea1bc993f9676babadfacad5aebac767ca8b +size 29360128 diff --git a/params_shard_93.bin b/params_shard_93.bin new file mode 100644 index 0000000000000000000000000000000000000000..99581c41495e31f22cf2eec115272832748e0eee --- /dev/null +++ b/params_shard_93.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2be9b71b6c383730de5b19cdc6a16848d1f895ad532b90aa9cea532d541fc02f +size 27803648 diff --git a/params_shard_94.bin b/params_shard_94.bin new file mode 100644 index 0000000000000000000000000000000000000000..39d824eb018f8d3bf89663fb621d4debf8120fc0 --- /dev/null +++ b/params_shard_94.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce13f759c76eb8388d8921322c452c173682c55719dbf67d10a3ecaf77464c0f +size 58720256 diff --git a/params_shard_95.bin b/params_shard_95.bin new file mode 100644 index 0000000000000000000000000000000000000000..131b74fce1f08b0c5b71a449a0f5852f94478ad1 --- /dev/null +++ b/params_shard_95.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6676be00bcfbb91d197b4e6f37a60a626485aabc399729b28e33bcbe45ed9e4 +size 29360128 diff --git a/params_shard_96.bin b/params_shard_96.bin new file mode 100644 index 0000000000000000000000000000000000000000..cd673df4bcf568161b8a0a60fa3d627682752efe --- /dev/null +++ b/params_shard_96.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ac5a2893ae06d7d5b24937ac7e7e0ffc88797f5d98b6c7197958133970824a7 +size 262160384 diff --git a/params_shard_97.bin b/params_shard_97.bin new file mode 100644 index 0000000000000000000000000000000000000000..b7c57b8538e7488c6f15ef0da9a8451d2cf67938 --- /dev/null +++ b/params_shard_97.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1df12ef1ba5a62f3ecba8c747b67c89962425380c9af9311a18d3effd340ad4 +size 28860416 diff --git a/private-llm-config.json b/private-llm-config.json new file mode 100644 index 0000000000000000000000000000000000000000..767517ef2819dd51bc094f8eee4534c0de3d7714 --- /dev/null +++ b/private-llm-config.json @@ -0,0 +1,21 @@ +{ + "model_lib": "samantha-1.2-mistral-7b-w4a16g128asym", + "local_id": "samantha-1.2-mistral-7b-w4a16g128asym", + "conv_template": "mistral_default", + "temperature": 0.7, + "repetition_penalty": 1.0, + "top_p": 0.95, + "mean_gen_len": 128, + "max_gen_len": 512, + "num_shards": 1, + "shift_fill_factor": 0.3, + "tokenizer_files": [ + "added_tokens.json", + "tokenizer.model" + ], + "model_category": "mistral", + "model_name": "samantha-1.2-mistral-7b", + "vocab_size": 32002, + "sliding_window": 4096, + "sliding_window_chunk_size": 4096 +} diff --git a/tokenizer.model b/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..8b443ef19c2a19acc3ac64fb9c3db4a72921dff6 --- /dev/null +++ b/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055 +size 493443 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..23becd85000bd811212c519a764b50a7d639d3b5 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,68 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32000": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32001": { + "content": "<|im_start|>", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [ + "", + "", + "", + "<|im_end|>", + "<|im_start|>" + ], + "bos_token": "", + "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + "legacy": false, + "model_max_length": 1000000000000000019884624838656, + "pad_token": null, + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "LlamaTokenizer", + "tokenizer_file": null, + "trust_remote_code": false, + "unk_token": "", + "use_default_system_prompt": true, + "use_fast": true +}