{ "metadata": { "ParamSize": 615, "ParamBytes": 2426406912.0, "BitsPerParam": 4.5141619875624475 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 335626240, "records": [ { "name": "language_model.model.embed_tokens.q_weight", "shape": [ 262208, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 335626240, "byteOffset": 0 } ], "md5sum": "b0cb61ed8f2b36e8bed3e1084b5542da" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 41953280, "records": [ { "name": "language_model.model.embed_tokens.q_scale", "shape": [ 262208, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 41953280, "byteOffset": 0 } ], "md5sum": "4af7c226d701c5d1c3b8e4da1ade95b5" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 26214400, "records": [ { "name": "language_model.model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 20480, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26214400, "byteOffset": 0 } ], "md5sum": "6dc0ca9bd9ffe685ecacef3bb4034485" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 26896384, "records": [ { "name": "language_model.model.layers.0.input_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 0 }, { "name": "language_model.model.layers.0.mlp.down_proj.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 5120 }, { "name": "language_model.model.layers.0.mlp.down_proj.q_scale", "shape": [ 2560, 320 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13112320 }, { "name": "language_model.model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 20480, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3276800, "byteOffset": 14750720 }, { "name": "language_model.model.layers.0.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 18027520 }, { "name": "language_model.model.layers.0.post_feedforward_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 18032640 }, { "name": "language_model.model.layers.0.pre_feedforward_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 18037760 }, { "name": "language_model.model.layers.0.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 18042880 }, { "name": "language_model.model.layers.0.self_attn.k_proj.q_weight", "shape": [ 1024, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 18043392 }, { "name": "language_model.model.layers.0.self_attn.k_proj.q_scale", "shape": [ 1024, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 163840, "byteOffset": 19354112 }, { "name": "language_model.model.layers.0.self_attn.o_proj.q_weight", "shape": [ 2560, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 19517952 }, { "name": "language_model.model.layers.0.self_attn.o_proj.q_scale", "shape": [ 2560, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 327680, "byteOffset": 22139392 }, { "name": "language_model.model.layers.0.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 22467072 }, { "name": "language_model.model.layers.0.self_attn.q_proj.q_weight", "shape": [ 2048, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 22467584 }, { "name": "language_model.model.layers.0.self_attn.q_proj.q_scale", "shape": [ 2048, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 327680, "byteOffset": 25089024 }, { "name": "language_model.model.layers.0.self_attn.v_proj.q_weight", "shape": [ 1024, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 25416704 }, { "name": "language_model.model.layers.0.self_attn.v_proj.q_scale", "shape": [ 1024, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 163840, "byteOffset": 26727424 }, { "name": "language_model.model.layers.1.input_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 26891264 } ], "md5sum": "1ae013807ecda4287231e3c91ca95305" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 26214400, "records": [ { "name": "language_model.model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 20480, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26214400, "byteOffset": 0 } ], "md5sum": "898737e152892655b8642dd46db21b56" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 26891264, "records": [ { "name": "language_model.model.layers.1.mlp.down_proj.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.model.layers.1.mlp.down_proj.q_scale", "shape": [ 2560, 320 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 20480, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3276800, "byteOffset": 14745600 }, { "name": "language_model.model.layers.1.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 18022400 }, { "name": "language_model.model.layers.1.post_feedforward_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 18027520 }, { "name": "language_model.model.layers.1.pre_feedforward_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 18032640 }, { "name": "language_model.model.layers.1.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 18037760 }, { "name": "language_model.model.layers.1.self_attn.k_proj.q_weight", "shape": [ 1024, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 18038272 }, { "name": "language_model.model.layers.1.self_attn.k_proj.q_scale", "shape": [ 1024, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 163840, "byteOffset": 19348992 }, { "name": "language_model.model.layers.1.self_attn.o_proj.q_weight", "shape": [ 2560, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 19512832 }, { "name": "language_model.model.layers.1.self_attn.o_proj.q_scale", "shape": [ 2560, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 327680, "byteOffset": 22134272 }, { "name": "language_model.model.layers.1.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 22461952 }, { "name": "language_model.model.layers.1.self_attn.q_proj.q_weight", "shape": [ 2048, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 22462464 }, { "name": "language_model.model.layers.1.self_attn.q_proj.q_scale", "shape": [ 2048, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 327680, "byteOffset": 25083904 }, { "name": "language_model.model.layers.1.self_attn.v_proj.q_weight", "shape": [ 1024, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 25411584 }, { "name": "language_model.model.layers.1.self_attn.v_proj.q_scale", "shape": [ 1024, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 163840, "byteOffset": 26722304 }, { "name": "language_model.model.layers.10.input_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 26886144 } ], "md5sum": "9a9d0d30b63119d93f9832e284e2ea3b" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 26214400, "records": [ { "name": "language_model.model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 20480, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26214400, "byteOffset": 0 } ], "md5sum": "416e3594680010265bb2f0fb0f879919" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 26891264, "records": [ { "name": "language_model.model.layers.10.mlp.down_proj.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.model.layers.10.mlp.down_proj.q_scale", "shape": [ 2560, 320 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 20480, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3276800, "byteOffset": 14745600 }, { "name": "language_model.model.layers.10.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 18022400 }, { "name": "language_model.model.layers.10.post_feedforward_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 18027520 }, { "name": "language_model.model.layers.10.pre_feedforward_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 18032640 }, { "name": "language_model.model.layers.10.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 18037760 }, { "name": "language_model.model.layers.10.self_attn.k_proj.q_weight", "shape": [ 1024, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 18038272 }, { "name": "language_model.model.layers.10.self_attn.k_proj.q_scale", "shape": [ 1024, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 163840, "byteOffset": 19348992 }, { "name": "language_model.model.layers.10.self_attn.o_proj.q_weight", "shape": [ 2560, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 19512832 }, { "name": "language_model.model.layers.10.self_attn.o_proj.q_scale", "shape": [ 2560, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 327680, "byteOffset": 22134272 }, { "name": "language_model.model.layers.10.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 22461952 }, { "name": "language_model.model.layers.10.self_attn.q_proj.q_weight", "shape": [ 2048, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 22462464 }, { "name": "language_model.model.layers.10.self_attn.q_proj.q_scale", "shape": [ 2048, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 327680, "byteOffset": 25083904 }, { "name": "language_model.model.layers.10.self_attn.v_proj.q_weight", "shape": [ 1024, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 25411584 }, { "name": "language_model.model.layers.10.self_attn.v_proj.q_scale", "shape": [ 1024, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 163840, "byteOffset": 26722304 }, { "name": "language_model.model.layers.11.input_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 26886144 } ], "md5sum": "5414bbc5ac713b9734096971adac513a" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 26214400, "records": [ { "name": "language_model.model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 20480, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26214400, "byteOffset": 0 } ], "md5sum": "6860618106c49d0ceee077784329119a" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 26891264, "records": [ { "name": "language_model.model.layers.11.mlp.down_proj.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.model.layers.11.mlp.down_proj.q_scale", "shape": [ 2560, 320 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 20480, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3276800, "byteOffset": 14745600 }, { "name": "language_model.model.layers.11.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 18022400 }, { "name": "language_model.model.layers.11.post_feedforward_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 18027520 }, { "name": "language_model.model.layers.11.pre_feedforward_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 18032640 }, { "name": "language_model.model.layers.11.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 18037760 }, { "name": "language_model.model.layers.11.self_attn.k_proj.q_weight", "shape": [ 1024, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 18038272 }, { "name": "language_model.model.layers.11.self_attn.k_proj.q_scale", "shape": [ 1024, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 163840, "byteOffset": 19348992 }, { "name": "language_model.model.layers.11.self_attn.o_proj.q_weight", "shape": [ 2560, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 19512832 }, { "name": "language_model.model.layers.11.self_attn.o_proj.q_scale", "shape": [ 2560, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 327680, "byteOffset": 22134272 }, { "name": "language_model.model.layers.11.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 22461952 }, { "name": "language_model.model.layers.11.self_attn.q_proj.q_weight", "shape": [ 2048, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 22462464 }, { "name": "language_model.model.layers.11.self_attn.q_proj.q_scale", "shape": [ 2048, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 327680, "byteOffset": 25083904 }, { "name": "language_model.model.layers.11.self_attn.v_proj.q_weight", "shape": [ 1024, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 25411584 }, { "name": "language_model.model.layers.11.self_attn.v_proj.q_scale", "shape": [ 1024, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 163840, "byteOffset": 26722304 }, { "name": "language_model.model.layers.12.input_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 26886144 } ], "md5sum": "ad7724294792cd583aba172fafcfb0a1" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 26214400, "records": [ { "name": "language_model.model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 20480, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26214400, "byteOffset": 0 } ], "md5sum": "5c3e8df00afa2cb47fb428396dd9fd89" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 26891264, "records": [ { "name": "language_model.model.layers.12.mlp.down_proj.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.model.layers.12.mlp.down_proj.q_scale", "shape": [ 2560, 320 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 20480, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3276800, "byteOffset": 14745600 }, { "name": "language_model.model.layers.12.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 18022400 }, { "name": "language_model.model.layers.12.post_feedforward_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 18027520 }, { "name": "language_model.model.layers.12.pre_feedforward_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 18032640 }, { "name": "language_model.model.layers.12.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 18037760 }, { "name": "language_model.model.layers.12.self_attn.k_proj.q_weight", "shape": [ 1024, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 18038272 }, { "name": "language_model.model.layers.12.self_attn.k_proj.q_scale", "shape": [ 1024, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 163840, "byteOffset": 19348992 }, { "name": "language_model.model.layers.12.self_attn.o_proj.q_weight", "shape": [ 2560, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 19512832 }, { "name": "language_model.model.layers.12.self_attn.o_proj.q_scale", "shape": [ 2560, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 327680, "byteOffset": 22134272 }, { "name": "language_model.model.layers.12.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 22461952 }, { "name": "language_model.model.layers.12.self_attn.q_proj.q_weight", "shape": [ 2048, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 22462464 }, { "name": "language_model.model.layers.12.self_attn.q_proj.q_scale", "shape": [ 2048, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 327680, "byteOffset": 25083904 }, { "name": "language_model.model.layers.12.self_attn.v_proj.q_weight", "shape": [ 1024, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 25411584 }, { "name": "language_model.model.layers.12.self_attn.v_proj.q_scale", "shape": [ 1024, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 163840, "byteOffset": 26722304 }, { "name": "language_model.model.layers.13.input_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 26886144 } ], "md5sum": "f7489294a48c5073b5bea6453fdcd7ce" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 26214400, "records": [ { "name": "language_model.model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 20480, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26214400, "byteOffset": 0 } ], "md5sum": "352402311f4789be745a0df4a6b2b74f" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 26214400, "records": [ { "name": "language_model.model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 20480, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26214400, "byteOffset": 0 } ], "md5sum": "38eafc3cd68bc0f105a416370235661b" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 31638016, "records": [ { "name": "language_model.model.layers.13.mlp.down_proj.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.model.layers.13.mlp.down_proj.q_scale", "shape": [ 2560, 320 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 20480, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3276800, "byteOffset": 14745600 }, { "name": "language_model.model.layers.13.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 18022400 }, { "name": "language_model.model.layers.13.post_feedforward_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 18027520 }, { "name": "language_model.model.layers.13.pre_feedforward_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 18032640 }, { "name": "language_model.model.layers.13.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 18037760 }, { "name": "language_model.model.layers.13.self_attn.k_proj.q_weight", "shape": [ 1024, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 18038272 }, { "name": "language_model.model.layers.13.self_attn.k_proj.q_scale", "shape": [ 1024, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 163840, "byteOffset": 19348992 }, { "name": "language_model.model.layers.13.self_attn.o_proj.q_weight", "shape": [ 2560, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 19512832 }, { "name": "language_model.model.layers.13.self_attn.o_proj.q_scale", "shape": [ 2560, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 327680, "byteOffset": 22134272 }, { "name": "language_model.model.layers.13.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 22461952 }, { "name": "language_model.model.layers.13.self_attn.q_proj.q_weight", "shape": [ 2048, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 22462464 }, { "name": "language_model.model.layers.13.self_attn.q_proj.q_scale", "shape": [ 2048, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 327680, "byteOffset": 25083904 }, { "name": "language_model.model.layers.13.self_attn.v_proj.q_weight", "shape": [ 1024, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 25411584 }, { "name": "language_model.model.layers.13.self_attn.v_proj.q_scale", "shape": [ 1024, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 163840, "byteOffset": 26722304 }, { "name": "language_model.model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 20480, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3276800, "byteOffset": 26886144 }, { "name": "language_model.model.layers.14.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 30162944 }, { "name": "language_model.model.layers.14.self_attn.k_proj.q_weight", "shape": [ 1024, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 30163456 }, { "name": "language_model.model.layers.14.self_attn.k_proj.q_scale", "shape": [ 1024, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 163840, "byteOffset": 31474176 } ], "md5sum": "e253cdc9e76cda1ba676ca2773a52ed8" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 26214400, "records": [ { "name": "language_model.model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 20480, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26214400, "byteOffset": 0 } ], "md5sum": "b2b840da5c815b0c526fad3b94314f81" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 32790016, "records": [ { "name": "language_model.model.layers.14.self_attn.o_proj.q_weight", "shape": [ 2560, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 0 }, { "name": "language_model.model.layers.14.self_attn.o_proj.q_scale", "shape": [ 2560, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 327680, "byteOffset": 2621440 }, { "name": "language_model.model.layers.14.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 2949120 }, { "name": "language_model.model.layers.14.self_attn.q_proj.q_weight", "shape": [ 2048, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 2949632 }, { "name": "language_model.model.layers.14.self_attn.q_proj.q_scale", "shape": [ 2048, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 327680, "byteOffset": 5571072 }, { "name": "language_model.model.layers.14.self_attn.v_proj.q_weight", "shape": [ 1024, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 5898752 }, { "name": "language_model.model.layers.14.self_attn.v_proj.q_scale", "shape": [ 1024, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 163840, "byteOffset": 7209472 }, { "name": "language_model.model.layers.2.input_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 7373312 }, { "name": "language_model.model.layers.2.mlp.down_proj.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 7378432 }, { "name": "language_model.model.layers.2.mlp.down_proj.q_scale", "shape": [ 2560, 320 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 20485632 }, { "name": "language_model.model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 20480, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3276800, "byteOffset": 22124032 }, { "name": "language_model.model.layers.2.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 25400832 }, { "name": "language_model.model.layers.2.post_feedforward_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 25405952 }, { "name": "language_model.model.layers.2.pre_feedforward_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 25411072 }, { "name": "language_model.model.layers.2.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 25416192 }, { "name": "language_model.model.layers.2.self_attn.k_proj.q_weight", "shape": [ 1024, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 25416704 }, { "name": "language_model.model.layers.2.self_attn.k_proj.q_scale", "shape": [ 1024, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 163840, "byteOffset": 26727424 }, { "name": "language_model.model.layers.2.self_attn.o_proj.q_weight", "shape": [ 2560, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 26891264 }, { "name": "language_model.model.layers.2.self_attn.o_proj.q_scale", "shape": [ 2560, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 327680, "byteOffset": 29512704 }, { "name": "language_model.model.layers.2.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 29840384 }, { "name": "language_model.model.layers.2.self_attn.q_proj.q_weight", "shape": [ 2048, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 29840896 }, { "name": "language_model.model.layers.2.self_attn.q_proj.q_scale", "shape": [ 2048, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 327680, "byteOffset": 32462336 } ], "md5sum": "3f5303602cf349e9c546e0faf330b789" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 26214400, "records": [ { "name": "language_model.model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 20480, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26214400, "byteOffset": 0 } ], "md5sum": "92e8897899c07fe0466c4622e276004e" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 28370944, "records": [ { "name": "language_model.model.layers.2.self_attn.v_proj.q_weight", "shape": [ 1024, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 0 }, { "name": "language_model.model.layers.2.self_attn.v_proj.q_scale", "shape": [ 1024, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 163840, "byteOffset": 1310720 }, { "name": "language_model.model.layers.3.input_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 1474560 }, { "name": "language_model.model.layers.3.mlp.down_proj.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 1479680 }, { "name": "language_model.model.layers.3.mlp.down_proj.q_scale", "shape": [ 2560, 320 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 14586880 }, { "name": "language_model.model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 20480, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3276800, "byteOffset": 16225280 }, { "name": "language_model.model.layers.3.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 19502080 }, { "name": "language_model.model.layers.3.post_feedforward_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 19507200 }, { "name": "language_model.model.layers.3.pre_feedforward_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 19512320 }, { "name": "language_model.model.layers.3.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 19517440 }, { "name": "language_model.model.layers.3.self_attn.k_proj.q_weight", "shape": [ 1024, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 19517952 }, { "name": "language_model.model.layers.3.self_attn.k_proj.q_scale", "shape": [ 1024, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 163840, "byteOffset": 20828672 }, { "name": "language_model.model.layers.3.self_attn.o_proj.q_weight", "shape": [ 2560, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 20992512 }, { "name": "language_model.model.layers.3.self_attn.o_proj.q_scale", "shape": [ 2560, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 327680, "byteOffset": 23613952 }, { "name": "language_model.model.layers.3.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 23941632 }, { "name": "language_model.model.layers.3.self_attn.q_proj.q_weight", "shape": [ 2048, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 23942144 }, { "name": "language_model.model.layers.3.self_attn.q_proj.q_scale", "shape": [ 2048, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 327680, "byteOffset": 26563584 }, { "name": "language_model.model.layers.3.self_attn.v_proj.q_weight", "shape": [ 1024, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 26891264 }, { "name": "language_model.model.layers.3.self_attn.v_proj.q_scale", "shape": [ 1024, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 163840, "byteOffset": 28201984 }, { "name": "language_model.model.layers.4.input_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 28365824 } ], "md5sum": "e4e865fb2b6ea3523feedb279f7e0290" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 26214400, "records": [ { "name": "language_model.model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 20480, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26214400, "byteOffset": 0 } ], "md5sum": "20397668f1e79a6f7a633e02c8258ef4" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 26891264, "records": [ { "name": "language_model.model.layers.4.mlp.down_proj.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.model.layers.4.mlp.down_proj.q_scale", "shape": [ 2560, 320 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 20480, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3276800, "byteOffset": 14745600 }, { "name": "language_model.model.layers.4.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 18022400 }, { "name": "language_model.model.layers.4.post_feedforward_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 18027520 }, { "name": "language_model.model.layers.4.pre_feedforward_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 18032640 }, { "name": "language_model.model.layers.4.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 18037760 }, { "name": "language_model.model.layers.4.self_attn.k_proj.q_weight", "shape": [ 1024, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 18038272 }, { "name": "language_model.model.layers.4.self_attn.k_proj.q_scale", "shape": [ 1024, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 163840, "byteOffset": 19348992 }, { "name": "language_model.model.layers.4.self_attn.o_proj.q_weight", "shape": [ 2560, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 19512832 }, { "name": "language_model.model.layers.4.self_attn.o_proj.q_scale", "shape": [ 2560, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 327680, "byteOffset": 22134272 }, { "name": "language_model.model.layers.4.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 22461952 }, { "name": "language_model.model.layers.4.self_attn.q_proj.q_weight", "shape": [ 2048, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 22462464 }, { "name": "language_model.model.layers.4.self_attn.q_proj.q_scale", "shape": [ 2048, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 327680, "byteOffset": 25083904 }, { "name": "language_model.model.layers.4.self_attn.v_proj.q_weight", "shape": [ 1024, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 25411584 }, { "name": "language_model.model.layers.4.self_attn.v_proj.q_scale", "shape": [ 1024, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 163840, "byteOffset": 26722304 }, { "name": "language_model.model.layers.5.input_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 26886144 } ], "md5sum": "b43d8bc718b3d2379189040773d975ac" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 26214400, "records": [ { "name": "language_model.model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 20480, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26214400, "byteOffset": 0 } ], "md5sum": "87216e6237a3b3ad5e73a794d49aa17d" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 26891264, "records": [ { "name": "language_model.model.layers.5.mlp.down_proj.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.model.layers.5.mlp.down_proj.q_scale", "shape": [ 2560, 320 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 20480, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3276800, "byteOffset": 14745600 }, { "name": "language_model.model.layers.5.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 18022400 }, { "name": "language_model.model.layers.5.post_feedforward_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 18027520 }, { "name": "language_model.model.layers.5.pre_feedforward_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 18032640 }, { "name": "language_model.model.layers.5.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 18037760 }, { "name": "language_model.model.layers.5.self_attn.k_proj.q_weight", "shape": [ 1024, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 18038272 }, { "name": "language_model.model.layers.5.self_attn.k_proj.q_scale", "shape": [ 1024, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 163840, "byteOffset": 19348992 }, { "name": "language_model.model.layers.5.self_attn.o_proj.q_weight", "shape": [ 2560, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 19512832 }, { "name": "language_model.model.layers.5.self_attn.o_proj.q_scale", "shape": [ 2560, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 327680, "byteOffset": 22134272 }, { "name": "language_model.model.layers.5.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 22461952 }, { "name": "language_model.model.layers.5.self_attn.q_proj.q_weight", "shape": [ 2048, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 22462464 }, { "name": "language_model.model.layers.5.self_attn.q_proj.q_scale", "shape": [ 2048, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 327680, "byteOffset": 25083904 }, { "name": "language_model.model.layers.5.self_attn.v_proj.q_weight", "shape": [ 1024, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 25411584 }, { "name": "language_model.model.layers.5.self_attn.v_proj.q_scale", "shape": [ 1024, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 163840, "byteOffset": 26722304 }, { "name": "language_model.model.layers.6.input_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 26886144 } ], "md5sum": "73d8c47b6f2305d4ae2953a15d824428" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 26214400, "records": [ { "name": "language_model.model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 20480, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26214400, "byteOffset": 0 } ], "md5sum": "dcf78ebf024a32156f1f62b8dc69d560" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 26891264, "records": [ { "name": "language_model.model.layers.6.mlp.down_proj.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.model.layers.6.mlp.down_proj.q_scale", "shape": [ 2560, 320 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 20480, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3276800, "byteOffset": 14745600 }, { "name": "language_model.model.layers.6.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 18022400 }, { "name": "language_model.model.layers.6.post_feedforward_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 18027520 }, { "name": "language_model.model.layers.6.pre_feedforward_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 18032640 }, { "name": "language_model.model.layers.6.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 18037760 }, { "name": "language_model.model.layers.6.self_attn.k_proj.q_weight", "shape": [ 1024, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 18038272 }, { "name": "language_model.model.layers.6.self_attn.k_proj.q_scale", "shape": [ 1024, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 163840, "byteOffset": 19348992 }, { "name": "language_model.model.layers.6.self_attn.o_proj.q_weight", "shape": [ 2560, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 19512832 }, { "name": "language_model.model.layers.6.self_attn.o_proj.q_scale", "shape": [ 2560, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 327680, "byteOffset": 22134272 }, { "name": "language_model.model.layers.6.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 22461952 }, { "name": "language_model.model.layers.6.self_attn.q_proj.q_weight", "shape": [ 2048, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 22462464 }, { "name": "language_model.model.layers.6.self_attn.q_proj.q_scale", "shape": [ 2048, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 327680, "byteOffset": 25083904 }, { "name": "language_model.model.layers.6.self_attn.v_proj.q_weight", "shape": [ 1024, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 25411584 }, { "name": "language_model.model.layers.6.self_attn.v_proj.q_scale", "shape": [ 1024, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 163840, "byteOffset": 26722304 }, { "name": "language_model.model.layers.7.input_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 26886144 } ], "md5sum": "db5a4008af253d6184c1b7767972d0c8" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 26214400, "records": [ { "name": "language_model.model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 20480, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26214400, "byteOffset": 0 } ], "md5sum": "4d320d9e3ff7c5289f6e4bcde9e432b0" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 26891264, "records": [ { "name": "language_model.model.layers.7.mlp.down_proj.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.model.layers.7.mlp.down_proj.q_scale", "shape": [ 2560, 320 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 20480, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3276800, "byteOffset": 14745600 }, { "name": "language_model.model.layers.7.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 18022400 }, { "name": "language_model.model.layers.7.post_feedforward_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 18027520 }, { "name": "language_model.model.layers.7.pre_feedforward_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 18032640 }, { "name": "language_model.model.layers.7.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 18037760 }, { "name": "language_model.model.layers.7.self_attn.k_proj.q_weight", "shape": [ 1024, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 18038272 }, { "name": "language_model.model.layers.7.self_attn.k_proj.q_scale", "shape": [ 1024, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 163840, "byteOffset": 19348992 }, { "name": "language_model.model.layers.7.self_attn.o_proj.q_weight", "shape": [ 2560, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 19512832 }, { "name": "language_model.model.layers.7.self_attn.o_proj.q_scale", "shape": [ 2560, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 327680, "byteOffset": 22134272 }, { "name": "language_model.model.layers.7.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 22461952 }, { "name": "language_model.model.layers.7.self_attn.q_proj.q_weight", "shape": [ 2048, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 22462464 }, { "name": "language_model.model.layers.7.self_attn.q_proj.q_scale", "shape": [ 2048, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 327680, "byteOffset": 25083904 }, { "name": "language_model.model.layers.7.self_attn.v_proj.q_weight", "shape": [ 1024, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 25411584 }, { "name": "language_model.model.layers.7.self_attn.v_proj.q_scale", "shape": [ 1024, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 163840, "byteOffset": 26722304 }, { "name": "language_model.model.layers.8.input_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 26886144 } ], "md5sum": "233342880e4cf8eb41e0e8641013626a" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 26214400, "records": [ { "name": "language_model.model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 20480, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26214400, "byteOffset": 0 } ], "md5sum": "a1ff33560ed9bd14cad265a52b1db760" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 26891264, "records": [ { "name": "language_model.model.layers.8.mlp.down_proj.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.model.layers.8.mlp.down_proj.q_scale", "shape": [ 2560, 320 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 20480, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3276800, "byteOffset": 14745600 }, { "name": "language_model.model.layers.8.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 18022400 }, { "name": "language_model.model.layers.8.post_feedforward_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 18027520 }, { "name": "language_model.model.layers.8.pre_feedforward_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 18032640 }, { "name": "language_model.model.layers.8.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 18037760 }, { "name": "language_model.model.layers.8.self_attn.k_proj.q_weight", "shape": [ 1024, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 18038272 }, { "name": "language_model.model.layers.8.self_attn.k_proj.q_scale", "shape": [ 1024, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 163840, "byteOffset": 19348992 }, { "name": "language_model.model.layers.8.self_attn.o_proj.q_weight", "shape": [ 2560, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 19512832 }, { "name": "language_model.model.layers.8.self_attn.o_proj.q_scale", "shape": [ 2560, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 327680, "byteOffset": 22134272 }, { "name": "language_model.model.layers.8.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 22461952 }, { "name": "language_model.model.layers.8.self_attn.q_proj.q_weight", "shape": [ 2048, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 22462464 }, { "name": "language_model.model.layers.8.self_attn.q_proj.q_scale", "shape": [ 2048, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 327680, "byteOffset": 25083904 }, { "name": "language_model.model.layers.8.self_attn.v_proj.q_weight", "shape": [ 1024, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 25411584 }, { "name": "language_model.model.layers.8.self_attn.v_proj.q_scale", "shape": [ 1024, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 163840, "byteOffset": 26722304 }, { "name": "language_model.model.layers.9.input_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 26886144 } ], "md5sum": "365da04f72587385ef936d0b7c48c524" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 26214400, "records": [ { "name": "language_model.model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 20480, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26214400, "byteOffset": 0 } ], "md5sum": "ed1349a569da9b2985223b7ea769cbd7" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 26891264, "records": [ { "name": "language_model.model.layers.9.mlp.down_proj.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.model.layers.9.mlp.down_proj.q_scale", "shape": [ 2560, 320 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 20480, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3276800, "byteOffset": 14745600 }, { "name": "language_model.model.layers.9.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 18022400 }, { "name": "language_model.model.layers.9.post_feedforward_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 18027520 }, { "name": "language_model.model.layers.9.pre_feedforward_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 18032640 }, { "name": "language_model.model.layers.9.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 18037760 }, { "name": "language_model.model.layers.9.self_attn.k_proj.q_weight", "shape": [ 1024, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 18038272 }, { "name": "language_model.model.layers.9.self_attn.k_proj.q_scale", "shape": [ 1024, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 163840, "byteOffset": 19348992 }, { "name": "language_model.model.layers.9.self_attn.o_proj.q_weight", "shape": [ 2560, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 19512832 }, { "name": "language_model.model.layers.9.self_attn.o_proj.q_scale", "shape": [ 2560, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 327680, "byteOffset": 22134272 }, { "name": "language_model.model.layers.9.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 22461952 }, { "name": "language_model.model.layers.9.self_attn.q_proj.q_weight", "shape": [ 2048, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 22462464 }, { "name": "language_model.model.layers.9.self_attn.q_proj.q_scale", "shape": [ 2048, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 327680, "byteOffset": 25083904 }, { "name": "language_model.model.layers.9.self_attn.v_proj.q_weight", "shape": [ 1024, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 25411584 }, { "name": "language_model.model.layers.9.self_attn.v_proj.q_scale", "shape": [ 1024, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 163840, "byteOffset": 26722304 }, { "name": "language_model.model.layers.14.input_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 26886144 } ], "md5sum": "8e1b3d688b9d1ea7f08910390a2b599f" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 26214400, "records": [ { "name": "language_model.model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 20480, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26214400, "byteOffset": 0 } ], "md5sum": "e6dcd4bd015fb65a73b3be115f50030a" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 32804352, "records": [ { "name": "language_model.model.layers.14.mlp.down_proj.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.model.layers.14.mlp.down_proj.q_scale", "shape": [ 2560, 320 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.model.layers.14.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "language_model.model.layers.14.post_feedforward_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 14750720 }, { "name": "language_model.model.layers.14.pre_feedforward_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 14755840 }, { "name": "language_model.model.layers.15.input_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 14760960 }, { "name": "language_model.model.layers.15.mlp.down_proj.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 14766080 }, { "name": "language_model.model.layers.15.mlp.down_proj.q_scale", "shape": [ 2560, 320 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 27873280 }, { "name": "language_model.model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 20480, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3276800, "byteOffset": 29511680 }, { "name": "language_model.model.layers.15.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 32788480 }, { "name": "language_model.model.layers.15.post_feedforward_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 32793600 }, { "name": "language_model.model.layers.15.pre_feedforward_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 32798720 }, { "name": "language_model.model.layers.15.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 32803840 } ], "md5sum": "7383b59c2fb039c4eb2d50e348f4aebd" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 26214400, "records": [ { "name": "language_model.model.layers.16.mlp.gate_up_proj.q_weight", "shape": [ 20480, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26214400, "byteOffset": 0 } ], "md5sum": "3a31a018bd27fe9397e354dea45f333c" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 31315456, "records": [ { "name": "language_model.model.layers.15.self_attn.k_proj.q_weight", "shape": [ 1024, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 0 }, { "name": "language_model.model.layers.15.self_attn.k_proj.q_scale", "shape": [ 1024, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 163840, "byteOffset": 1310720 }, { "name": "language_model.model.layers.15.self_attn.o_proj.q_weight", "shape": [ 2560, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 1474560 }, { "name": "language_model.model.layers.15.self_attn.o_proj.q_scale", "shape": [ 2560, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 327680, "byteOffset": 4096000 }, { "name": "language_model.model.layers.15.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 4423680 }, { "name": "language_model.model.layers.15.self_attn.q_proj.q_weight", "shape": [ 2048, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 4424192 }, { "name": "language_model.model.layers.15.self_attn.q_proj.q_scale", "shape": [ 2048, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 327680, "byteOffset": 7045632 }, { "name": "language_model.model.layers.15.self_attn.v_proj.q_weight", "shape": [ 1024, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 7373312 }, { "name": "language_model.model.layers.15.self_attn.v_proj.q_scale", "shape": [ 1024, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 163840, "byteOffset": 8684032 }, { "name": "language_model.model.layers.16.input_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 8847872 }, { "name": "language_model.model.layers.16.mlp.down_proj.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 8852992 }, { "name": "language_model.model.layers.16.mlp.down_proj.q_scale", "shape": [ 2560, 320 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 21960192 }, { "name": "language_model.model.layers.16.mlp.gate_up_proj.q_scale", "shape": [ 20480, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3276800, "byteOffset": 23598592 }, { "name": "language_model.model.layers.16.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 26875392 }, { "name": "language_model.model.layers.16.post_feedforward_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 26880512 }, { "name": "language_model.model.layers.16.pre_feedforward_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 26885632 }, { "name": "language_model.model.layers.16.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 26890752 }, { "name": "language_model.model.layers.16.self_attn.k_proj.q_weight", "shape": [ 1024, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 26891264 }, { "name": "language_model.model.layers.16.self_attn.k_proj.q_scale", "shape": [ 1024, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 163840, "byteOffset": 28201984 }, { "name": "language_model.model.layers.16.self_attn.o_proj.q_weight", "shape": [ 2560, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 28365824 }, { "name": "language_model.model.layers.16.self_attn.o_proj.q_scale", "shape": [ 2560, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 327680, "byteOffset": 30987264 }, { "name": "language_model.model.layers.16.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 31314944 } ], "md5sum": "8ec3917f53f73083545d15b51bb7bb9e" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 26214400, "records": [ { "name": "language_model.model.layers.17.mlp.gate_up_proj.q_weight", "shape": [ 20480, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26214400, "byteOffset": 0 } ], "md5sum": "70db123adb6ef0f8754dad5069711e99" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 31320064, "records": [ { "name": "language_model.model.layers.16.self_attn.q_proj.q_weight", "shape": [ 2048, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 0 }, { "name": "language_model.model.layers.16.self_attn.q_proj.q_scale", "shape": [ 2048, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 327680, "byteOffset": 2621440 }, { "name": "language_model.model.layers.16.self_attn.v_proj.q_weight", "shape": [ 1024, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 2949120 }, { "name": "language_model.model.layers.16.self_attn.v_proj.q_scale", "shape": [ 1024, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 163840, "byteOffset": 4259840 }, { "name": "language_model.model.layers.17.input_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 4423680 }, { "name": "language_model.model.layers.17.mlp.down_proj.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 4428800 }, { "name": "language_model.model.layers.17.mlp.down_proj.q_scale", "shape": [ 2560, 320 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 17536000 }, { "name": "language_model.model.layers.17.mlp.gate_up_proj.q_scale", "shape": [ 20480, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3276800, "byteOffset": 19174400 }, { "name": "language_model.model.layers.17.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 22451200 }, { "name": "language_model.model.layers.17.post_feedforward_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 22456320 }, { "name": "language_model.model.layers.17.pre_feedforward_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 22461440 }, { "name": "language_model.model.layers.17.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 22466560 }, { "name": "language_model.model.layers.17.self_attn.k_proj.q_weight", "shape": [ 1024, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 22467072 }, { "name": "language_model.model.layers.17.self_attn.k_proj.q_scale", "shape": [ 1024, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 163840, "byteOffset": 23777792 }, { "name": "language_model.model.layers.17.self_attn.o_proj.q_weight", "shape": [ 2560, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 23941632 }, { "name": "language_model.model.layers.17.self_attn.o_proj.q_scale", "shape": [ 2560, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 327680, "byteOffset": 26563072 }, { "name": "language_model.model.layers.17.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 26890752 }, { "name": "language_model.model.layers.17.self_attn.q_proj.q_weight", "shape": [ 2048, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 26891264 }, { "name": "language_model.model.layers.17.self_attn.q_proj.q_scale", "shape": [ 2048, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 327680, "byteOffset": 29512704 }, { "name": "language_model.model.layers.17.self_attn.v_proj.q_weight", "shape": [ 1024, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 29840384 }, { "name": "language_model.model.layers.17.self_attn.v_proj.q_scale", "shape": [ 1024, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 163840, "byteOffset": 31151104 }, { "name": "language_model.model.layers.18.input_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 31314944 } ], "md5sum": "c5a575a66de904db777e0646c596cb17" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 26214400, "records": [ { "name": "language_model.model.layers.18.mlp.gate_up_proj.q_weight", "shape": [ 20480, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26214400, "byteOffset": 0 } ], "md5sum": "0fed09c2dfe1d0ca85d0066ce8b32eea" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 26891264, "records": [ { "name": "language_model.model.layers.18.mlp.down_proj.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.model.layers.18.mlp.down_proj.q_scale", "shape": [ 2560, 320 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.model.layers.18.mlp.gate_up_proj.q_scale", "shape": [ 20480, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3276800, "byteOffset": 14745600 }, { "name": "language_model.model.layers.18.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 18022400 }, { "name": "language_model.model.layers.18.post_feedforward_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 18027520 }, { "name": "language_model.model.layers.18.pre_feedforward_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 18032640 }, { "name": "language_model.model.layers.18.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 18037760 }, { "name": "language_model.model.layers.18.self_attn.k_proj.q_weight", "shape": [ 1024, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 18038272 }, { "name": "language_model.model.layers.18.self_attn.k_proj.q_scale", "shape": [ 1024, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 163840, "byteOffset": 19348992 }, { "name": "language_model.model.layers.18.self_attn.o_proj.q_weight", "shape": [ 2560, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 19512832 }, { "name": "language_model.model.layers.18.self_attn.o_proj.q_scale", "shape": [ 2560, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 327680, "byteOffset": 22134272 }, { "name": "language_model.model.layers.18.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 22461952 }, { "name": "language_model.model.layers.18.self_attn.q_proj.q_weight", "shape": [ 2048, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 22462464 }, { "name": "language_model.model.layers.18.self_attn.q_proj.q_scale", "shape": [ 2048, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 327680, "byteOffset": 25083904 }, { "name": "language_model.model.layers.18.self_attn.v_proj.q_weight", "shape": [ 1024, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 25411584 }, { "name": "language_model.model.layers.18.self_attn.v_proj.q_scale", "shape": [ 1024, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 163840, "byteOffset": 26722304 }, { "name": "language_model.model.layers.19.input_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 26886144 } ], "md5sum": "dce74415ba7ac39d7e75b4ef0e7189f2" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 26214400, "records": [ { "name": "language_model.model.layers.19.mlp.gate_up_proj.q_weight", "shape": [ 20480, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26214400, "byteOffset": 0 } ], "md5sum": "5d3247e16ad35f0f6ca0b816b404f59d" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 26891264, "records": [ { "name": "language_model.model.layers.19.mlp.down_proj.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.model.layers.19.mlp.down_proj.q_scale", "shape": [ 2560, 320 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.model.layers.19.mlp.gate_up_proj.q_scale", "shape": [ 20480, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3276800, "byteOffset": 14745600 }, { "name": "language_model.model.layers.19.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 18022400 }, { "name": "language_model.model.layers.19.post_feedforward_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 18027520 }, { "name": "language_model.model.layers.19.pre_feedforward_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 18032640 }, { "name": "language_model.model.layers.19.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 18037760 }, { "name": "language_model.model.layers.19.self_attn.k_proj.q_weight", "shape": [ 1024, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 18038272 }, { "name": "language_model.model.layers.19.self_attn.k_proj.q_scale", "shape": [ 1024, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 163840, "byteOffset": 19348992 }, { "name": "language_model.model.layers.19.self_attn.o_proj.q_weight", "shape": [ 2560, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 19512832 }, { "name": "language_model.model.layers.19.self_attn.o_proj.q_scale", "shape": [ 2560, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 327680, "byteOffset": 22134272 }, { "name": "language_model.model.layers.19.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 22461952 }, { "name": "language_model.model.layers.19.self_attn.q_proj.q_weight", "shape": [ 2048, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 22462464 }, { "name": "language_model.model.layers.19.self_attn.q_proj.q_scale", "shape": [ 2048, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 327680, "byteOffset": 25083904 }, { "name": "language_model.model.layers.19.self_attn.v_proj.q_weight", "shape": [ 1024, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 25411584 }, { "name": "language_model.model.layers.19.self_attn.v_proj.q_scale", "shape": [ 1024, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 163840, "byteOffset": 26722304 }, { "name": "language_model.model.layers.20.input_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 26886144 } ], "md5sum": "62e9c0f5769d338c2809e7b8d717eae7" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 26214400, "records": [ { "name": "language_model.model.layers.20.mlp.gate_up_proj.q_weight", "shape": [ 20480, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26214400, "byteOffset": 0 } ], "md5sum": "3f50aab70b412f65ec3144b9e4ec1727" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 26891264, "records": [ { "name": "language_model.model.layers.20.mlp.down_proj.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.model.layers.20.mlp.down_proj.q_scale", "shape": [ 2560, 320 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.model.layers.20.mlp.gate_up_proj.q_scale", "shape": [ 20480, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3276800, "byteOffset": 14745600 }, { "name": "language_model.model.layers.20.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 18022400 }, { "name": "language_model.model.layers.20.post_feedforward_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 18027520 }, { "name": "language_model.model.layers.20.pre_feedforward_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 18032640 }, { "name": "language_model.model.layers.20.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 18037760 }, { "name": "language_model.model.layers.20.self_attn.k_proj.q_weight", "shape": [ 1024, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 18038272 }, { "name": "language_model.model.layers.20.self_attn.k_proj.q_scale", "shape": [ 1024, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 163840, "byteOffset": 19348992 }, { "name": "language_model.model.layers.20.self_attn.o_proj.q_weight", "shape": [ 2560, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 19512832 }, { "name": "language_model.model.layers.20.self_attn.o_proj.q_scale", "shape": [ 2560, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 327680, "byteOffset": 22134272 }, { "name": "language_model.model.layers.20.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 22461952 }, { "name": "language_model.model.layers.20.self_attn.q_proj.q_weight", "shape": [ 2048, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 22462464 }, { "name": "language_model.model.layers.20.self_attn.q_proj.q_scale", "shape": [ 2048, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 327680, "byteOffset": 25083904 }, { "name": "language_model.model.layers.20.self_attn.v_proj.q_weight", "shape": [ 1024, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 25411584 }, { "name": "language_model.model.layers.20.self_attn.v_proj.q_scale", "shape": [ 1024, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 163840, "byteOffset": 26722304 }, { "name": "language_model.model.layers.21.input_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 26886144 } ], "md5sum": "edaec9a07579398b1f6417b2b58b8d9f" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 26214400, "records": [ { "name": "language_model.model.layers.21.mlp.gate_up_proj.q_weight", "shape": [ 20480, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26214400, "byteOffset": 0 } ], "md5sum": "8734cbc17bd1867826ac4708f9a00818" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 26891264, "records": [ { "name": "language_model.model.layers.21.mlp.down_proj.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.model.layers.21.mlp.down_proj.q_scale", "shape": [ 2560, 320 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.model.layers.21.mlp.gate_up_proj.q_scale", "shape": [ 20480, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3276800, "byteOffset": 14745600 }, { "name": "language_model.model.layers.21.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 18022400 }, { "name": "language_model.model.layers.21.post_feedforward_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 18027520 }, { "name": "language_model.model.layers.21.pre_feedforward_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 18032640 }, { "name": "language_model.model.layers.21.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 18037760 }, { "name": "language_model.model.layers.21.self_attn.k_proj.q_weight", "shape": [ 1024, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 18038272 }, { "name": "language_model.model.layers.21.self_attn.k_proj.q_scale", "shape": [ 1024, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 163840, "byteOffset": 19348992 }, { "name": "language_model.model.layers.21.self_attn.o_proj.q_weight", "shape": [ 2560, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 19512832 }, { "name": "language_model.model.layers.21.self_attn.o_proj.q_scale", "shape": [ 2560, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 327680, "byteOffset": 22134272 }, { "name": "language_model.model.layers.21.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 22461952 }, { "name": "language_model.model.layers.21.self_attn.q_proj.q_weight", "shape": [ 2048, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 22462464 }, { "name": "language_model.model.layers.21.self_attn.q_proj.q_scale", "shape": [ 2048, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 327680, "byteOffset": 25083904 }, { "name": "language_model.model.layers.21.self_attn.v_proj.q_weight", "shape": [ 1024, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 25411584 }, { "name": "language_model.model.layers.21.self_attn.v_proj.q_scale", "shape": [ 1024, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 163840, "byteOffset": 26722304 }, { "name": "language_model.model.layers.22.input_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 26886144 } ], "md5sum": "a9dd96bf3e6e9c17bac74047abebec14" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 26214400, "records": [ { "name": "language_model.model.layers.22.mlp.gate_up_proj.q_weight", "shape": [ 20480, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26214400, "byteOffset": 0 } ], "md5sum": "8dcf095ff7147aa9b1617bb6c9beea03" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 26891264, "records": [ { "name": "language_model.model.layers.22.mlp.down_proj.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.model.layers.22.mlp.down_proj.q_scale", "shape": [ 2560, 320 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.model.layers.22.mlp.gate_up_proj.q_scale", "shape": [ 20480, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3276800, "byteOffset": 14745600 }, { "name": "language_model.model.layers.22.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 18022400 }, { "name": "language_model.model.layers.22.post_feedforward_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 18027520 }, { "name": "language_model.model.layers.22.pre_feedforward_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 18032640 }, { "name": "language_model.model.layers.22.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 18037760 }, { "name": "language_model.model.layers.22.self_attn.k_proj.q_weight", "shape": [ 1024, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 18038272 }, { "name": "language_model.model.layers.22.self_attn.k_proj.q_scale", "shape": [ 1024, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 163840, "byteOffset": 19348992 }, { "name": "language_model.model.layers.22.self_attn.o_proj.q_weight", "shape": [ 2560, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 19512832 }, { "name": "language_model.model.layers.22.self_attn.o_proj.q_scale", "shape": [ 2560, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 327680, "byteOffset": 22134272 }, { "name": "language_model.model.layers.22.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 22461952 }, { "name": "language_model.model.layers.22.self_attn.q_proj.q_weight", "shape": [ 2048, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 22462464 }, { "name": "language_model.model.layers.22.self_attn.q_proj.q_scale", "shape": [ 2048, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 327680, "byteOffset": 25083904 }, { "name": "language_model.model.layers.22.self_attn.v_proj.q_weight", "shape": [ 1024, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 25411584 }, { "name": "language_model.model.layers.22.self_attn.v_proj.q_scale", "shape": [ 1024, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 163840, "byteOffset": 26722304 }, { "name": "language_model.model.layers.23.input_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 26886144 } ], "md5sum": "22e7658493ae8c2bfc6e690286e4be97" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 26214400, "records": [ { "name": "language_model.model.layers.23.mlp.gate_up_proj.q_weight", "shape": [ 20480, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26214400, "byteOffset": 0 } ], "md5sum": "01061115965c326b3a433d66eb4b84f6" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 26891264, "records": [ { "name": "language_model.model.layers.23.mlp.down_proj.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.model.layers.23.mlp.down_proj.q_scale", "shape": [ 2560, 320 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.model.layers.23.mlp.gate_up_proj.q_scale", "shape": [ 20480, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3276800, "byteOffset": 14745600 }, { "name": "language_model.model.layers.23.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 18022400 }, { "name": "language_model.model.layers.23.post_feedforward_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 18027520 }, { "name": "language_model.model.layers.23.pre_feedforward_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 18032640 }, { "name": "language_model.model.layers.23.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 18037760 }, { "name": "language_model.model.layers.23.self_attn.k_proj.q_weight", "shape": [ 1024, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 18038272 }, { "name": "language_model.model.layers.23.self_attn.k_proj.q_scale", "shape": [ 1024, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 163840, "byteOffset": 19348992 }, { "name": "language_model.model.layers.23.self_attn.o_proj.q_weight", "shape": [ 2560, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 19512832 }, { "name": "language_model.model.layers.23.self_attn.o_proj.q_scale", "shape": [ 2560, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 327680, "byteOffset": 22134272 }, { "name": "language_model.model.layers.23.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 22461952 }, { "name": "language_model.model.layers.23.self_attn.q_proj.q_weight", "shape": [ 2048, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 22462464 }, { "name": "language_model.model.layers.23.self_attn.q_proj.q_scale", "shape": [ 2048, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 327680, "byteOffset": 25083904 }, { "name": "language_model.model.layers.23.self_attn.v_proj.q_weight", "shape": [ 1024, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 25411584 }, { "name": "language_model.model.layers.23.self_attn.v_proj.q_scale", "shape": [ 1024, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 163840, "byteOffset": 26722304 }, { "name": "language_model.model.layers.24.input_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 26886144 } ], "md5sum": "16379b6439c3774fb5bc567296500692" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 26214400, "records": [ { "name": "language_model.model.layers.24.mlp.gate_up_proj.q_weight", "shape": [ 20480, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26214400, "byteOffset": 0 } ], "md5sum": "0e0ec902aef470b8988a8ee28a0271e7" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 26891264, "records": [ { "name": "language_model.model.layers.24.mlp.down_proj.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.model.layers.24.mlp.down_proj.q_scale", "shape": [ 2560, 320 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.model.layers.24.mlp.gate_up_proj.q_scale", "shape": [ 20480, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3276800, "byteOffset": 14745600 }, { "name": "language_model.model.layers.24.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 18022400 }, { "name": "language_model.model.layers.24.post_feedforward_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 18027520 }, { "name": "language_model.model.layers.24.pre_feedforward_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 18032640 }, { "name": "language_model.model.layers.24.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 18037760 }, { "name": "language_model.model.layers.24.self_attn.k_proj.q_weight", "shape": [ 1024, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 18038272 }, { "name": "language_model.model.layers.24.self_attn.k_proj.q_scale", "shape": [ 1024, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 163840, "byteOffset": 19348992 }, { "name": "language_model.model.layers.24.self_attn.o_proj.q_weight", "shape": [ 2560, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 19512832 }, { "name": "language_model.model.layers.24.self_attn.o_proj.q_scale", "shape": [ 2560, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 327680, "byteOffset": 22134272 }, { "name": "language_model.model.layers.24.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 22461952 }, { "name": "language_model.model.layers.24.self_attn.q_proj.q_weight", "shape": [ 2048, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 22462464 }, { "name": "language_model.model.layers.24.self_attn.q_proj.q_scale", "shape": [ 2048, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 327680, "byteOffset": 25083904 }, { "name": "language_model.model.layers.24.self_attn.v_proj.q_weight", "shape": [ 1024, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 25411584 }, { "name": "language_model.model.layers.24.self_attn.v_proj.q_scale", "shape": [ 1024, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 163840, "byteOffset": 26722304 }, { "name": "language_model.model.layers.25.input_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 26886144 } ], "md5sum": "91f0aeef1abff891fa6bfcc5ce0b3059" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 26214400, "records": [ { "name": "language_model.model.layers.25.mlp.gate_up_proj.q_weight", "shape": [ 20480, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26214400, "byteOffset": 0 } ], "md5sum": "674e31a74699a5770e8866b017d33d2d" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 26891264, "records": [ { "name": "language_model.model.layers.25.mlp.down_proj.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.model.layers.25.mlp.down_proj.q_scale", "shape": [ 2560, 320 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.model.layers.25.mlp.gate_up_proj.q_scale", "shape": [ 20480, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3276800, "byteOffset": 14745600 }, { "name": "language_model.model.layers.25.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 18022400 }, { "name": "language_model.model.layers.25.post_feedforward_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 18027520 }, { "name": "language_model.model.layers.25.pre_feedforward_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 18032640 }, { "name": "language_model.model.layers.25.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 18037760 }, { "name": "language_model.model.layers.25.self_attn.k_proj.q_weight", "shape": [ 1024, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 18038272 }, { "name": "language_model.model.layers.25.self_attn.k_proj.q_scale", "shape": [ 1024, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 163840, "byteOffset": 19348992 }, { "name": "language_model.model.layers.25.self_attn.o_proj.q_weight", "shape": [ 2560, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 19512832 }, { "name": "language_model.model.layers.25.self_attn.o_proj.q_scale", "shape": [ 2560, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 327680, "byteOffset": 22134272 }, { "name": "language_model.model.layers.25.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 22461952 }, { "name": "language_model.model.layers.25.self_attn.q_proj.q_weight", "shape": [ 2048, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 22462464 }, { "name": "language_model.model.layers.25.self_attn.q_proj.q_scale", "shape": [ 2048, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 327680, "byteOffset": 25083904 }, { "name": "language_model.model.layers.25.self_attn.v_proj.q_weight", "shape": [ 1024, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 25411584 }, { "name": "language_model.model.layers.25.self_attn.v_proj.q_scale", "shape": [ 1024, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 163840, "byteOffset": 26722304 }, { "name": "language_model.model.layers.26.input_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 26886144 } ], "md5sum": "030bc38f0f87f98b9e14e4702a110d0c" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 26214400, "records": [ { "name": "language_model.model.layers.26.mlp.gate_up_proj.q_weight", "shape": [ 20480, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26214400, "byteOffset": 0 } ], "md5sum": "a21977b1f156a4352d78063662f1df11" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 26891264, "records": [ { "name": "language_model.model.layers.26.mlp.down_proj.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.model.layers.26.mlp.down_proj.q_scale", "shape": [ 2560, 320 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.model.layers.26.mlp.gate_up_proj.q_scale", "shape": [ 20480, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3276800, "byteOffset": 14745600 }, { "name": "language_model.model.layers.26.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 18022400 }, { "name": "language_model.model.layers.26.post_feedforward_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 18027520 }, { "name": "language_model.model.layers.26.pre_feedforward_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 18032640 }, { "name": "language_model.model.layers.26.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 18037760 }, { "name": "language_model.model.layers.26.self_attn.k_proj.q_weight", "shape": [ 1024, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 18038272 }, { "name": "language_model.model.layers.26.self_attn.k_proj.q_scale", "shape": [ 1024, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 163840, "byteOffset": 19348992 }, { "name": "language_model.model.layers.26.self_attn.o_proj.q_weight", "shape": [ 2560, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 19512832 }, { "name": "language_model.model.layers.26.self_attn.o_proj.q_scale", "shape": [ 2560, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 327680, "byteOffset": 22134272 }, { "name": "language_model.model.layers.26.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 22461952 }, { "name": "language_model.model.layers.26.self_attn.q_proj.q_weight", "shape": [ 2048, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 22462464 }, { "name": "language_model.model.layers.26.self_attn.q_proj.q_scale", "shape": [ 2048, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 327680, "byteOffset": 25083904 }, { "name": "language_model.model.layers.26.self_attn.v_proj.q_weight", "shape": [ 1024, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 25411584 }, { "name": "language_model.model.layers.26.self_attn.v_proj.q_scale", "shape": [ 1024, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 163840, "byteOffset": 26722304 }, { "name": "language_model.model.layers.27.input_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 26886144 } ], "md5sum": "aa62f24f9781e4b564cc3cc92ed8fc58" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 26214400, "records": [ { "name": "language_model.model.layers.27.mlp.gate_up_proj.q_weight", "shape": [ 20480, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26214400, "byteOffset": 0 } ], "md5sum": "b427423932a1dfd548954c81ece78b3e" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 26891264, "records": [ { "name": "language_model.model.layers.27.mlp.down_proj.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.model.layers.27.mlp.down_proj.q_scale", "shape": [ 2560, 320 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.model.layers.27.mlp.gate_up_proj.q_scale", "shape": [ 20480, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3276800, "byteOffset": 14745600 }, { "name": "language_model.model.layers.27.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 18022400 }, { "name": "language_model.model.layers.27.post_feedforward_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 18027520 }, { "name": "language_model.model.layers.27.pre_feedforward_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 18032640 }, { "name": "language_model.model.layers.27.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 18037760 }, { "name": "language_model.model.layers.27.self_attn.k_proj.q_weight", "shape": [ 1024, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 18038272 }, { "name": "language_model.model.layers.27.self_attn.k_proj.q_scale", "shape": [ 1024, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 163840, "byteOffset": 19348992 }, { "name": "language_model.model.layers.27.self_attn.o_proj.q_weight", "shape": [ 2560, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 19512832 }, { "name": "language_model.model.layers.27.self_attn.o_proj.q_scale", "shape": [ 2560, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 327680, "byteOffset": 22134272 }, { "name": "language_model.model.layers.27.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 22461952 }, { "name": "language_model.model.layers.27.self_attn.q_proj.q_weight", "shape": [ 2048, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 22462464 }, { "name": "language_model.model.layers.27.self_attn.q_proj.q_scale", "shape": [ 2048, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 327680, "byteOffset": 25083904 }, { "name": "language_model.model.layers.27.self_attn.v_proj.q_weight", "shape": [ 1024, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 25411584 }, { "name": "language_model.model.layers.27.self_attn.v_proj.q_scale", "shape": [ 1024, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 163840, "byteOffset": 26722304 }, { "name": "language_model.model.layers.28.input_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 26886144 } ], "md5sum": "c1ca684f16a12ee5af83e75d67d60a9b" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 26214400, "records": [ { "name": "language_model.model.layers.28.mlp.gate_up_proj.q_weight", "shape": [ 20480, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26214400, "byteOffset": 0 } ], "md5sum": "eb7824bd8eecf81a67f7ed86fde3957d" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 26891264, "records": [ { "name": "language_model.model.layers.28.mlp.down_proj.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.model.layers.28.mlp.down_proj.q_scale", "shape": [ 2560, 320 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.model.layers.28.mlp.gate_up_proj.q_scale", "shape": [ 20480, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3276800, "byteOffset": 14745600 }, { "name": "language_model.model.layers.28.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 18022400 }, { "name": "language_model.model.layers.28.post_feedforward_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 18027520 }, { "name": "language_model.model.layers.28.pre_feedforward_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 18032640 }, { "name": "language_model.model.layers.28.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 18037760 }, { "name": "language_model.model.layers.28.self_attn.k_proj.q_weight", "shape": [ 1024, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 18038272 }, { "name": "language_model.model.layers.28.self_attn.k_proj.q_scale", "shape": [ 1024, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 163840, "byteOffset": 19348992 }, { "name": "language_model.model.layers.28.self_attn.o_proj.q_weight", "shape": [ 2560, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 19512832 }, { "name": "language_model.model.layers.28.self_attn.o_proj.q_scale", "shape": [ 2560, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 327680, "byteOffset": 22134272 }, { "name": "language_model.model.layers.28.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 22461952 }, { "name": "language_model.model.layers.28.self_attn.q_proj.q_weight", "shape": [ 2048, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 22462464 }, { "name": "language_model.model.layers.28.self_attn.q_proj.q_scale", "shape": [ 2048, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 327680, "byteOffset": 25083904 }, { "name": "language_model.model.layers.28.self_attn.v_proj.q_weight", "shape": [ 1024, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 25411584 }, { "name": "language_model.model.layers.28.self_attn.v_proj.q_scale", "shape": [ 1024, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 163840, "byteOffset": 26722304 }, { "name": "language_model.model.layers.29.input_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 26886144 } ], "md5sum": "e49f6b888e5cfbfbcfd652bba9ba39c6" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 26214400, "records": [ { "name": "language_model.model.layers.29.mlp.gate_up_proj.q_weight", "shape": [ 20480, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26214400, "byteOffset": 0 } ], "md5sum": "04baaad15f78e71b2ecd3eccec44f625" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 26891264, "records": [ { "name": "language_model.model.layers.29.mlp.down_proj.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.model.layers.29.mlp.down_proj.q_scale", "shape": [ 2560, 320 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.model.layers.29.mlp.gate_up_proj.q_scale", "shape": [ 20480, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3276800, "byteOffset": 14745600 }, { "name": "language_model.model.layers.29.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 18022400 }, { "name": "language_model.model.layers.29.post_feedforward_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 18027520 }, { "name": "language_model.model.layers.29.pre_feedforward_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 18032640 }, { "name": "language_model.model.layers.29.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 18037760 }, { "name": "language_model.model.layers.29.self_attn.k_proj.q_weight", "shape": [ 1024, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 18038272 }, { "name": "language_model.model.layers.29.self_attn.k_proj.q_scale", "shape": [ 1024, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 163840, "byteOffset": 19348992 }, { "name": "language_model.model.layers.29.self_attn.o_proj.q_weight", "shape": [ 2560, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 19512832 }, { "name": "language_model.model.layers.29.self_attn.o_proj.q_scale", "shape": [ 2560, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 327680, "byteOffset": 22134272 }, { "name": "language_model.model.layers.29.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 22461952 }, { "name": "language_model.model.layers.29.self_attn.q_proj.q_weight", "shape": [ 2048, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 22462464 }, { "name": "language_model.model.layers.29.self_attn.q_proj.q_scale", "shape": [ 2048, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 327680, "byteOffset": 25083904 }, { "name": "language_model.model.layers.29.self_attn.v_proj.q_weight", "shape": [ 1024, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 25411584 }, { "name": "language_model.model.layers.29.self_attn.v_proj.q_scale", "shape": [ 1024, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 163840, "byteOffset": 26722304 }, { "name": "language_model.model.layers.30.input_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 26886144 } ], "md5sum": "7b887e8a7f1054b1ad4a65941cbc598f" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 26214400, "records": [ { "name": "language_model.model.layers.30.mlp.gate_up_proj.q_weight", "shape": [ 20480, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26214400, "byteOffset": 0 } ], "md5sum": "58603d185a2a3d01046b2d726d2be56d" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 26891264, "records": [ { "name": "language_model.model.layers.30.mlp.down_proj.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.model.layers.30.mlp.down_proj.q_scale", "shape": [ 2560, 320 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.model.layers.30.mlp.gate_up_proj.q_scale", "shape": [ 20480, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3276800, "byteOffset": 14745600 }, { "name": "language_model.model.layers.30.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 18022400 }, { "name": "language_model.model.layers.30.post_feedforward_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 18027520 }, { "name": "language_model.model.layers.30.pre_feedforward_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 18032640 }, { "name": "language_model.model.layers.30.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 18037760 }, { "name": "language_model.model.layers.30.self_attn.k_proj.q_weight", "shape": [ 1024, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 18038272 }, { "name": "language_model.model.layers.30.self_attn.k_proj.q_scale", "shape": [ 1024, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 163840, "byteOffset": 19348992 }, { "name": "language_model.model.layers.30.self_attn.o_proj.q_weight", "shape": [ 2560, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 19512832 }, { "name": "language_model.model.layers.30.self_attn.o_proj.q_scale", "shape": [ 2560, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 327680, "byteOffset": 22134272 }, { "name": "language_model.model.layers.30.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 22461952 }, { "name": "language_model.model.layers.30.self_attn.q_proj.q_weight", "shape": [ 2048, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 22462464 }, { "name": "language_model.model.layers.30.self_attn.q_proj.q_scale", "shape": [ 2048, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 327680, "byteOffset": 25083904 }, { "name": "language_model.model.layers.30.self_attn.v_proj.q_weight", "shape": [ 1024, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 25411584 }, { "name": "language_model.model.layers.30.self_attn.v_proj.q_scale", "shape": [ 1024, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 163840, "byteOffset": 26722304 }, { "name": "language_model.model.layers.31.input_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 26886144 } ], "md5sum": "e3110ebb04bd0d00ef82262ab3656263" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 26214400, "records": [ { "name": "language_model.model.layers.31.mlp.gate_up_proj.q_weight", "shape": [ 20480, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26214400, "byteOffset": 0 } ], "md5sum": "4f81c2bf41912ac5e65f73dcec226e51" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 26891264, "records": [ { "name": "language_model.model.layers.31.mlp.down_proj.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.model.layers.31.mlp.down_proj.q_scale", "shape": [ 2560, 320 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.model.layers.31.mlp.gate_up_proj.q_scale", "shape": [ 20480, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3276800, "byteOffset": 14745600 }, { "name": "language_model.model.layers.31.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 18022400 }, { "name": "language_model.model.layers.31.post_feedforward_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 18027520 }, { "name": "language_model.model.layers.31.pre_feedforward_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 18032640 }, { "name": "language_model.model.layers.31.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 18037760 }, { "name": "language_model.model.layers.31.self_attn.k_proj.q_weight", "shape": [ 1024, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 18038272 }, { "name": "language_model.model.layers.31.self_attn.k_proj.q_scale", "shape": [ 1024, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 163840, "byteOffset": 19348992 }, { "name": "language_model.model.layers.31.self_attn.o_proj.q_weight", "shape": [ 2560, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 19512832 }, { "name": "language_model.model.layers.31.self_attn.o_proj.q_scale", "shape": [ 2560, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 327680, "byteOffset": 22134272 }, { "name": "language_model.model.layers.31.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 22461952 }, { "name": "language_model.model.layers.31.self_attn.q_proj.q_weight", "shape": [ 2048, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 22462464 }, { "name": "language_model.model.layers.31.self_attn.q_proj.q_scale", "shape": [ 2048, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 327680, "byteOffset": 25083904 }, { "name": "language_model.model.layers.31.self_attn.v_proj.q_weight", "shape": [ 1024, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 25411584 }, { "name": "language_model.model.layers.31.self_attn.v_proj.q_scale", "shape": [ 1024, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 163840, "byteOffset": 26722304 }, { "name": "language_model.model.layers.32.input_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 26886144 } ], "md5sum": "cd179ffd445f96619b70b0483e29a126" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 26214400, "records": [ { "name": "language_model.model.layers.32.mlp.gate_up_proj.q_weight", "shape": [ 20480, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26214400, "byteOffset": 0 } ], "md5sum": "65a7c055c0e80319ffd3618e208af7f3" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 26891264, "records": [ { "name": "language_model.model.layers.32.mlp.down_proj.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.model.layers.32.mlp.down_proj.q_scale", "shape": [ 2560, 320 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.model.layers.32.mlp.gate_up_proj.q_scale", "shape": [ 20480, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3276800, "byteOffset": 14745600 }, { "name": "language_model.model.layers.32.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 18022400 }, { "name": "language_model.model.layers.32.post_feedforward_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 18027520 }, { "name": "language_model.model.layers.32.pre_feedforward_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 18032640 }, { "name": "language_model.model.layers.32.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 18037760 }, { "name": "language_model.model.layers.32.self_attn.k_proj.q_weight", "shape": [ 1024, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 18038272 }, { "name": "language_model.model.layers.32.self_attn.k_proj.q_scale", "shape": [ 1024, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 163840, "byteOffset": 19348992 }, { "name": "language_model.model.layers.32.self_attn.o_proj.q_weight", "shape": [ 2560, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 19512832 }, { "name": "language_model.model.layers.32.self_attn.o_proj.q_scale", "shape": [ 2560, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 327680, "byteOffset": 22134272 }, { "name": "language_model.model.layers.32.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 22461952 }, { "name": "language_model.model.layers.32.self_attn.q_proj.q_weight", "shape": [ 2048, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 22462464 }, { "name": "language_model.model.layers.32.self_attn.q_proj.q_scale", "shape": [ 2048, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 327680, "byteOffset": 25083904 }, { "name": "language_model.model.layers.32.self_attn.v_proj.q_weight", "shape": [ 1024, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 25411584 }, { "name": "language_model.model.layers.32.self_attn.v_proj.q_scale", "shape": [ 1024, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 163840, "byteOffset": 26722304 }, { "name": "language_model.model.layers.33.input_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 26886144 } ], "md5sum": "3a313b8023044ea07450f8d847d0f332" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 26214400, "records": [ { "name": "language_model.model.layers.33.mlp.gate_up_proj.q_weight", "shape": [ 20480, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26214400, "byteOffset": 0 } ], "md5sum": "434168c601e9e2c40e46e2654ef0ab47" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 26891264, "records": [ { "name": "language_model.model.layers.33.mlp.down_proj.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.model.layers.33.mlp.down_proj.q_scale", "shape": [ 2560, 320 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.model.layers.33.mlp.gate_up_proj.q_scale", "shape": [ 20480, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3276800, "byteOffset": 14745600 }, { "name": "language_model.model.layers.33.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 18022400 }, { "name": "language_model.model.layers.33.post_feedforward_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 18027520 }, { "name": "language_model.model.layers.33.pre_feedforward_layernorm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 18032640 }, { "name": "language_model.model.layers.33.self_attn.k_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 18037760 }, { "name": "language_model.model.layers.33.self_attn.k_proj.q_weight", "shape": [ 1024, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 18038272 }, { "name": "language_model.model.layers.33.self_attn.k_proj.q_scale", "shape": [ 1024, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 163840, "byteOffset": 19348992 }, { "name": "language_model.model.layers.33.self_attn.o_proj.q_weight", "shape": [ 2560, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 19512832 }, { "name": "language_model.model.layers.33.self_attn.o_proj.q_scale", "shape": [ 2560, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 327680, "byteOffset": 22134272 }, { "name": "language_model.model.layers.33.self_attn.q_norm.weight", "shape": [ 256 ], "dtype": "bfloat16", "format": "raw", "nbytes": 512, "byteOffset": 22461952 }, { "name": "language_model.model.layers.33.self_attn.q_proj.q_weight", "shape": [ 2048, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 22462464 }, { "name": "language_model.model.layers.33.self_attn.q_proj.q_scale", "shape": [ 2048, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 327680, "byteOffset": 25083904 }, { "name": "language_model.model.layers.33.self_attn.v_proj.q_weight", "shape": [ 1024, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 25411584 }, { "name": "language_model.model.layers.33.self_attn.v_proj.q_scale", "shape": [ 1024, 80 ], "dtype": "bfloat16", "format": "raw", "nbytes": 163840, "byteOffset": 26722304 }, { "name": "language_model.model.norm.weight", "shape": [ 2560 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5120, "byteOffset": 26886144 } ], "md5sum": "f4cf215bd905d7d33ca09eb632990175" } ] }