gemma-3-4b-it-q0f32-MLC / ndarray-cache-b16.json
riczhou's picture
Upload folder using huggingface_hub
e013b5e verified
{
"metadata": {
"ParamSize": 410,
"ParamBytes": 15521052672.0,
"BitsPerParam": 28.875843384877793
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 1342504960,
"records": [
{
"name": "language_model.model.embed_tokens.weight",
"shape": [
262208,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1342504960,
"byteOffset": 0
}
],
"md5sum": "f50e3af4232a7c5224828fb5f7b77588"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "language_model.model.layers.0.mlp.down_proj.weight",
"shape": [
2560,
10240
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "9aa441f19294d6bbcdd0635455393bf0"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 104857600,
"records": [
{
"name": "language_model.model.layers.0.mlp.gate_up_proj.weight",
"shape": [
20480,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 104857600,
"byteOffset": 0
}
],
"md5sum": "b207f84f1a4c4efa216672e0148cc2d5"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "language_model.model.layers.1.mlp.down_proj.weight",
"shape": [
2560,
10240
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "75e8a8badb6f18288928233bee6b4c88"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 104857600,
"records": [
{
"name": "language_model.model.layers.1.mlp.gate_up_proj.weight",
"shape": [
20480,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 104857600,
"byteOffset": 0
}
],
"md5sum": "d78d246db4c8409a10d4e71dfc65e833"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 31499776,
"records": [
{
"name": "language_model.model.layers.0.input_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 0
},
{
"name": "language_model.model.layers.0.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 5120
},
{
"name": "language_model.model.layers.0.post_feedforward_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 10240
},
{
"name": "language_model.model.layers.0.pre_feedforward_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 15360
},
{
"name": "language_model.model.layers.0.self_attn.k_norm.weight",
"shape": [
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 512,
"byteOffset": 20480
},
{
"name": "language_model.model.layers.0.self_attn.k_proj.weight",
"shape": [
1024,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5242880,
"byteOffset": 20992
},
{
"name": "language_model.model.layers.0.self_attn.o_proj.weight",
"shape": [
2560,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10485760,
"byteOffset": 5263872
},
{
"name": "language_model.model.layers.0.self_attn.q_norm.weight",
"shape": [
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 512,
"byteOffset": 15749632
},
{
"name": "language_model.model.layers.0.self_attn.q_proj.weight",
"shape": [
2048,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10485760,
"byteOffset": 15750144
},
{
"name": "language_model.model.layers.0.self_attn.v_proj.weight",
"shape": [
1024,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5242880,
"byteOffset": 26235904
},
{
"name": "language_model.model.layers.1.input_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31478784
},
{
"name": "language_model.model.layers.1.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31483904
},
{
"name": "language_model.model.layers.1.post_feedforward_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31489024
},
{
"name": "language_model.model.layers.1.pre_feedforward_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31494144
},
{
"name": "language_model.model.layers.1.self_attn.k_norm.weight",
"shape": [
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 512,
"byteOffset": 31499264
}
],
"md5sum": "a7da304a6598355b19bbc6fc87b5ae59"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "language_model.model.layers.10.mlp.down_proj.weight",
"shape": [
2560,
10240
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "c4388571375e1eefa2148eabe9b53d56"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 104857600,
"records": [
{
"name": "language_model.model.layers.10.mlp.gate_up_proj.weight",
"shape": [
20480,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 104857600,
"byteOffset": 0
}
],
"md5sum": "838e891b949e122e0b332ddf81f2b345"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 31478784,
"records": [
{
"name": "language_model.model.layers.1.self_attn.k_proj.weight",
"shape": [
1024,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5242880,
"byteOffset": 0
},
{
"name": "language_model.model.layers.1.self_attn.o_proj.weight",
"shape": [
2560,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10485760,
"byteOffset": 5242880
},
{
"name": "language_model.model.layers.1.self_attn.q_norm.weight",
"shape": [
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 512,
"byteOffset": 15728640
},
{
"name": "language_model.model.layers.1.self_attn.q_proj.weight",
"shape": [
2048,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10485760,
"byteOffset": 15729152
},
{
"name": "language_model.model.layers.1.self_attn.v_proj.weight",
"shape": [
1024,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5242880,
"byteOffset": 26214912
},
{
"name": "language_model.model.layers.10.input_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31457792
},
{
"name": "language_model.model.layers.10.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31462912
},
{
"name": "language_model.model.layers.10.post_feedforward_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31468032
},
{
"name": "language_model.model.layers.10.pre_feedforward_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31473152
},
{
"name": "language_model.model.layers.10.self_attn.k_norm.weight",
"shape": [
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 512,
"byteOffset": 31478272
}
],
"md5sum": "69df66874287cc4652b77bbfc2de0428"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "language_model.model.layers.11.mlp.down_proj.weight",
"shape": [
2560,
10240
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "82355b47a10e74ab18684bfc9f93cacd"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 104857600,
"records": [
{
"name": "language_model.model.layers.11.mlp.gate_up_proj.weight",
"shape": [
20480,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 104857600,
"byteOffset": 0
}
],
"md5sum": "8bdf2e0d7c1df86f4782601cf44d0c4d"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 31478784,
"records": [
{
"name": "language_model.model.layers.10.self_attn.k_proj.weight",
"shape": [
1024,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5242880,
"byteOffset": 0
},
{
"name": "language_model.model.layers.10.self_attn.o_proj.weight",
"shape": [
2560,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10485760,
"byteOffset": 5242880
},
{
"name": "language_model.model.layers.10.self_attn.q_norm.weight",
"shape": [
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 512,
"byteOffset": 15728640
},
{
"name": "language_model.model.layers.10.self_attn.q_proj.weight",
"shape": [
2048,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10485760,
"byteOffset": 15729152
},
{
"name": "language_model.model.layers.10.self_attn.v_proj.weight",
"shape": [
1024,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5242880,
"byteOffset": 26214912
},
{
"name": "language_model.model.layers.11.input_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31457792
},
{
"name": "language_model.model.layers.11.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31462912
},
{
"name": "language_model.model.layers.11.post_feedforward_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31468032
},
{
"name": "language_model.model.layers.11.pre_feedforward_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31473152
},
{
"name": "language_model.model.layers.11.self_attn.k_norm.weight",
"shape": [
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 512,
"byteOffset": 31478272
}
],
"md5sum": "ba1d544f79f0481cd0ec1aebb648c4b0"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "language_model.model.layers.12.mlp.down_proj.weight",
"shape": [
2560,
10240
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "d5617605ca564b047e0881f87ee47bba"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 104857600,
"records": [
{
"name": "language_model.model.layers.12.mlp.gate_up_proj.weight",
"shape": [
20480,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 104857600,
"byteOffset": 0
}
],
"md5sum": "91339678e41ef735989e61d446d4bbbb"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 31478784,
"records": [
{
"name": "language_model.model.layers.11.self_attn.k_proj.weight",
"shape": [
1024,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5242880,
"byteOffset": 0
},
{
"name": "language_model.model.layers.11.self_attn.o_proj.weight",
"shape": [
2560,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10485760,
"byteOffset": 5242880
},
{
"name": "language_model.model.layers.11.self_attn.q_norm.weight",
"shape": [
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 512,
"byteOffset": 15728640
},
{
"name": "language_model.model.layers.11.self_attn.q_proj.weight",
"shape": [
2048,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10485760,
"byteOffset": 15729152
},
{
"name": "language_model.model.layers.11.self_attn.v_proj.weight",
"shape": [
1024,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5242880,
"byteOffset": 26214912
},
{
"name": "language_model.model.layers.12.input_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31457792
},
{
"name": "language_model.model.layers.12.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31462912
},
{
"name": "language_model.model.layers.12.post_feedforward_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31468032
},
{
"name": "language_model.model.layers.12.pre_feedforward_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31473152
},
{
"name": "language_model.model.layers.12.self_attn.k_norm.weight",
"shape": [
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 512,
"byteOffset": 31478272
}
],
"md5sum": "52afcead724ad43843529a340377fec4"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "language_model.model.layers.13.mlp.down_proj.weight",
"shape": [
2560,
10240
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "03fb87e42e021bc86070d8033b836929"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 104857600,
"records": [
{
"name": "language_model.model.layers.13.mlp.gate_up_proj.weight",
"shape": [
20480,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 104857600,
"byteOffset": 0
}
],
"md5sum": "462fdd21f09b78e6c6d8a70739b91cf4"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 31478784,
"records": [
{
"name": "language_model.model.layers.12.self_attn.k_proj.weight",
"shape": [
1024,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5242880,
"byteOffset": 0
},
{
"name": "language_model.model.layers.12.self_attn.o_proj.weight",
"shape": [
2560,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10485760,
"byteOffset": 5242880
},
{
"name": "language_model.model.layers.12.self_attn.q_norm.weight",
"shape": [
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 512,
"byteOffset": 15728640
},
{
"name": "language_model.model.layers.12.self_attn.q_proj.weight",
"shape": [
2048,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10485760,
"byteOffset": 15729152
},
{
"name": "language_model.model.layers.12.self_attn.v_proj.weight",
"shape": [
1024,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5242880,
"byteOffset": 26214912
},
{
"name": "language_model.model.layers.13.input_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31457792
},
{
"name": "language_model.model.layers.13.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31462912
},
{
"name": "language_model.model.layers.13.post_feedforward_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31468032
},
{
"name": "language_model.model.layers.13.pre_feedforward_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31473152
},
{
"name": "language_model.model.layers.13.self_attn.k_norm.weight",
"shape": [
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 512,
"byteOffset": 31478272
}
],
"md5sum": "1d681b710052d2e0a6491d7f8f509bfe"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 104857600,
"records": [
{
"name": "language_model.model.layers.14.mlp.gate_up_proj.weight",
"shape": [
20480,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 104857600,
"byteOffset": 0
}
],
"md5sum": "841c2c1dab090098a0eff8bafaac3f54"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 31458304,
"records": [
{
"name": "language_model.model.layers.13.self_attn.k_proj.weight",
"shape": [
1024,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5242880,
"byteOffset": 0
},
{
"name": "language_model.model.layers.13.self_attn.o_proj.weight",
"shape": [
2560,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10485760,
"byteOffset": 5242880
},
{
"name": "language_model.model.layers.13.self_attn.q_norm.weight",
"shape": [
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 512,
"byteOffset": 15728640
},
{
"name": "language_model.model.layers.13.self_attn.q_proj.weight",
"shape": [
2048,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10485760,
"byteOffset": 15729152
},
{
"name": "language_model.model.layers.13.self_attn.v_proj.weight",
"shape": [
1024,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5242880,
"byteOffset": 26214912
},
{
"name": "language_model.model.layers.14.self_attn.k_norm.weight",
"shape": [
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 512,
"byteOffset": 31457792
}
],
"md5sum": "4a05b1e13c21081ed8b805ba67d0b4bf"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "language_model.model.layers.2.mlp.down_proj.weight",
"shape": [
2560,
10240
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "bc07c315e9f72a59df3d902608c66923"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 104857600,
"records": [
{
"name": "language_model.model.layers.2.mlp.gate_up_proj.weight",
"shape": [
20480,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 104857600,
"byteOffset": 0
}
],
"md5sum": "d174de12e98774b0e0a4bfcab5b51bcb"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 31478784,
"records": [
{
"name": "language_model.model.layers.14.self_attn.k_proj.weight",
"shape": [
1024,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5242880,
"byteOffset": 0
},
{
"name": "language_model.model.layers.14.self_attn.o_proj.weight",
"shape": [
2560,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10485760,
"byteOffset": 5242880
},
{
"name": "language_model.model.layers.14.self_attn.q_norm.weight",
"shape": [
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 512,
"byteOffset": 15728640
},
{
"name": "language_model.model.layers.14.self_attn.q_proj.weight",
"shape": [
2048,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10485760,
"byteOffset": 15729152
},
{
"name": "language_model.model.layers.14.self_attn.v_proj.weight",
"shape": [
1024,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5242880,
"byteOffset": 26214912
},
{
"name": "language_model.model.layers.2.input_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31457792
},
{
"name": "language_model.model.layers.2.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31462912
},
{
"name": "language_model.model.layers.2.post_feedforward_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31468032
},
{
"name": "language_model.model.layers.2.pre_feedforward_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31473152
},
{
"name": "language_model.model.layers.2.self_attn.k_norm.weight",
"shape": [
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 512,
"byteOffset": 31478272
}
],
"md5sum": "3a428919da96950d807d104930613d48"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "language_model.model.layers.3.mlp.down_proj.weight",
"shape": [
2560,
10240
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "49b7585eb16707a47d5c77aa5f91c27b"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 104857600,
"records": [
{
"name": "language_model.model.layers.3.mlp.gate_up_proj.weight",
"shape": [
20480,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 104857600,
"byteOffset": 0
}
],
"md5sum": "80d3370486d81a1d5c01b733631c67a5"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 31478784,
"records": [
{
"name": "language_model.model.layers.2.self_attn.k_proj.weight",
"shape": [
1024,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5242880,
"byteOffset": 0
},
{
"name": "language_model.model.layers.2.self_attn.o_proj.weight",
"shape": [
2560,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10485760,
"byteOffset": 5242880
},
{
"name": "language_model.model.layers.2.self_attn.q_norm.weight",
"shape": [
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 512,
"byteOffset": 15728640
},
{
"name": "language_model.model.layers.2.self_attn.q_proj.weight",
"shape": [
2048,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10485760,
"byteOffset": 15729152
},
{
"name": "language_model.model.layers.2.self_attn.v_proj.weight",
"shape": [
1024,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5242880,
"byteOffset": 26214912
},
{
"name": "language_model.model.layers.3.input_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31457792
},
{
"name": "language_model.model.layers.3.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31462912
},
{
"name": "language_model.model.layers.3.post_feedforward_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31468032
},
{
"name": "language_model.model.layers.3.pre_feedforward_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31473152
},
{
"name": "language_model.model.layers.3.self_attn.k_norm.weight",
"shape": [
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 512,
"byteOffset": 31478272
}
],
"md5sum": "cac27dbd0eb11635550a2ae845e53ccb"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "language_model.model.layers.4.mlp.down_proj.weight",
"shape": [
2560,
10240
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "4d62fa2df11722c4bc7175b4a76bb3cd"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 104857600,
"records": [
{
"name": "language_model.model.layers.4.mlp.gate_up_proj.weight",
"shape": [
20480,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 104857600,
"byteOffset": 0
}
],
"md5sum": "f10eac499d5087be14bf2bcc71913c1b"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 31478784,
"records": [
{
"name": "language_model.model.layers.3.self_attn.k_proj.weight",
"shape": [
1024,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5242880,
"byteOffset": 0
},
{
"name": "language_model.model.layers.3.self_attn.o_proj.weight",
"shape": [
2560,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10485760,
"byteOffset": 5242880
},
{
"name": "language_model.model.layers.3.self_attn.q_norm.weight",
"shape": [
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 512,
"byteOffset": 15728640
},
{
"name": "language_model.model.layers.3.self_attn.q_proj.weight",
"shape": [
2048,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10485760,
"byteOffset": 15729152
},
{
"name": "language_model.model.layers.3.self_attn.v_proj.weight",
"shape": [
1024,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5242880,
"byteOffset": 26214912
},
{
"name": "language_model.model.layers.4.input_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31457792
},
{
"name": "language_model.model.layers.4.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31462912
},
{
"name": "language_model.model.layers.4.post_feedforward_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31468032
},
{
"name": "language_model.model.layers.4.pre_feedforward_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31473152
},
{
"name": "language_model.model.layers.4.self_attn.k_norm.weight",
"shape": [
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 512,
"byteOffset": 31478272
}
],
"md5sum": "e159577d49a1bd37c92021d2ad386a27"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "language_model.model.layers.5.mlp.down_proj.weight",
"shape": [
2560,
10240
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "4bcc2be1d2da93c5eb7fc631121104c7"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 104857600,
"records": [
{
"name": "language_model.model.layers.5.mlp.gate_up_proj.weight",
"shape": [
20480,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 104857600,
"byteOffset": 0
}
],
"md5sum": "b173b6c019e3ca7554b82308da7efdec"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 31478784,
"records": [
{
"name": "language_model.model.layers.4.self_attn.k_proj.weight",
"shape": [
1024,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5242880,
"byteOffset": 0
},
{
"name": "language_model.model.layers.4.self_attn.o_proj.weight",
"shape": [
2560,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10485760,
"byteOffset": 5242880
},
{
"name": "language_model.model.layers.4.self_attn.q_norm.weight",
"shape": [
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 512,
"byteOffset": 15728640
},
{
"name": "language_model.model.layers.4.self_attn.q_proj.weight",
"shape": [
2048,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10485760,
"byteOffset": 15729152
},
{
"name": "language_model.model.layers.4.self_attn.v_proj.weight",
"shape": [
1024,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5242880,
"byteOffset": 26214912
},
{
"name": "language_model.model.layers.5.input_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31457792
},
{
"name": "language_model.model.layers.5.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31462912
},
{
"name": "language_model.model.layers.5.post_feedforward_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31468032
},
{
"name": "language_model.model.layers.5.pre_feedforward_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31473152
},
{
"name": "language_model.model.layers.5.self_attn.k_norm.weight",
"shape": [
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 512,
"byteOffset": 31478272
}
],
"md5sum": "a1cfec300b2388b17482ccd53230b498"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "language_model.model.layers.6.mlp.down_proj.weight",
"shape": [
2560,
10240
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "f71be1ef17355640d23193e39b81ffc4"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 104857600,
"records": [
{
"name": "language_model.model.layers.6.mlp.gate_up_proj.weight",
"shape": [
20480,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 104857600,
"byteOffset": 0
}
],
"md5sum": "f9abe612ebafd3f1ddb77634204e210c"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 31478784,
"records": [
{
"name": "language_model.model.layers.5.self_attn.k_proj.weight",
"shape": [
1024,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5242880,
"byteOffset": 0
},
{
"name": "language_model.model.layers.5.self_attn.o_proj.weight",
"shape": [
2560,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10485760,
"byteOffset": 5242880
},
{
"name": "language_model.model.layers.5.self_attn.q_norm.weight",
"shape": [
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 512,
"byteOffset": 15728640
},
{
"name": "language_model.model.layers.5.self_attn.q_proj.weight",
"shape": [
2048,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10485760,
"byteOffset": 15729152
},
{
"name": "language_model.model.layers.5.self_attn.v_proj.weight",
"shape": [
1024,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5242880,
"byteOffset": 26214912
},
{
"name": "language_model.model.layers.6.input_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31457792
},
{
"name": "language_model.model.layers.6.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31462912
},
{
"name": "language_model.model.layers.6.post_feedforward_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31468032
},
{
"name": "language_model.model.layers.6.pre_feedforward_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31473152
},
{
"name": "language_model.model.layers.6.self_attn.k_norm.weight",
"shape": [
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 512,
"byteOffset": 31478272
}
],
"md5sum": "4e1a54223978e07279523d691106d659"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "language_model.model.layers.7.mlp.down_proj.weight",
"shape": [
2560,
10240
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "688022b86fa887ffb7410dfc41225fda"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 104857600,
"records": [
{
"name": "language_model.model.layers.7.mlp.gate_up_proj.weight",
"shape": [
20480,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 104857600,
"byteOffset": 0
}
],
"md5sum": "1a8be31b63249864de92c0905daa0b0f"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 31478784,
"records": [
{
"name": "language_model.model.layers.6.self_attn.k_proj.weight",
"shape": [
1024,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5242880,
"byteOffset": 0
},
{
"name": "language_model.model.layers.6.self_attn.o_proj.weight",
"shape": [
2560,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10485760,
"byteOffset": 5242880
},
{
"name": "language_model.model.layers.6.self_attn.q_norm.weight",
"shape": [
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 512,
"byteOffset": 15728640
},
{
"name": "language_model.model.layers.6.self_attn.q_proj.weight",
"shape": [
2048,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10485760,
"byteOffset": 15729152
},
{
"name": "language_model.model.layers.6.self_attn.v_proj.weight",
"shape": [
1024,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5242880,
"byteOffset": 26214912
},
{
"name": "language_model.model.layers.7.input_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31457792
},
{
"name": "language_model.model.layers.7.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31462912
},
{
"name": "language_model.model.layers.7.post_feedforward_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31468032
},
{
"name": "language_model.model.layers.7.pre_feedforward_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31473152
},
{
"name": "language_model.model.layers.7.self_attn.k_norm.weight",
"shape": [
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 512,
"byteOffset": 31478272
}
],
"md5sum": "adf1254da3fa1ae01c3b3b9c41cc9a45"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "language_model.model.layers.8.mlp.down_proj.weight",
"shape": [
2560,
10240
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "cb5d6cb94788e9821fddbe1e9ae557a9"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 104857600,
"records": [
{
"name": "language_model.model.layers.8.mlp.gate_up_proj.weight",
"shape": [
20480,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 104857600,
"byteOffset": 0
}
],
"md5sum": "0ebfd053c98f319f2870f67edac118c1"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 31478784,
"records": [
{
"name": "language_model.model.layers.7.self_attn.k_proj.weight",
"shape": [
1024,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5242880,
"byteOffset": 0
},
{
"name": "language_model.model.layers.7.self_attn.o_proj.weight",
"shape": [
2560,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10485760,
"byteOffset": 5242880
},
{
"name": "language_model.model.layers.7.self_attn.q_norm.weight",
"shape": [
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 512,
"byteOffset": 15728640
},
{
"name": "language_model.model.layers.7.self_attn.q_proj.weight",
"shape": [
2048,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10485760,
"byteOffset": 15729152
},
{
"name": "language_model.model.layers.7.self_attn.v_proj.weight",
"shape": [
1024,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5242880,
"byteOffset": 26214912
},
{
"name": "language_model.model.layers.8.input_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31457792
},
{
"name": "language_model.model.layers.8.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31462912
},
{
"name": "language_model.model.layers.8.post_feedforward_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31468032
},
{
"name": "language_model.model.layers.8.pre_feedforward_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31473152
},
{
"name": "language_model.model.layers.8.self_attn.k_norm.weight",
"shape": [
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 512,
"byteOffset": 31478272
}
],
"md5sum": "3e502e178d1653b05dbad92ad79a6f66"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "language_model.model.layers.9.mlp.down_proj.weight",
"shape": [
2560,
10240
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "531b28395a2464843b5e48e275c5c2a3"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 104857600,
"records": [
{
"name": "language_model.model.layers.9.mlp.gate_up_proj.weight",
"shape": [
20480,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 104857600,
"byteOffset": 0
}
],
"md5sum": "dcfe9b70f5ebb22dbcb60cbd250045dd"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 31478784,
"records": [
{
"name": "language_model.model.layers.8.self_attn.k_proj.weight",
"shape": [
1024,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5242880,
"byteOffset": 0
},
{
"name": "language_model.model.layers.8.self_attn.o_proj.weight",
"shape": [
2560,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10485760,
"byteOffset": 5242880
},
{
"name": "language_model.model.layers.8.self_attn.q_norm.weight",
"shape": [
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 512,
"byteOffset": 15728640
},
{
"name": "language_model.model.layers.8.self_attn.q_proj.weight",
"shape": [
2048,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10485760,
"byteOffset": 15729152
},
{
"name": "language_model.model.layers.8.self_attn.v_proj.weight",
"shape": [
1024,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5242880,
"byteOffset": 26214912
},
{
"name": "language_model.model.layers.9.input_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31457792
},
{
"name": "language_model.model.layers.9.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31462912
},
{
"name": "language_model.model.layers.9.post_feedforward_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31468032
},
{
"name": "language_model.model.layers.9.pre_feedforward_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31473152
},
{
"name": "language_model.model.layers.9.self_attn.k_norm.weight",
"shape": [
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 512,
"byteOffset": 31478272
}
],
"md5sum": "240e08000b80f1dfa6d9893cbbda5525"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "language_model.model.layers.14.mlp.down_proj.weight",
"shape": [
2560,
10240
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "22f320fec9de331a7a617159e06b4626"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "language_model.model.layers.15.mlp.down_proj.weight",
"shape": [
2560,
10240
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "2f4fb4b2d94ac5060a668fd41d655f6c"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 104857600,
"records": [
{
"name": "language_model.model.layers.15.mlp.gate_up_proj.weight",
"shape": [
20480,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 104857600,
"byteOffset": 0
}
],
"md5sum": "c4cd551b547637d8839d80b59e9d846d"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 31499264,
"records": [
{
"name": "language_model.model.layers.9.self_attn.k_proj.weight",
"shape": [
1024,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5242880,
"byteOffset": 0
},
{
"name": "language_model.model.layers.9.self_attn.o_proj.weight",
"shape": [
2560,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10485760,
"byteOffset": 5242880
},
{
"name": "language_model.model.layers.9.self_attn.q_norm.weight",
"shape": [
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 512,
"byteOffset": 15728640
},
{
"name": "language_model.model.layers.9.self_attn.q_proj.weight",
"shape": [
2048,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10485760,
"byteOffset": 15729152
},
{
"name": "language_model.model.layers.9.self_attn.v_proj.weight",
"shape": [
1024,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5242880,
"byteOffset": 26214912
},
{
"name": "language_model.model.layers.14.input_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31457792
},
{
"name": "language_model.model.layers.14.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31462912
},
{
"name": "language_model.model.layers.14.post_feedforward_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31468032
},
{
"name": "language_model.model.layers.14.pre_feedforward_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31473152
},
{
"name": "language_model.model.layers.15.input_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31478272
},
{
"name": "language_model.model.layers.15.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31483392
},
{
"name": "language_model.model.layers.15.post_feedforward_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31488512
},
{
"name": "language_model.model.layers.15.pre_feedforward_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31493632
},
{
"name": "language_model.model.layers.15.self_attn.k_norm.weight",
"shape": [
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 512,
"byteOffset": 31498752
}
],
"md5sum": "cb174ebd73fd08ae8523a341c5c7bad4"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "language_model.model.layers.16.mlp.down_proj.weight",
"shape": [
2560,
10240
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "c5dd083f65f984b4103117ca52317326"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 104857600,
"records": [
{
"name": "language_model.model.layers.16.mlp.gate_up_proj.weight",
"shape": [
20480,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 104857600,
"byteOffset": 0
}
],
"md5sum": "ccfb6f3c8a4c209cd43581f670fbaff3"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 31478784,
"records": [
{
"name": "language_model.model.layers.15.self_attn.k_proj.weight",
"shape": [
1024,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5242880,
"byteOffset": 0
},
{
"name": "language_model.model.layers.15.self_attn.o_proj.weight",
"shape": [
2560,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10485760,
"byteOffset": 5242880
},
{
"name": "language_model.model.layers.15.self_attn.q_norm.weight",
"shape": [
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 512,
"byteOffset": 15728640
},
{
"name": "language_model.model.layers.15.self_attn.q_proj.weight",
"shape": [
2048,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10485760,
"byteOffset": 15729152
},
{
"name": "language_model.model.layers.15.self_attn.v_proj.weight",
"shape": [
1024,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5242880,
"byteOffset": 26214912
},
{
"name": "language_model.model.layers.16.input_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31457792
},
{
"name": "language_model.model.layers.16.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31462912
},
{
"name": "language_model.model.layers.16.post_feedforward_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31468032
},
{
"name": "language_model.model.layers.16.pre_feedforward_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31473152
},
{
"name": "language_model.model.layers.16.self_attn.k_norm.weight",
"shape": [
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 512,
"byteOffset": 31478272
}
],
"md5sum": "54a517985fc8eeacf9a90078b741e1d5"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "language_model.model.layers.17.mlp.down_proj.weight",
"shape": [
2560,
10240
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "f796cdab0df249a625e5577890d699b0"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 104857600,
"records": [
{
"name": "language_model.model.layers.17.mlp.gate_up_proj.weight",
"shape": [
20480,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 104857600,
"byteOffset": 0
}
],
"md5sum": "6926ef90c34a6449e52ee257c2da44bb"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 31478784,
"records": [
{
"name": "language_model.model.layers.16.self_attn.k_proj.weight",
"shape": [
1024,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5242880,
"byteOffset": 0
},
{
"name": "language_model.model.layers.16.self_attn.o_proj.weight",
"shape": [
2560,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10485760,
"byteOffset": 5242880
},
{
"name": "language_model.model.layers.16.self_attn.q_norm.weight",
"shape": [
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 512,
"byteOffset": 15728640
},
{
"name": "language_model.model.layers.16.self_attn.q_proj.weight",
"shape": [
2048,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10485760,
"byteOffset": 15729152
},
{
"name": "language_model.model.layers.16.self_attn.v_proj.weight",
"shape": [
1024,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5242880,
"byteOffset": 26214912
},
{
"name": "language_model.model.layers.17.input_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31457792
},
{
"name": "language_model.model.layers.17.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31462912
},
{
"name": "language_model.model.layers.17.post_feedforward_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31468032
},
{
"name": "language_model.model.layers.17.pre_feedforward_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31473152
},
{
"name": "language_model.model.layers.17.self_attn.k_norm.weight",
"shape": [
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 512,
"byteOffset": 31478272
}
],
"md5sum": "0bdc43162ddb6c82582c2ab9335337f1"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "language_model.model.layers.18.mlp.down_proj.weight",
"shape": [
2560,
10240
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "d9c065613f696ebbc48677c626928004"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 104857600,
"records": [
{
"name": "language_model.model.layers.18.mlp.gate_up_proj.weight",
"shape": [
20480,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 104857600,
"byteOffset": 0
}
],
"md5sum": "18fce522c6b08d42079bb2e9a2d5aad5"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 31478784,
"records": [
{
"name": "language_model.model.layers.17.self_attn.k_proj.weight",
"shape": [
1024,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5242880,
"byteOffset": 0
},
{
"name": "language_model.model.layers.17.self_attn.o_proj.weight",
"shape": [
2560,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10485760,
"byteOffset": 5242880
},
{
"name": "language_model.model.layers.17.self_attn.q_norm.weight",
"shape": [
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 512,
"byteOffset": 15728640
},
{
"name": "language_model.model.layers.17.self_attn.q_proj.weight",
"shape": [
2048,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10485760,
"byteOffset": 15729152
},
{
"name": "language_model.model.layers.17.self_attn.v_proj.weight",
"shape": [
1024,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5242880,
"byteOffset": 26214912
},
{
"name": "language_model.model.layers.18.input_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31457792
},
{
"name": "language_model.model.layers.18.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31462912
},
{
"name": "language_model.model.layers.18.post_feedforward_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31468032
},
{
"name": "language_model.model.layers.18.pre_feedforward_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31473152
},
{
"name": "language_model.model.layers.18.self_attn.k_norm.weight",
"shape": [
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 512,
"byteOffset": 31478272
}
],
"md5sum": "89d3fb4fef457b9222e950a8f789681b"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "language_model.model.layers.19.mlp.down_proj.weight",
"shape": [
2560,
10240
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "51ba853b2d441cf49c6b60cb8815ecb9"
},
{
"dataPath": "params_shard_58.bin",
"format": "raw-shard",
"nbytes": 104857600,
"records": [
{
"name": "language_model.model.layers.19.mlp.gate_up_proj.weight",
"shape": [
20480,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 104857600,
"byteOffset": 0
}
],
"md5sum": "9e5e8a1d7edbae61c91c8fd9af36df4b"
},
{
"dataPath": "params_shard_59.bin",
"format": "raw-shard",
"nbytes": 31478784,
"records": [
{
"name": "language_model.model.layers.18.self_attn.k_proj.weight",
"shape": [
1024,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5242880,
"byteOffset": 0
},
{
"name": "language_model.model.layers.18.self_attn.o_proj.weight",
"shape": [
2560,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10485760,
"byteOffset": 5242880
},
{
"name": "language_model.model.layers.18.self_attn.q_norm.weight",
"shape": [
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 512,
"byteOffset": 15728640
},
{
"name": "language_model.model.layers.18.self_attn.q_proj.weight",
"shape": [
2048,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10485760,
"byteOffset": 15729152
},
{
"name": "language_model.model.layers.18.self_attn.v_proj.weight",
"shape": [
1024,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5242880,
"byteOffset": 26214912
},
{
"name": "language_model.model.layers.19.input_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31457792
},
{
"name": "language_model.model.layers.19.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31462912
},
{
"name": "language_model.model.layers.19.post_feedforward_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31468032
},
{
"name": "language_model.model.layers.19.pre_feedforward_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31473152
},
{
"name": "language_model.model.layers.19.self_attn.k_norm.weight",
"shape": [
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 512,
"byteOffset": 31478272
}
],
"md5sum": "a8a3777f1eab756cf0b114a541a54f77"
},
{
"dataPath": "params_shard_60.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "language_model.model.layers.20.mlp.down_proj.weight",
"shape": [
2560,
10240
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "42a2f801f481177db3e4d50f3bf7d9b7"
},
{
"dataPath": "params_shard_61.bin",
"format": "raw-shard",
"nbytes": 104857600,
"records": [
{
"name": "language_model.model.layers.20.mlp.gate_up_proj.weight",
"shape": [
20480,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 104857600,
"byteOffset": 0
}
],
"md5sum": "85e507579a0b6e9deccdb979c95a99be"
},
{
"dataPath": "params_shard_62.bin",
"format": "raw-shard",
"nbytes": 31478784,
"records": [
{
"name": "language_model.model.layers.19.self_attn.k_proj.weight",
"shape": [
1024,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5242880,
"byteOffset": 0
},
{
"name": "language_model.model.layers.19.self_attn.o_proj.weight",
"shape": [
2560,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10485760,
"byteOffset": 5242880
},
{
"name": "language_model.model.layers.19.self_attn.q_norm.weight",
"shape": [
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 512,
"byteOffset": 15728640
},
{
"name": "language_model.model.layers.19.self_attn.q_proj.weight",
"shape": [
2048,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10485760,
"byteOffset": 15729152
},
{
"name": "language_model.model.layers.19.self_attn.v_proj.weight",
"shape": [
1024,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5242880,
"byteOffset": 26214912
},
{
"name": "language_model.model.layers.20.input_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31457792
},
{
"name": "language_model.model.layers.20.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31462912
},
{
"name": "language_model.model.layers.20.post_feedforward_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31468032
},
{
"name": "language_model.model.layers.20.pre_feedforward_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31473152
},
{
"name": "language_model.model.layers.20.self_attn.k_norm.weight",
"shape": [
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 512,
"byteOffset": 31478272
}
],
"md5sum": "3b96216f8c5b88dc7a4fcebc8f1ecdc7"
},
{
"dataPath": "params_shard_63.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "language_model.model.layers.21.mlp.down_proj.weight",
"shape": [
2560,
10240
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "6ab9fd7ef6fa0fe70f6257474e4bd789"
},
{
"dataPath": "params_shard_64.bin",
"format": "raw-shard",
"nbytes": 104857600,
"records": [
{
"name": "language_model.model.layers.21.mlp.gate_up_proj.weight",
"shape": [
20480,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 104857600,
"byteOffset": 0
}
],
"md5sum": "6579bb9178f0b54fbbe07cc4c39505e8"
},
{
"dataPath": "params_shard_65.bin",
"format": "raw-shard",
"nbytes": 31478784,
"records": [
{
"name": "language_model.model.layers.20.self_attn.k_proj.weight",
"shape": [
1024,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5242880,
"byteOffset": 0
},
{
"name": "language_model.model.layers.20.self_attn.o_proj.weight",
"shape": [
2560,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10485760,
"byteOffset": 5242880
},
{
"name": "language_model.model.layers.20.self_attn.q_norm.weight",
"shape": [
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 512,
"byteOffset": 15728640
},
{
"name": "language_model.model.layers.20.self_attn.q_proj.weight",
"shape": [
2048,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10485760,
"byteOffset": 15729152
},
{
"name": "language_model.model.layers.20.self_attn.v_proj.weight",
"shape": [
1024,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5242880,
"byteOffset": 26214912
},
{
"name": "language_model.model.layers.21.input_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31457792
},
{
"name": "language_model.model.layers.21.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31462912
},
{
"name": "language_model.model.layers.21.post_feedforward_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31468032
},
{
"name": "language_model.model.layers.21.pre_feedforward_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31473152
},
{
"name": "language_model.model.layers.21.self_attn.k_norm.weight",
"shape": [
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 512,
"byteOffset": 31478272
}
],
"md5sum": "40b1ed80bbf55113007a6147941cc6ca"
},
{
"dataPath": "params_shard_66.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "language_model.model.layers.22.mlp.down_proj.weight",
"shape": [
2560,
10240
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "fe722f5d6597ae18ef3fb207199f20ff"
},
{
"dataPath": "params_shard_67.bin",
"format": "raw-shard",
"nbytes": 104857600,
"records": [
{
"name": "language_model.model.layers.22.mlp.gate_up_proj.weight",
"shape": [
20480,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 104857600,
"byteOffset": 0
}
],
"md5sum": "5f2d92a18c610475903715de44fbdc29"
},
{
"dataPath": "params_shard_68.bin",
"format": "raw-shard",
"nbytes": 31478784,
"records": [
{
"name": "language_model.model.layers.21.self_attn.k_proj.weight",
"shape": [
1024,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5242880,
"byteOffset": 0
},
{
"name": "language_model.model.layers.21.self_attn.o_proj.weight",
"shape": [
2560,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10485760,
"byteOffset": 5242880
},
{
"name": "language_model.model.layers.21.self_attn.q_norm.weight",
"shape": [
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 512,
"byteOffset": 15728640
},
{
"name": "language_model.model.layers.21.self_attn.q_proj.weight",
"shape": [
2048,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10485760,
"byteOffset": 15729152
},
{
"name": "language_model.model.layers.21.self_attn.v_proj.weight",
"shape": [
1024,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5242880,
"byteOffset": 26214912
},
{
"name": "language_model.model.layers.22.input_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31457792
},
{
"name": "language_model.model.layers.22.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31462912
},
{
"name": "language_model.model.layers.22.post_feedforward_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31468032
},
{
"name": "language_model.model.layers.22.pre_feedforward_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31473152
},
{
"name": "language_model.model.layers.22.self_attn.k_norm.weight",
"shape": [
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 512,
"byteOffset": 31478272
}
],
"md5sum": "82dc3a90687b49eb56c59bb6c87fa7b0"
},
{
"dataPath": "params_shard_69.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "language_model.model.layers.23.mlp.down_proj.weight",
"shape": [
2560,
10240
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "03dc6d98481454095e319ddf44194d82"
},
{
"dataPath": "params_shard_70.bin",
"format": "raw-shard",
"nbytes": 104857600,
"records": [
{
"name": "language_model.model.layers.23.mlp.gate_up_proj.weight",
"shape": [
20480,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 104857600,
"byteOffset": 0
}
],
"md5sum": "e90e93f256e66fc5fc30097598eac0ff"
},
{
"dataPath": "params_shard_71.bin",
"format": "raw-shard",
"nbytes": 31478784,
"records": [
{
"name": "language_model.model.layers.22.self_attn.k_proj.weight",
"shape": [
1024,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5242880,
"byteOffset": 0
},
{
"name": "language_model.model.layers.22.self_attn.o_proj.weight",
"shape": [
2560,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10485760,
"byteOffset": 5242880
},
{
"name": "language_model.model.layers.22.self_attn.q_norm.weight",
"shape": [
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 512,
"byteOffset": 15728640
},
{
"name": "language_model.model.layers.22.self_attn.q_proj.weight",
"shape": [
2048,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10485760,
"byteOffset": 15729152
},
{
"name": "language_model.model.layers.22.self_attn.v_proj.weight",
"shape": [
1024,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5242880,
"byteOffset": 26214912
},
{
"name": "language_model.model.layers.23.input_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31457792
},
{
"name": "language_model.model.layers.23.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31462912
},
{
"name": "language_model.model.layers.23.post_feedforward_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31468032
},
{
"name": "language_model.model.layers.23.pre_feedforward_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31473152
},
{
"name": "language_model.model.layers.23.self_attn.k_norm.weight",
"shape": [
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 512,
"byteOffset": 31478272
}
],
"md5sum": "4f24979c2b6110725a9b87dfbfc16dc7"
},
{
"dataPath": "params_shard_72.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "language_model.model.layers.24.mlp.down_proj.weight",
"shape": [
2560,
10240
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "cd8ec3f88b37bc4bfb3eb77ec08091b7"
},
{
"dataPath": "params_shard_73.bin",
"format": "raw-shard",
"nbytes": 104857600,
"records": [
{
"name": "language_model.model.layers.24.mlp.gate_up_proj.weight",
"shape": [
20480,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 104857600,
"byteOffset": 0
}
],
"md5sum": "626bd6277285e1fc64fbf950d2f39bfe"
},
{
"dataPath": "params_shard_74.bin",
"format": "raw-shard",
"nbytes": 31478784,
"records": [
{
"name": "language_model.model.layers.23.self_attn.k_proj.weight",
"shape": [
1024,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5242880,
"byteOffset": 0
},
{
"name": "language_model.model.layers.23.self_attn.o_proj.weight",
"shape": [
2560,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10485760,
"byteOffset": 5242880
},
{
"name": "language_model.model.layers.23.self_attn.q_norm.weight",
"shape": [
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 512,
"byteOffset": 15728640
},
{
"name": "language_model.model.layers.23.self_attn.q_proj.weight",
"shape": [
2048,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10485760,
"byteOffset": 15729152
},
{
"name": "language_model.model.layers.23.self_attn.v_proj.weight",
"shape": [
1024,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5242880,
"byteOffset": 26214912
},
{
"name": "language_model.model.layers.24.input_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31457792
},
{
"name": "language_model.model.layers.24.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31462912
},
{
"name": "language_model.model.layers.24.post_feedforward_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31468032
},
{
"name": "language_model.model.layers.24.pre_feedforward_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31473152
},
{
"name": "language_model.model.layers.24.self_attn.k_norm.weight",
"shape": [
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 512,
"byteOffset": 31478272
}
],
"md5sum": "9ab76300c035fa659b8732f4619f5224"
},
{
"dataPath": "params_shard_75.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "language_model.model.layers.25.mlp.down_proj.weight",
"shape": [
2560,
10240
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "7afaa26e33a6fff8c5c751e882b5e44d"
},
{
"dataPath": "params_shard_76.bin",
"format": "raw-shard",
"nbytes": 104857600,
"records": [
{
"name": "language_model.model.layers.25.mlp.gate_up_proj.weight",
"shape": [
20480,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 104857600,
"byteOffset": 0
}
],
"md5sum": "31e9de7af2c86001ed53b12fe9aedf63"
},
{
"dataPath": "params_shard_77.bin",
"format": "raw-shard",
"nbytes": 31478784,
"records": [
{
"name": "language_model.model.layers.24.self_attn.k_proj.weight",
"shape": [
1024,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5242880,
"byteOffset": 0
},
{
"name": "language_model.model.layers.24.self_attn.o_proj.weight",
"shape": [
2560,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10485760,
"byteOffset": 5242880
},
{
"name": "language_model.model.layers.24.self_attn.q_norm.weight",
"shape": [
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 512,
"byteOffset": 15728640
},
{
"name": "language_model.model.layers.24.self_attn.q_proj.weight",
"shape": [
2048,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10485760,
"byteOffset": 15729152
},
{
"name": "language_model.model.layers.24.self_attn.v_proj.weight",
"shape": [
1024,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5242880,
"byteOffset": 26214912
},
{
"name": "language_model.model.layers.25.input_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31457792
},
{
"name": "language_model.model.layers.25.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31462912
},
{
"name": "language_model.model.layers.25.post_feedforward_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31468032
},
{
"name": "language_model.model.layers.25.pre_feedforward_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31473152
},
{
"name": "language_model.model.layers.25.self_attn.k_norm.weight",
"shape": [
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 512,
"byteOffset": 31478272
}
],
"md5sum": "fe280796cbdca1d20426840bfb2b07a9"
},
{
"dataPath": "params_shard_78.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "language_model.model.layers.26.mlp.down_proj.weight",
"shape": [
2560,
10240
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "4c67b1250308866759b201d95dfc7632"
},
{
"dataPath": "params_shard_79.bin",
"format": "raw-shard",
"nbytes": 104857600,
"records": [
{
"name": "language_model.model.layers.26.mlp.gate_up_proj.weight",
"shape": [
20480,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 104857600,
"byteOffset": 0
}
],
"md5sum": "14668295a8409af2692602420ff5c412"
},
{
"dataPath": "params_shard_80.bin",
"format": "raw-shard",
"nbytes": 31478784,
"records": [
{
"name": "language_model.model.layers.25.self_attn.k_proj.weight",
"shape": [
1024,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5242880,
"byteOffset": 0
},
{
"name": "language_model.model.layers.25.self_attn.o_proj.weight",
"shape": [
2560,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10485760,
"byteOffset": 5242880
},
{
"name": "language_model.model.layers.25.self_attn.q_norm.weight",
"shape": [
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 512,
"byteOffset": 15728640
},
{
"name": "language_model.model.layers.25.self_attn.q_proj.weight",
"shape": [
2048,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10485760,
"byteOffset": 15729152
},
{
"name": "language_model.model.layers.25.self_attn.v_proj.weight",
"shape": [
1024,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5242880,
"byteOffset": 26214912
},
{
"name": "language_model.model.layers.26.input_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31457792
},
{
"name": "language_model.model.layers.26.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31462912
},
{
"name": "language_model.model.layers.26.post_feedforward_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31468032
},
{
"name": "language_model.model.layers.26.pre_feedforward_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31473152
},
{
"name": "language_model.model.layers.26.self_attn.k_norm.weight",
"shape": [
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 512,
"byteOffset": 31478272
}
],
"md5sum": "8994dbd37eff306a01150bb2f96844f1"
},
{
"dataPath": "params_shard_81.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "language_model.model.layers.27.mlp.down_proj.weight",
"shape": [
2560,
10240
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "c7bf6a727de164fdefc1239f10f763b0"
},
{
"dataPath": "params_shard_82.bin",
"format": "raw-shard",
"nbytes": 104857600,
"records": [
{
"name": "language_model.model.layers.27.mlp.gate_up_proj.weight",
"shape": [
20480,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 104857600,
"byteOffset": 0
}
],
"md5sum": "1af847be1dc9f8e43b76b6e0e42b1f10"
},
{
"dataPath": "params_shard_83.bin",
"format": "raw-shard",
"nbytes": 31478784,
"records": [
{
"name": "language_model.model.layers.26.self_attn.k_proj.weight",
"shape": [
1024,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5242880,
"byteOffset": 0
},
{
"name": "language_model.model.layers.26.self_attn.o_proj.weight",
"shape": [
2560,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10485760,
"byteOffset": 5242880
},
{
"name": "language_model.model.layers.26.self_attn.q_norm.weight",
"shape": [
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 512,
"byteOffset": 15728640
},
{
"name": "language_model.model.layers.26.self_attn.q_proj.weight",
"shape": [
2048,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10485760,
"byteOffset": 15729152
},
{
"name": "language_model.model.layers.26.self_attn.v_proj.weight",
"shape": [
1024,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5242880,
"byteOffset": 26214912
},
{
"name": "language_model.model.layers.27.input_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31457792
},
{
"name": "language_model.model.layers.27.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31462912
},
{
"name": "language_model.model.layers.27.post_feedforward_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31468032
},
{
"name": "language_model.model.layers.27.pre_feedforward_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31473152
},
{
"name": "language_model.model.layers.27.self_attn.k_norm.weight",
"shape": [
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 512,
"byteOffset": 31478272
}
],
"md5sum": "018b39740bbb5859df109e5f5d4b9fc6"
},
{
"dataPath": "params_shard_84.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "language_model.model.layers.28.mlp.down_proj.weight",
"shape": [
2560,
10240
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "16116fdca785706abdea6a3fa9ca4922"
},
{
"dataPath": "params_shard_85.bin",
"format": "raw-shard",
"nbytes": 104857600,
"records": [
{
"name": "language_model.model.layers.28.mlp.gate_up_proj.weight",
"shape": [
20480,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 104857600,
"byteOffset": 0
}
],
"md5sum": "f18daaef6b8f1c853564b59ffce8a2d5"
},
{
"dataPath": "params_shard_86.bin",
"format": "raw-shard",
"nbytes": 31478784,
"records": [
{
"name": "language_model.model.layers.27.self_attn.k_proj.weight",
"shape": [
1024,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5242880,
"byteOffset": 0
},
{
"name": "language_model.model.layers.27.self_attn.o_proj.weight",
"shape": [
2560,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10485760,
"byteOffset": 5242880
},
{
"name": "language_model.model.layers.27.self_attn.q_norm.weight",
"shape": [
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 512,
"byteOffset": 15728640
},
{
"name": "language_model.model.layers.27.self_attn.q_proj.weight",
"shape": [
2048,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10485760,
"byteOffset": 15729152
},
{
"name": "language_model.model.layers.27.self_attn.v_proj.weight",
"shape": [
1024,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5242880,
"byteOffset": 26214912
},
{
"name": "language_model.model.layers.28.input_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31457792
},
{
"name": "language_model.model.layers.28.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31462912
},
{
"name": "language_model.model.layers.28.post_feedforward_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31468032
},
{
"name": "language_model.model.layers.28.pre_feedforward_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31473152
},
{
"name": "language_model.model.layers.28.self_attn.k_norm.weight",
"shape": [
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 512,
"byteOffset": 31478272
}
],
"md5sum": "825372ce702ac9780a104980b8f2e80a"
},
{
"dataPath": "params_shard_87.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "language_model.model.layers.29.mlp.down_proj.weight",
"shape": [
2560,
10240
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "9d3983774d21d858dc10bd427d19823e"
},
{
"dataPath": "params_shard_88.bin",
"format": "raw-shard",
"nbytes": 104857600,
"records": [
{
"name": "language_model.model.layers.29.mlp.gate_up_proj.weight",
"shape": [
20480,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 104857600,
"byteOffset": 0
}
],
"md5sum": "dfa4617388a23b0e9ba7beb99aae398e"
},
{
"dataPath": "params_shard_89.bin",
"format": "raw-shard",
"nbytes": 31478784,
"records": [
{
"name": "language_model.model.layers.28.self_attn.k_proj.weight",
"shape": [
1024,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5242880,
"byteOffset": 0
},
{
"name": "language_model.model.layers.28.self_attn.o_proj.weight",
"shape": [
2560,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10485760,
"byteOffset": 5242880
},
{
"name": "language_model.model.layers.28.self_attn.q_norm.weight",
"shape": [
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 512,
"byteOffset": 15728640
},
{
"name": "language_model.model.layers.28.self_attn.q_proj.weight",
"shape": [
2048,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10485760,
"byteOffset": 15729152
},
{
"name": "language_model.model.layers.28.self_attn.v_proj.weight",
"shape": [
1024,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5242880,
"byteOffset": 26214912
},
{
"name": "language_model.model.layers.29.input_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31457792
},
{
"name": "language_model.model.layers.29.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31462912
},
{
"name": "language_model.model.layers.29.post_feedforward_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31468032
},
{
"name": "language_model.model.layers.29.pre_feedforward_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31473152
},
{
"name": "language_model.model.layers.29.self_attn.k_norm.weight",
"shape": [
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 512,
"byteOffset": 31478272
}
],
"md5sum": "5cef7a8b387644783b06587c62e98e71"
},
{
"dataPath": "params_shard_90.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "language_model.model.layers.30.mlp.down_proj.weight",
"shape": [
2560,
10240
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "4fad661303d2e1ac214fded504b49e23"
},
{
"dataPath": "params_shard_91.bin",
"format": "raw-shard",
"nbytes": 104857600,
"records": [
{
"name": "language_model.model.layers.30.mlp.gate_up_proj.weight",
"shape": [
20480,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 104857600,
"byteOffset": 0
}
],
"md5sum": "b3074cfbd2f427a642938de01f69cb2e"
},
{
"dataPath": "params_shard_92.bin",
"format": "raw-shard",
"nbytes": 31478784,
"records": [
{
"name": "language_model.model.layers.29.self_attn.k_proj.weight",
"shape": [
1024,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5242880,
"byteOffset": 0
},
{
"name": "language_model.model.layers.29.self_attn.o_proj.weight",
"shape": [
2560,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10485760,
"byteOffset": 5242880
},
{
"name": "language_model.model.layers.29.self_attn.q_norm.weight",
"shape": [
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 512,
"byteOffset": 15728640
},
{
"name": "language_model.model.layers.29.self_attn.q_proj.weight",
"shape": [
2048,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10485760,
"byteOffset": 15729152
},
{
"name": "language_model.model.layers.29.self_attn.v_proj.weight",
"shape": [
1024,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5242880,
"byteOffset": 26214912
},
{
"name": "language_model.model.layers.30.input_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31457792
},
{
"name": "language_model.model.layers.30.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31462912
},
{
"name": "language_model.model.layers.30.post_feedforward_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31468032
},
{
"name": "language_model.model.layers.30.pre_feedforward_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31473152
},
{
"name": "language_model.model.layers.30.self_attn.k_norm.weight",
"shape": [
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 512,
"byteOffset": 31478272
}
],
"md5sum": "2662cb3d54333c0161e376f5b01a08aa"
},
{
"dataPath": "params_shard_93.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "language_model.model.layers.31.mlp.down_proj.weight",
"shape": [
2560,
10240
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "9a36b13c0872c445f78f56746261075d"
},
{
"dataPath": "params_shard_94.bin",
"format": "raw-shard",
"nbytes": 104857600,
"records": [
{
"name": "language_model.model.layers.31.mlp.gate_up_proj.weight",
"shape": [
20480,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 104857600,
"byteOffset": 0
}
],
"md5sum": "a4c1807f0d53ec0aa46106da01b9c2a1"
},
{
"dataPath": "params_shard_95.bin",
"format": "raw-shard",
"nbytes": 31478784,
"records": [
{
"name": "language_model.model.layers.30.self_attn.k_proj.weight",
"shape": [
1024,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5242880,
"byteOffset": 0
},
{
"name": "language_model.model.layers.30.self_attn.o_proj.weight",
"shape": [
2560,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10485760,
"byteOffset": 5242880
},
{
"name": "language_model.model.layers.30.self_attn.q_norm.weight",
"shape": [
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 512,
"byteOffset": 15728640
},
{
"name": "language_model.model.layers.30.self_attn.q_proj.weight",
"shape": [
2048,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10485760,
"byteOffset": 15729152
},
{
"name": "language_model.model.layers.30.self_attn.v_proj.weight",
"shape": [
1024,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5242880,
"byteOffset": 26214912
},
{
"name": "language_model.model.layers.31.input_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31457792
},
{
"name": "language_model.model.layers.31.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31462912
},
{
"name": "language_model.model.layers.31.post_feedforward_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31468032
},
{
"name": "language_model.model.layers.31.pre_feedforward_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31473152
},
{
"name": "language_model.model.layers.31.self_attn.k_norm.weight",
"shape": [
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 512,
"byteOffset": 31478272
}
],
"md5sum": "87c2a1494ba26fb00ff45e8ff5ada19e"
},
{
"dataPath": "params_shard_96.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "language_model.model.layers.32.mlp.down_proj.weight",
"shape": [
2560,
10240
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "ee6ccc68642e46042ef241c109708c2b"
},
{
"dataPath": "params_shard_97.bin",
"format": "raw-shard",
"nbytes": 104857600,
"records": [
{
"name": "language_model.model.layers.32.mlp.gate_up_proj.weight",
"shape": [
20480,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 104857600,
"byteOffset": 0
}
],
"md5sum": "5ba7f8da8dfbcfe9a42db091e06ba5bc"
},
{
"dataPath": "params_shard_98.bin",
"format": "raw-shard",
"nbytes": 31478784,
"records": [
{
"name": "language_model.model.layers.31.self_attn.k_proj.weight",
"shape": [
1024,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5242880,
"byteOffset": 0
},
{
"name": "language_model.model.layers.31.self_attn.o_proj.weight",
"shape": [
2560,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10485760,
"byteOffset": 5242880
},
{
"name": "language_model.model.layers.31.self_attn.q_norm.weight",
"shape": [
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 512,
"byteOffset": 15728640
},
{
"name": "language_model.model.layers.31.self_attn.q_proj.weight",
"shape": [
2048,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10485760,
"byteOffset": 15729152
},
{
"name": "language_model.model.layers.31.self_attn.v_proj.weight",
"shape": [
1024,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5242880,
"byteOffset": 26214912
},
{
"name": "language_model.model.layers.32.input_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31457792
},
{
"name": "language_model.model.layers.32.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31462912
},
{
"name": "language_model.model.layers.32.post_feedforward_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31468032
},
{
"name": "language_model.model.layers.32.pre_feedforward_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31473152
},
{
"name": "language_model.model.layers.32.self_attn.k_norm.weight",
"shape": [
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 512,
"byteOffset": 31478272
}
],
"md5sum": "084178fe40d055a35438acaa2b54e51b"
},
{
"dataPath": "params_shard_99.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "language_model.model.layers.33.mlp.down_proj.weight",
"shape": [
2560,
10240
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "0c744d908afcf5961227e7ccb981c84e"
},
{
"dataPath": "params_shard_100.bin",
"format": "raw-shard",
"nbytes": 104857600,
"records": [
{
"name": "language_model.model.layers.33.mlp.gate_up_proj.weight",
"shape": [
20480,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 104857600,
"byteOffset": 0
}
],
"md5sum": "3160a5e21675a6b87e12b1dd6266829f"
},
{
"dataPath": "params_shard_101.bin",
"format": "raw-shard",
"nbytes": 31478784,
"records": [
{
"name": "language_model.model.layers.32.self_attn.k_proj.weight",
"shape": [
1024,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5242880,
"byteOffset": 0
},
{
"name": "language_model.model.layers.32.self_attn.o_proj.weight",
"shape": [
2560,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10485760,
"byteOffset": 5242880
},
{
"name": "language_model.model.layers.32.self_attn.q_norm.weight",
"shape": [
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 512,
"byteOffset": 15728640
},
{
"name": "language_model.model.layers.32.self_attn.q_proj.weight",
"shape": [
2048,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10485760,
"byteOffset": 15729152
},
{
"name": "language_model.model.layers.32.self_attn.v_proj.weight",
"shape": [
1024,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5242880,
"byteOffset": 26214912
},
{
"name": "language_model.model.layers.33.input_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31457792
},
{
"name": "language_model.model.layers.33.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31462912
},
{
"name": "language_model.model.layers.33.post_feedforward_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31468032
},
{
"name": "language_model.model.layers.33.pre_feedforward_layernorm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31473152
},
{
"name": "language_model.model.layers.33.self_attn.k_norm.weight",
"shape": [
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 512,
"byteOffset": 31478272
}
],
"md5sum": "d9d3f6f360469b81ede4d1adad560641"
},
{
"dataPath": "params_shard_102.bin",
"format": "raw-shard",
"nbytes": 31462912,
"records": [
{
"name": "language_model.model.layers.33.self_attn.k_proj.weight",
"shape": [
1024,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5242880,
"byteOffset": 0
},
{
"name": "language_model.model.layers.33.self_attn.o_proj.weight",
"shape": [
2560,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10485760,
"byteOffset": 5242880
},
{
"name": "language_model.model.layers.33.self_attn.q_norm.weight",
"shape": [
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 512,
"byteOffset": 15728640
},
{
"name": "language_model.model.layers.33.self_attn.q_proj.weight",
"shape": [
2048,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10485760,
"byteOffset": 15729152
},
{
"name": "language_model.model.layers.33.self_attn.v_proj.weight",
"shape": [
1024,
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5242880,
"byteOffset": 26214912
},
{
"name": "language_model.model.norm.weight",
"shape": [
2560
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5120,
"byteOffset": 31457792
}
],
"md5sum": "5d61f99e430f9e8eee4a86349c2e5b77"
}
]
}