CharlieFRuan's picture
Initial commit
d9ad748 verified
{
"metadata": {
"ParamSize": 805,
"ParamBytes": 38801408000.0,
"BitsPerParam": 4.353045149919394
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 131137536,
"records": [
{
"name": "lm_head.q_weight",
"shape": [
32016,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 131137536,
"byteOffset": 0
}
],
"md5sum": "1a6aa930cf6ebcbf431eb8979007e3a0"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.78.mlp.down_proj.q_weight",
"shape": [
8192,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "c83377dbb465f85be41fcc9103f72994"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.78.mlp.gate_up_proj.q_weight",
"shape": [
57344,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "0044ff0ed1cbe01c2cca0b52d93f09fa"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.78.mlp.gate_up_proj.q_scale",
"shape": [
57344,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "0c23d8490f56951073bb5dac2c8ade4b"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.78.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "3e5c7bef0802c6406fe2dc086df00dc3"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 31105024,
"records": [
{
"name": "lm_head.q_scale",
"shape": [
32016,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16392192,
"byteOffset": 0
},
{
"name": "model.layers.78.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 16392192
},
{
"name": "model.layers.78.mlp.down_proj.q_scale",
"shape": [
8192,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 16408576
},
{
"name": "model.layers.78.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 31088640
}
],
"md5sum": "16f6f30c6e3b730f2030f9096d53b555"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.79.mlp.down_proj.q_weight",
"shape": [
8192,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "56ba4bbbf57348d81f879dc7734ef732"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.79.mlp.gate_up_proj.q_weight",
"shape": [
57344,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "6c6fa04c463b30220ff47c4d75355884"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.79.mlp.gate_up_proj.q_scale",
"shape": [
57344,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "f5ba9f80877ce021f44bc0b08f8f2c94"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.79.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "c9154d5eb5a3294b5c9fe8ea5cec36af"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.79.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "9b8eb8b2d667d3d32a7da7f668537218"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 131137536,
"records": [
{
"name": "model.embed_tokens.q_weight",
"shape": [
32016,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 131137536,
"byteOffset": 0
}
],
"md5sum": "a1ffee69145fdea11ea3d3b0f6a3115f"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 28360704,
"records": [
{
"name": "model.layers.78.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 0
},
{
"name": "model.layers.79.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 4194304
},
{
"name": "model.layers.79.mlp.down_proj.q_scale",
"shape": [
8192,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 4210688
},
{
"name": "model.layers.79.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 18890752
},
{
"name": "model.layers.79.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 18907136
},
{
"name": "model.layers.79.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 24150016
},
{
"name": "model.norm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 28344320
}
],
"md5sum": "f371609de565c93e1a65ed030b39facd"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.0.mlp.down_proj.q_weight",
"shape": [
8192,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "c28988017aac833acaf830dfdec3bcca"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.q_weight",
"shape": [
57344,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "2def2012579073ceb5b66e96499f8bf5"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.q_scale",
"shape": [
57344,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "7c3fc2d96d6f6619fe1a2548e7c59caf"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.0.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "2ff2d42a246f0bd68ebb997c65e15405"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 31105024,
"records": [
{
"name": "model.embed_tokens.q_scale",
"shape": [
32016,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16392192,
"byteOffset": 0
},
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 16392192
},
{
"name": "model.layers.0.mlp.down_proj.q_scale",
"shape": [
8192,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 16408576
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 31088640
}
],
"md5sum": "0f87643f4b37693aded439e869404f8a"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.0.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "07aab81f49040e77db5730395d51de06"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.1.mlp.down_proj.q_weight",
"shape": [
8192,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "f1db8811a9523f83ecd9202b8787113d"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.1.mlp.gate_up_proj.q_weight",
"shape": [
57344,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "c87ce13e738a5c89e7e3a51c25812a7a"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.1.mlp.gate_up_proj.q_scale",
"shape": [
57344,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "e5dbcdb9179e69ff6d4c8dd59abe8757"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.1.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "0d17c31196ac186b485c0f7af77bd7e3"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.1.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "a29ef83550710b885227e26f7324370e"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 29392896,
"records": [
{
"name": "model.layers.0.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 0
},
{
"name": "model.layers.0.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 5242880
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 9437184
},
{
"name": "model.layers.1.mlp.down_proj.q_scale",
"shape": [
8192,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 9453568
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 24133632
},
{
"name": "model.layers.1.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 24150016
}
],
"md5sum": "04a8b4f9a6cf67cce3ae56862e4be2aa"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.q_weight",
"shape": [
57344,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "eba46fee73614503c290800a988efcb2"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.q_scale",
"shape": [
57344,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "e9eba56282e5f54a887235d1304c3deb"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.2.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "74a64130df125fe9e939f8b58587f5b7"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.2.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "9eb953adb9444b86fa98df2d4fe82055"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.10.mlp.down_proj.q_weight",
"shape": [
8192,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "2f17d41196a5ad03340b7ee01d50c151"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.10.mlp.gate_up_proj.q_weight",
"shape": [
57344,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "4c9698b80e34933735238c766e013e4f"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.10.mlp.gate_up_proj.q_scale",
"shape": [
57344,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "6cae38a913c420fa6902f98c3c1adfb9"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.10.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "3d0c6b0899d2add784460b4d2886052e"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 28344320,
"records": [
{
"name": "model.layers.1.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 0
},
{
"name": "model.layers.2.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 4194304
},
{
"name": "model.layers.2.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 9437184
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 13631488
},
{
"name": "model.layers.10.mlp.down_proj.q_scale",
"shape": [
8192,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 13647872
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 28327936
}
],
"md5sum": "c6dc82bc0daace449d5fa4fcddab6d85"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.10.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "b47486ee9b750f5e7583b7e78a84e1fc"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.8.mlp.down_proj.q_weight",
"shape": [
8192,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "71484c9c05567bba0fea0c08dd5d801d"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.q_weight",
"shape": [
57344,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "07d19fd7878ff7d2a4cc1d184aa15d75"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.q_scale",
"shape": [
57344,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "fa9bcd8d47f0a18d5a4031b18522ef70"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.8.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "2041e1fa3f778b5ed0e58e3d0ef8a297"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.9.mlp.down_proj.q_weight",
"shape": [
8192,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "07531aa18a6baee26af60db7e86fe511"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 28360704,
"records": [
{
"name": "model.layers.10.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 0
},
{
"name": "model.layers.10.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 5242880
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 9437184
},
{
"name": "model.layers.8.mlp.down_proj.q_scale",
"shape": [
8192,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 9453568
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 24133632
},
{
"name": "model.layers.8.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 24150016
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 28344320
}
],
"md5sum": "83b3e8d1754d1e7db339d82d23415443"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.q_weight",
"shape": [
57344,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "ac7bab1ead91f651effaf6b4b05c1d3c"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.q_scale",
"shape": [
57344,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "8698873e6bfbecae68a46640ddb3dc3e"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.9.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "448981afdf61f0310a6a9cda32253764"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.9.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "82eacc5f0039f0e2d1efc38581d58095"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.11.mlp.down_proj.q_weight",
"shape": [
8192,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "c19c9b455550bd728aa94b7bbd0dbc0e"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 24150016,
"records": [
{
"name": "model.layers.9.mlp.down_proj.q_scale",
"shape": [
8192,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 0
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 14680064
},
{
"name": "model.layers.9.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 14696448
},
{
"name": "model.layers.9.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 19939328
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 24133632
}
],
"md5sum": "5354fa5d54591690edd27f875048a0a0"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.11.mlp.gate_up_proj.q_weight",
"shape": [
57344,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "987177880edc326bd5d76ae0e7828f7d"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.11.mlp.gate_up_proj.q_scale",
"shape": [
57344,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "28163b15c26e56fb850a8273bfd4af19"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.11.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "eae6d00fd6a46f1833005ebe0ac5cdf9"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.11.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "91f8f70a3ced772194defbef25e412a4"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.12.mlp.down_proj.q_weight",
"shape": [
8192,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "d7b4b166ca59d309ec0999657c076820"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 24150016,
"records": [
{
"name": "model.layers.11.mlp.down_proj.q_scale",
"shape": [
8192,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 0
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 14680064
},
{
"name": "model.layers.11.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 14696448
},
{
"name": "model.layers.11.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 19939328
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 24133632
}
],
"md5sum": "345602dc2ddcb80eb9584a275ad03a7d"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.q_weight",
"shape": [
57344,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "ddb1ed1e09e5c622cecf307859dc3bbc"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.q_scale",
"shape": [
57344,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "52ba37eacc371f5f3ab50c1bba4b0a66"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.12.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "a4592e89ba56556309306d82bf23927b"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.12.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "597655d872f8760a1a3a19ab18adbdf1"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.13.mlp.gate_up_proj.q_weight",
"shape": [
57344,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "63953849148abeef6485e6cfd4f025eb"
},
{
"dataPath": "params_shard_58.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.13.mlp.gate_up_proj.q_scale",
"shape": [
57344,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "fdff4a2f85dcb9573ff4bb21b7c98838"
},
{
"dataPath": "params_shard_59.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.13.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "7a684c107599b0de32721334b44d2fd8"
},
{
"dataPath": "params_shard_60.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.13.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "775fd033f0f021225774eb82da97eab5"
},
{
"dataPath": "params_shard_61.bin",
"format": "raw-shard",
"nbytes": 29376512,
"records": [
{
"name": "model.layers.12.mlp.down_proj.q_scale",
"shape": [
8192,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 0
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 14680064
},
{
"name": "model.layers.12.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 14696448
},
{
"name": "model.layers.12.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 19939328
},
{
"name": "model.layers.13.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 24133632
}
],
"md5sum": "fe6006f175e4da5abc79d22569e44733"
},
{
"dataPath": "params_shard_62.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.13.mlp.down_proj.q_weight",
"shape": [
8192,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "d2293dbde4cdd38dacd85f7b482c81e5"
},
{
"dataPath": "params_shard_63.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.14.mlp.down_proj.q_weight",
"shape": [
8192,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "5273d4b29b7ed5be589418ca96f3a3e9"
},
{
"dataPath": "params_shard_64.bin",
"format": "raw-shard",
"nbytes": 18923520,
"records": [
{
"name": "model.layers.13.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 0
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 4194304
},
{
"name": "model.layers.13.mlp.down_proj.q_scale",
"shape": [
8192,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 4210688
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 18890752
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 18907136
}
],
"md5sum": "11e5b64045be506d713015443f0927d2"
},
{
"dataPath": "params_shard_65.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.q_weight",
"shape": [
57344,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "c8a395308fbe111a541b376a06f91245"
},
{
"dataPath": "params_shard_66.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.q_scale",
"shape": [
57344,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "ecb6bb5e91a6a78abf274c3cc19d823b"
},
{
"dataPath": "params_shard_67.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.14.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "5d7ffa12296886ba92be1360765538f3"
},
{
"dataPath": "params_shard_68.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.14.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "e7714167a35e720da80995795ee7d63a"
},
{
"dataPath": "params_shard_69.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.15.mlp.down_proj.q_weight",
"shape": [
8192,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "390145ff7f4331f9e80fdfaf573c285a"
},
{
"dataPath": "params_shard_70.bin",
"format": "raw-shard",
"nbytes": 24150016,
"records": [
{
"name": "model.layers.14.mlp.down_proj.q_scale",
"shape": [
8192,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 0
},
{
"name": "model.layers.14.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 14680064
},
{
"name": "model.layers.14.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 14696448
},
{
"name": "model.layers.14.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 19939328
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 24133632
}
],
"md5sum": "dd0ec4352d345e2aa41d9391690c724c"
},
{
"dataPath": "params_shard_71.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.15.mlp.gate_up_proj.q_weight",
"shape": [
57344,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "87e9af0ce3dde1906843a5d3b03ad024"
},
{
"dataPath": "params_shard_72.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.15.mlp.gate_up_proj.q_scale",
"shape": [
57344,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "f434779633839ab77e4c4e21228aba72"
},
{
"dataPath": "params_shard_73.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.15.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "e8a51a0e4563ec88fdf5906cc0e1be8a"
},
{
"dataPath": "params_shard_74.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.15.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "303f4d64612f14ec753989e1bb70925f"
},
{
"dataPath": "params_shard_75.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.q_weight",
"shape": [
57344,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "e796b3d3e3abf6f34117f24e5619899d"
},
{
"dataPath": "params_shard_76.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.q_scale",
"shape": [
57344,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "5d7e955f4a4b5269db890a21844a3c0f"
},
{
"dataPath": "params_shard_77.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.16.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "32c8505feda05f73caea1743ec69337b"
},
{
"dataPath": "params_shard_78.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.16.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "fa595ad42c8795c18bd3ced3b5420768"
},
{
"dataPath": "params_shard_79.bin",
"format": "raw-shard",
"nbytes": 29376512,
"records": [
{
"name": "model.layers.15.mlp.down_proj.q_scale",
"shape": [
8192,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 0
},
{
"name": "model.layers.15.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 14680064
},
{
"name": "model.layers.15.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 14696448
},
{
"name": "model.layers.15.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 19939328
},
{
"name": "model.layers.16.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 24133632
}
],
"md5sum": "c77714b596e4276b39ae0c25714f728f"
},
{
"dataPath": "params_shard_80.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.16.mlp.down_proj.q_weight",
"shape": [
8192,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "6132727ae14038a95c3742a43868da56"
},
{
"dataPath": "params_shard_81.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.17.mlp.down_proj.q_weight",
"shape": [
8192,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "501d3cba3a3c5b719868bedd800d78d8"
},
{
"dataPath": "params_shard_82.bin",
"format": "raw-shard",
"nbytes": 18923520,
"records": [
{
"name": "model.layers.16.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 0
},
{
"name": "model.layers.16.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 4194304
},
{
"name": "model.layers.16.mlp.down_proj.q_scale",
"shape": [
8192,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 4210688
},
{
"name": "model.layers.16.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 18890752
},
{
"name": "model.layers.17.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 18907136
}
],
"md5sum": "9580554298dc5fe16cc95cbf1df2a6d9"
},
{
"dataPath": "params_shard_83.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.17.mlp.gate_up_proj.q_weight",
"shape": [
57344,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "857fea31d804132d881ef4e23ab6b7ce"
},
{
"dataPath": "params_shard_84.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.17.mlp.gate_up_proj.q_scale",
"shape": [
57344,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "ccbccac47ee785bb6c4a795ccb480a97"
},
{
"dataPath": "params_shard_85.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.17.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "5f892550295fe1950bc05e448e0e63b8"
},
{
"dataPath": "params_shard_86.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.17.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "0c2f0e7acbdf9cca1f6723f8887f1702"
},
{
"dataPath": "params_shard_87.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.18.mlp.down_proj.q_weight",
"shape": [
8192,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "25663a56433ee7d22c0c041dc050e1fc"
},
{
"dataPath": "params_shard_88.bin",
"format": "raw-shard",
"nbytes": 24150016,
"records": [
{
"name": "model.layers.17.mlp.down_proj.q_scale",
"shape": [
8192,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 0
},
{
"name": "model.layers.17.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 14680064
},
{
"name": "model.layers.17.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 14696448
},
{
"name": "model.layers.17.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 19939328
},
{
"name": "model.layers.18.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 24133632
}
],
"md5sum": "070528c8c064222b93c5c23508ab6c95"
},
{
"dataPath": "params_shard_89.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.18.mlp.gate_up_proj.q_weight",
"shape": [
57344,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "11d0334b8aa634f66e9baca14c3f40b6"
},
{
"dataPath": "params_shard_90.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.18.mlp.gate_up_proj.q_scale",
"shape": [
57344,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "7b1c30c604ce80609dc11203beb51477"
},
{
"dataPath": "params_shard_91.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.18.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "a9167e5b33d1bb88b61661932916a5b8"
},
{
"dataPath": "params_shard_92.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.18.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "de5191b36144a50cb63ec780609e4d85"
},
{
"dataPath": "params_shard_93.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.19.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "36c27a2acb406f4e28d42913de799ac6"
},
{
"dataPath": "params_shard_94.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.19.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "d57b397d30b00c1fcfa3b0fdaf0ed5e7"
},
{
"dataPath": "params_shard_95.bin",
"format": "raw-shard",
"nbytes": 29376512,
"records": [
{
"name": "model.layers.18.mlp.down_proj.q_scale",
"shape": [
8192,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 0
},
{
"name": "model.layers.18.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 14680064
},
{
"name": "model.layers.18.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 14696448
},
{
"name": "model.layers.18.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 19939328
},
{
"name": "model.layers.19.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 24133632
}
],
"md5sum": "5537793bd96629fb7abf8d07534c295f"
},
{
"dataPath": "params_shard_96.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.19.mlp.down_proj.q_weight",
"shape": [
8192,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "0222571d87cf2fd230089ca9474fe6d0"
},
{
"dataPath": "params_shard_97.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.19.mlp.gate_up_proj.q_weight",
"shape": [
57344,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "e1ac3555618a8d8e5ae8128aa2f31ec5"
},
{
"dataPath": "params_shard_98.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.19.mlp.gate_up_proj.q_scale",
"shape": [
57344,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "d474725ab330c214a0e85c2edbd6ffbb"
},
{
"dataPath": "params_shard_99.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.20.mlp.down_proj.q_weight",
"shape": [
8192,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "6d57c156530d3481f21cef165cb0b1f9"
},
{
"dataPath": "params_shard_100.bin",
"format": "raw-shard",
"nbytes": 18923520,
"records": [
{
"name": "model.layers.19.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 0
},
{
"name": "model.layers.19.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 4194304
},
{
"name": "model.layers.19.mlp.down_proj.q_scale",
"shape": [
8192,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 4210688
},
{
"name": "model.layers.19.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 18890752
},
{
"name": "model.layers.20.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 18907136
}
],
"md5sum": "67377f493fd1770f41a9373623eabe9f"
},
{
"dataPath": "params_shard_101.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.20.mlp.gate_up_proj.q_weight",
"shape": [
57344,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "5c43c3c16a44efb4db2e4abb5c86c832"
},
{
"dataPath": "params_shard_102.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.20.mlp.gate_up_proj.q_scale",
"shape": [
57344,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "21003f63ec81b8285e05fcc5041cde36"
},
{
"dataPath": "params_shard_103.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.20.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "c07c30f35123b02f233df19679910691"
},
{
"dataPath": "params_shard_104.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.20.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "b626f9362254db3cfe0018e1f30652e4"
},
{
"dataPath": "params_shard_105.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.21.mlp.down_proj.q_weight",
"shape": [
8192,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "7bb1ccf0257b2633879c2792f4118ba8"
},
{
"dataPath": "params_shard_106.bin",
"format": "raw-shard",
"nbytes": 24150016,
"records": [
{
"name": "model.layers.20.mlp.down_proj.q_scale",
"shape": [
8192,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 0
},
{
"name": "model.layers.20.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 14680064
},
{
"name": "model.layers.20.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 14696448
},
{
"name": "model.layers.20.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 19939328
},
{
"name": "model.layers.21.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 24133632
}
],
"md5sum": "6d69bfe69081d2dc13ea46c302448011"
},
{
"dataPath": "params_shard_107.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.21.mlp.gate_up_proj.q_weight",
"shape": [
57344,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "05d4489954545d889c4c86ca904790d9"
},
{
"dataPath": "params_shard_108.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.21.mlp.gate_up_proj.q_scale",
"shape": [
57344,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "f676820e044153c1456acdcf86aa5355"
},
{
"dataPath": "params_shard_109.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.21.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "24af9e9c63018fdfb50ff7600130425c"
},
{
"dataPath": "params_shard_110.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.21.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "76e6e2dfb6b1ac5a9547511aa4fbff2e"
},
{
"dataPath": "params_shard_111.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.22.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "2b7af6591dc0a5688053b702c34046d7"
},
{
"dataPath": "params_shard_112.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.2.mlp.down_proj.q_weight",
"shape": [
8192,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "341b86372d497394a9f7a9f71be2b008"
},
{
"dataPath": "params_shard_113.bin",
"format": "raw-shard",
"nbytes": 29392896,
"records": [
{
"name": "model.layers.21.mlp.down_proj.q_scale",
"shape": [
8192,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 0
},
{
"name": "model.layers.21.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 14680064
},
{
"name": "model.layers.21.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 14696448
},
{
"name": "model.layers.21.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 19939328
},
{
"name": "model.layers.22.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 24133632
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 29376512
}
],
"md5sum": "69fa375ed7dec263740d5e40c15529ff"
},
{
"dataPath": "params_shard_114.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.3.mlp.down_proj.q_weight",
"shape": [
8192,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "be98b15ba292bb71b7b3535a9b5eaf11"
},
{
"dataPath": "params_shard_115.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.q_weight",
"shape": [
57344,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "57cbb266316e5693eb77179a2f3edd7b"
},
{
"dataPath": "params_shard_116.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.q_scale",
"shape": [
57344,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "ed3f2b36c06fd4f24bbe8d8533c9d91e"
},
{
"dataPath": "params_shard_117.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.3.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "26b9efb4a76be03d415301415a6b547f"
},
{
"dataPath": "params_shard_118.bin",
"format": "raw-shard",
"nbytes": 29409280,
"records": [
{
"name": "model.layers.2.mlp.down_proj.q_scale",
"shape": [
8192,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 0
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 14680064
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 14696448
},
{
"name": "model.layers.3.mlp.down_proj.q_scale",
"shape": [
8192,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 14712832
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 29392896
}
],
"md5sum": "4306308ee69f8c392d56b9bf04d3c453"
},
{
"dataPath": "params_shard_119.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.3.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "49ac0acb33c98d33bd6bb5032f44e741"
},
{
"dataPath": "params_shard_120.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.4.mlp.down_proj.q_weight",
"shape": [
8192,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "bd37afff60779fa19ad9c934f48702b4"
},
{
"dataPath": "params_shard_121.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.q_weight",
"shape": [
57344,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "bc64ad4974f32bdc93de209f7f144b5a"
},
{
"dataPath": "params_shard_122.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.q_scale",
"shape": [
57344,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "0bfe09ccfb07c84bd9b0956ce727759e"
},
{
"dataPath": "params_shard_123.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.4.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "67fff70069dedcffe78b5d44537f94fa"
},
{
"dataPath": "params_shard_124.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.4.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "367cb9e8896046fe80fe403964c12497"
},
{
"dataPath": "params_shard_125.bin",
"format": "raw-shard",
"nbytes": 29392896,
"records": [
{
"name": "model.layers.3.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 0
},
{
"name": "model.layers.3.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 5242880
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 9437184
},
{
"name": "model.layers.4.mlp.down_proj.q_scale",
"shape": [
8192,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 9453568
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 24133632
},
{
"name": "model.layers.4.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 24150016
}
],
"md5sum": "c23de6ca617916783e44f7771e1f36ba"
},
{
"dataPath": "params_shard_126.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.5.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "3aa8b1f71181ce21f6e8bcd746b3a8b0"
},
{
"dataPath": "params_shard_127.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.5.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "d6529ea6c5d697d7f8adcc837fb9f0cb"
},
{
"dataPath": "params_shard_128.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.22.mlp.down_proj.q_weight",
"shape": [
8192,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "4d30e70878aeddcfbfda7b65d0b1d676"
},
{
"dataPath": "params_shard_129.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.22.mlp.gate_up_proj.q_weight",
"shape": [
57344,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "f541e0a4984fc3ed5fda41b47bb5c7db"
},
{
"dataPath": "params_shard_130.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.22.mlp.gate_up_proj.q_scale",
"shape": [
57344,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "8edcfa8a7392b5cc7ea20e2a545a49f7"
},
{
"dataPath": "params_shard_131.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.22.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "536752e4da6371bffd5660d5bb644a3c"
},
{
"dataPath": "params_shard_132.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.23.mlp.down_proj.q_weight",
"shape": [
8192,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "86c7f11ddd3cc7d041c92ea8a4388e29"
},
{
"dataPath": "params_shard_133.bin",
"format": "raw-shard",
"nbytes": 32555008,
"records": [
{
"name": "model.layers.4.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 0
},
{
"name": "model.layers.5.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 4194304
},
{
"name": "model.layers.5.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 9437184
},
{
"name": "model.layers.22.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 13631488
},
{
"name": "model.layers.22.mlp.down_proj.q_scale",
"shape": [
8192,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 13647872
},
{
"name": "model.layers.22.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 28327936
},
{
"name": "model.layers.22.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 28344320
},
{
"name": "model.layers.23.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 32538624
}
],
"md5sum": "b185c36e876abf8d43ba84ab21a749d5"
},
{
"dataPath": "params_shard_134.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.23.mlp.gate_up_proj.q_weight",
"shape": [
57344,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "f75acfb2d024bcd49c2fef2bbc2311fa"
},
{
"dataPath": "params_shard_135.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.23.mlp.gate_up_proj.q_scale",
"shape": [
57344,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "8976c9cd5040da32798b81e4b4eb627a"
},
{
"dataPath": "params_shard_136.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.23.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "54b7576c620045de02cb8aac3e404466"
},
{
"dataPath": "params_shard_137.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.23.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "27d934a9e972a7004d35d73b048ac4d2"
},
{
"dataPath": "params_shard_138.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.24.mlp.down_proj.q_weight",
"shape": [
8192,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "49fc35b4c206708544308593653efabf"
},
{
"dataPath": "params_shard_139.bin",
"format": "raw-shard",
"nbytes": 24150016,
"records": [
{
"name": "model.layers.23.mlp.down_proj.q_scale",
"shape": [
8192,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 0
},
{
"name": "model.layers.23.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 14680064
},
{
"name": "model.layers.23.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 14696448
},
{
"name": "model.layers.23.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 19939328
},
{
"name": "model.layers.24.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 24133632
}
],
"md5sum": "06f5687e1cf5eefbf2699c1eaa8b9276"
},
{
"dataPath": "params_shard_140.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.24.mlp.gate_up_proj.q_weight",
"shape": [
57344,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "f249c64d211b1ac4cd4f293b8fc8408f"
},
{
"dataPath": "params_shard_141.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.24.mlp.gate_up_proj.q_scale",
"shape": [
57344,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "a033fa0dd62add7ba1485ae194256ecf"
},
{
"dataPath": "params_shard_142.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.24.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "277c20844790aaac8add14456bb05106"
},
{
"dataPath": "params_shard_143.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.24.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "36332aef298ef172a705d26cc8715fdf"
},
{
"dataPath": "params_shard_144.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.25.mlp.down_proj.q_weight",
"shape": [
8192,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "f35a3a06f6eba989cfab50ef42255937"
},
{
"dataPath": "params_shard_145.bin",
"format": "raw-shard",
"nbytes": 24150016,
"records": [
{
"name": "model.layers.24.mlp.down_proj.q_scale",
"shape": [
8192,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 0
},
{
"name": "model.layers.24.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 14680064
},
{
"name": "model.layers.24.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 14696448
},
{
"name": "model.layers.24.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 19939328
},
{
"name": "model.layers.25.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 24133632
}
],
"md5sum": "40bff02f2be9cb52dc303cc990eeff5f"
},
{
"dataPath": "params_shard_146.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.25.mlp.gate_up_proj.q_weight",
"shape": [
57344,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "14d0106952f0a56de30e0bbd603b83c8"
},
{
"dataPath": "params_shard_147.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.25.mlp.gate_up_proj.q_scale",
"shape": [
57344,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "b45596c2e71c2e8b6efc5e0b645bc6f5"
},
{
"dataPath": "params_shard_148.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.25.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "725947b77bcc00ad389e5ffd50801081"
},
{
"dataPath": "params_shard_149.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.25.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "d1dd59b36c8ba56d35e2d094f8aee23c"
},
{
"dataPath": "params_shard_150.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.26.mlp.down_proj.q_weight",
"shape": [
8192,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "b89dc40248ed5853cfb078bed6b9e440"
},
{
"dataPath": "params_shard_151.bin",
"format": "raw-shard",
"nbytes": 24150016,
"records": [
{
"name": "model.layers.25.mlp.down_proj.q_scale",
"shape": [
8192,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 0
},
{
"name": "model.layers.25.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 14680064
},
{
"name": "model.layers.25.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 14696448
},
{
"name": "model.layers.25.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 19939328
},
{
"name": "model.layers.26.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 24133632
}
],
"md5sum": "14f82e61ef0758345eb08b6c30587a94"
},
{
"dataPath": "params_shard_152.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.26.mlp.gate_up_proj.q_weight",
"shape": [
57344,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "e9ed243d184aec8c9404760685b54089"
},
{
"dataPath": "params_shard_153.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.26.mlp.gate_up_proj.q_scale",
"shape": [
57344,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "4f76f0507eab8c688cf6cf168ea968e1"
},
{
"dataPath": "params_shard_154.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.26.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "8767a9c9520ae8e0d798041f5cb13f50"
},
{
"dataPath": "params_shard_155.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.26.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "700f9941948798aff5b432e75e006888"
},
{
"dataPath": "params_shard_156.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.27.mlp.gate_up_proj.q_weight",
"shape": [
57344,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "ebd4033c03614ac28bf916f7674a7d9a"
},
{
"dataPath": "params_shard_157.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.27.mlp.gate_up_proj.q_scale",
"shape": [
57344,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "3003f1e2ec2687225b6816fd508f5855"
},
{
"dataPath": "params_shard_158.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.27.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "cb41558d67c5024cdc9be97d9fcb8f80"
},
{
"dataPath": "params_shard_159.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.27.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "2e1b1f7db8c9f35da6ce9f8a911f3d35"
},
{
"dataPath": "params_shard_160.bin",
"format": "raw-shard",
"nbytes": 29376512,
"records": [
{
"name": "model.layers.26.mlp.down_proj.q_scale",
"shape": [
8192,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 0
},
{
"name": "model.layers.26.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 14680064
},
{
"name": "model.layers.26.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 14696448
},
{
"name": "model.layers.26.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 19939328
},
{
"name": "model.layers.27.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 24133632
}
],
"md5sum": "f8f1e69aa6b0f786d01a7b84f30eb974"
},
{
"dataPath": "params_shard_161.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.27.mlp.down_proj.q_weight",
"shape": [
8192,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "cacfb44bad0add2862460b43d26ea82d"
},
{
"dataPath": "params_shard_162.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.28.mlp.down_proj.q_weight",
"shape": [
8192,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "c0175b07abb6f8b526eaa33039b5429e"
},
{
"dataPath": "params_shard_163.bin",
"format": "raw-shard",
"nbytes": 18923520,
"records": [
{
"name": "model.layers.27.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 0
},
{
"name": "model.layers.27.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 4194304
},
{
"name": "model.layers.27.mlp.down_proj.q_scale",
"shape": [
8192,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 4210688
},
{
"name": "model.layers.27.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 18890752
},
{
"name": "model.layers.28.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 18907136
}
],
"md5sum": "29e2338d72007745f6cc104fa8579471"
},
{
"dataPath": "params_shard_164.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.28.mlp.gate_up_proj.q_weight",
"shape": [
57344,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "17b931a7e1438b05b095c02db37293d7"
},
{
"dataPath": "params_shard_165.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.28.mlp.gate_up_proj.q_scale",
"shape": [
57344,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "dfdd893b9a823fcbac0dc4a7d853740b"
},
{
"dataPath": "params_shard_166.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.28.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "4c6ca4c8e870590d7c38c47056eb365c"
},
{
"dataPath": "params_shard_167.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.28.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "149cabc42a875aa800fd38fca3e21add"
},
{
"dataPath": "params_shard_168.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.29.mlp.down_proj.q_weight",
"shape": [
8192,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "88f427d2f35f8ca028f66fa04848cdfb"
},
{
"dataPath": "params_shard_169.bin",
"format": "raw-shard",
"nbytes": 24150016,
"records": [
{
"name": "model.layers.28.mlp.down_proj.q_scale",
"shape": [
8192,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 0
},
{
"name": "model.layers.28.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 14680064
},
{
"name": "model.layers.28.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 14696448
},
{
"name": "model.layers.28.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 19939328
},
{
"name": "model.layers.29.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 24133632
}
],
"md5sum": "a53919802b5af8a1ea03a0a3e8d8251c"
},
{
"dataPath": "params_shard_170.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.29.mlp.gate_up_proj.q_weight",
"shape": [
57344,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "bd344d26d2cbdf865fe5bf0e0651c8cd"
},
{
"dataPath": "params_shard_171.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.29.mlp.gate_up_proj.q_scale",
"shape": [
57344,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "b61af61f353e73d72aa9985a00606325"
},
{
"dataPath": "params_shard_172.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.29.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "1ea0e63776d330eb7d84b6740cad62b2"
},
{
"dataPath": "params_shard_173.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.29.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "c28e65ceef640b38cc03fd2880cf1bc6"
},
{
"dataPath": "params_shard_174.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.30.mlp.gate_up_proj.q_weight",
"shape": [
57344,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "e63c6c65c708ace0a4fcc743f9781735"
},
{
"dataPath": "params_shard_175.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.30.mlp.gate_up_proj.q_scale",
"shape": [
57344,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "9be64e7c087be6bb83b1d5c5faa16e33"
},
{
"dataPath": "params_shard_176.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.30.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "1fca6a9a7c0c1b30cf3f49acdd9b83d2"
},
{
"dataPath": "params_shard_177.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.30.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "86378867990096681db9b22592990db6"
},
{
"dataPath": "params_shard_178.bin",
"format": "raw-shard",
"nbytes": 29376512,
"records": [
{
"name": "model.layers.29.mlp.down_proj.q_scale",
"shape": [
8192,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 0
},
{
"name": "model.layers.29.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 14680064
},
{
"name": "model.layers.29.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 14696448
},
{
"name": "model.layers.29.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 19939328
},
{
"name": "model.layers.30.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 24133632
}
],
"md5sum": "b6d89b7acf7fe40740fc4c41d150ff30"
},
{
"dataPath": "params_shard_179.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.30.mlp.down_proj.q_weight",
"shape": [
8192,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "4ec001110d7bb3643599239027127384"
},
{
"dataPath": "params_shard_180.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.31.mlp.down_proj.q_weight",
"shape": [
8192,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "4e2ca80db2b536d0240926868d3ac5c6"
},
{
"dataPath": "params_shard_181.bin",
"format": "raw-shard",
"nbytes": 18923520,
"records": [
{
"name": "model.layers.30.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 0
},
{
"name": "model.layers.30.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 4194304
},
{
"name": "model.layers.30.mlp.down_proj.q_scale",
"shape": [
8192,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 4210688
},
{
"name": "model.layers.30.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 18890752
},
{
"name": "model.layers.31.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 18907136
}
],
"md5sum": "e28d41eac609024b38febcb896918a35"
},
{
"dataPath": "params_shard_182.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.31.mlp.gate_up_proj.q_weight",
"shape": [
57344,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "ed99ded14cf3d0244e81dacf98538338"
},
{
"dataPath": "params_shard_183.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.31.mlp.gate_up_proj.q_scale",
"shape": [
57344,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "34ddcf9472bc2ce80eeb8dfeaa4a5710"
},
{
"dataPath": "params_shard_184.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.31.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "58d76f24bd09e31adf5b3b91d7403a8d"
},
{
"dataPath": "params_shard_185.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.31.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "4b2a3e585b57ae69e35690190169cef3"
},
{
"dataPath": "params_shard_186.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.32.mlp.down_proj.q_weight",
"shape": [
8192,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "2b14f28d6979ec556c8fa53296cbf880"
},
{
"dataPath": "params_shard_187.bin",
"format": "raw-shard",
"nbytes": 24150016,
"records": [
{
"name": "model.layers.31.mlp.down_proj.q_scale",
"shape": [
8192,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 0
},
{
"name": "model.layers.31.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 14680064
},
{
"name": "model.layers.31.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 14696448
},
{
"name": "model.layers.31.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 19939328
},
{
"name": "model.layers.32.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 24133632
}
],
"md5sum": "a9e0883a01dcae816ec3a08adbc4121f"
},
{
"dataPath": "params_shard_188.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.32.mlp.gate_up_proj.q_weight",
"shape": [
57344,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "e715e5580b58ec13a18af2854a97f0c7"
},
{
"dataPath": "params_shard_189.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.32.mlp.gate_up_proj.q_scale",
"shape": [
57344,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "eb46bd17627c9727c515f8cc5aec35c5"
},
{
"dataPath": "params_shard_190.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.32.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "db4ce43bb453ca0885ce0bebcc6be4f4"
},
{
"dataPath": "params_shard_191.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.32.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "2e20a7548265f9df2089822050aac57b"
},
{
"dataPath": "params_shard_192.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.33.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "5c3cb77a4c93798f9a52717634ea6b46"
},
{
"dataPath": "params_shard_193.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.33.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "cec4129750a5351578274f51d28327b0"
},
{
"dataPath": "params_shard_194.bin",
"format": "raw-shard",
"nbytes": 29376512,
"records": [
{
"name": "model.layers.32.mlp.down_proj.q_scale",
"shape": [
8192,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 0
},
{
"name": "model.layers.32.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 14680064
},
{
"name": "model.layers.32.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 14696448
},
{
"name": "model.layers.32.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 19939328
},
{
"name": "model.layers.33.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 24133632
}
],
"md5sum": "67fa90f0846f4dff7eec1a6a764d967a"
},
{
"dataPath": "params_shard_195.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.33.mlp.down_proj.q_weight",
"shape": [
8192,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "237ba86411fe53a5e0e4505569dc8632"
},
{
"dataPath": "params_shard_196.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.33.mlp.gate_up_proj.q_weight",
"shape": [
57344,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "a067ca99d9d64d592cb004320689f82c"
},
{
"dataPath": "params_shard_197.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.33.mlp.gate_up_proj.q_scale",
"shape": [
57344,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "929ec4eaaa6cc4d6e5ef562225cfd4a1"
},
{
"dataPath": "params_shard_198.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.34.mlp.down_proj.q_weight",
"shape": [
8192,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "72195e1da7dac3b3895f65e1473839c9"
},
{
"dataPath": "params_shard_199.bin",
"format": "raw-shard",
"nbytes": 18923520,
"records": [
{
"name": "model.layers.33.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 0
},
{
"name": "model.layers.33.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 4194304
},
{
"name": "model.layers.33.mlp.down_proj.q_scale",
"shape": [
8192,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 4210688
},
{
"name": "model.layers.33.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 18890752
},
{
"name": "model.layers.34.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 18907136
}
],
"md5sum": "3916034214a41606dbeb3810f0f516ed"
},
{
"dataPath": "params_shard_200.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.34.mlp.gate_up_proj.q_weight",
"shape": [
57344,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "ad4753f3aa3a541aee93b4a70e9ae84e"
},
{
"dataPath": "params_shard_201.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.34.mlp.gate_up_proj.q_scale",
"shape": [
57344,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "d038b53bfca4e1ca169562ca365115fa"
},
{
"dataPath": "params_shard_202.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.34.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "c818034a5a5231a2041aac152a3d8577"
},
{
"dataPath": "params_shard_203.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.34.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "a82d3434355919bb2a2548e46713d165"
},
{
"dataPath": "params_shard_204.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.35.mlp.down_proj.q_weight",
"shape": [
8192,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "3623d07b8fbd19cc9a1dd41b758255bb"
},
{
"dataPath": "params_shard_205.bin",
"format": "raw-shard",
"nbytes": 24150016,
"records": [
{
"name": "model.layers.34.mlp.down_proj.q_scale",
"shape": [
8192,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 0
},
{
"name": "model.layers.34.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 14680064
},
{
"name": "model.layers.34.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 14696448
},
{
"name": "model.layers.34.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 19939328
},
{
"name": "model.layers.35.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 24133632
}
],
"md5sum": "ca2add840782c9a90d2bb410e3284ded"
},
{
"dataPath": "params_shard_206.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.35.mlp.gate_up_proj.q_weight",
"shape": [
57344,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "0d473927b5c7afb44bebef536b849827"
},
{
"dataPath": "params_shard_207.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.35.mlp.gate_up_proj.q_scale",
"shape": [
57344,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "df4bb163f5119baeae1fe79c9ecaf457"
},
{
"dataPath": "params_shard_208.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.35.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "c4e3e93daa8cf8cda7ea8196f6b166d1"
},
{
"dataPath": "params_shard_209.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.35.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "9dd78e90b1353484431b915a6e0e3c71"
},
{
"dataPath": "params_shard_210.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.36.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "2b779651aad56210a6f7f96b71b36d13"
},
{
"dataPath": "params_shard_211.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.36.mlp.down_proj.q_weight",
"shape": [
8192,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "9fd4c160f64bea5ac7b803dabb969c5a"
},
{
"dataPath": "params_shard_212.bin",
"format": "raw-shard",
"nbytes": 29392896,
"records": [
{
"name": "model.layers.35.mlp.down_proj.q_scale",
"shape": [
8192,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 0
},
{
"name": "model.layers.35.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 14680064
},
{
"name": "model.layers.35.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 14696448
},
{
"name": "model.layers.35.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 19939328
},
{
"name": "model.layers.36.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 24133632
},
{
"name": "model.layers.36.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 29376512
}
],
"md5sum": "e3bec96e5ae9454477d1d8040662b8e9"
},
{
"dataPath": "params_shard_213.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.36.mlp.gate_up_proj.q_weight",
"shape": [
57344,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "4974382710221d9beac8952340ee1436"
},
{
"dataPath": "params_shard_214.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.36.mlp.gate_up_proj.q_scale",
"shape": [
57344,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "ac0807aff2a7f92d772797025da86af5"
},
{
"dataPath": "params_shard_215.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.36.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "8cd4a22782e99e1e083efb1cdc5a98da"
},
{
"dataPath": "params_shard_216.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.37.mlp.down_proj.q_weight",
"shape": [
8192,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "dae168328f84fe20e137c9b5eefe8a29"
},
{
"dataPath": "params_shard_217.bin",
"format": "raw-shard",
"nbytes": 18907136,
"records": [
{
"name": "model.layers.36.mlp.down_proj.q_scale",
"shape": [
8192,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 0
},
{
"name": "model.layers.36.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 14680064
},
{
"name": "model.layers.36.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 14696448
},
{
"name": "model.layers.37.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 18890752
}
],
"md5sum": "eb12da91ebbbc37c0e001cf0a52e3d0f"
},
{
"dataPath": "params_shard_218.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.37.mlp.gate_up_proj.q_weight",
"shape": [
57344,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "5e3d7f72d6735aa24ddca6ecb13e2d62"
},
{
"dataPath": "params_shard_219.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.37.mlp.gate_up_proj.q_scale",
"shape": [
57344,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "02b8225bb8ead27f0d86a07efdc9ffec"
},
{
"dataPath": "params_shard_220.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.37.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "9f859eda6343cfdaee8da10d3abfa6f1"
},
{
"dataPath": "params_shard_221.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.37.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "f9b0bb307d4d8f67896e9eef13fa97d0"
},
{
"dataPath": "params_shard_222.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.38.mlp.down_proj.q_weight",
"shape": [
8192,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "eae639080c58e50fffce3f5b9ce7ac75"
},
{
"dataPath": "params_shard_223.bin",
"format": "raw-shard",
"nbytes": 24150016,
"records": [
{
"name": "model.layers.37.mlp.down_proj.q_scale",
"shape": [
8192,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 0
},
{
"name": "model.layers.37.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 14680064
},
{
"name": "model.layers.37.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 14696448
},
{
"name": "model.layers.37.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 19939328
},
{
"name": "model.layers.38.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 24133632
}
],
"md5sum": "785de765a97b5ad1fde22a2e9ff139ee"
},
{
"dataPath": "params_shard_224.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.38.mlp.gate_up_proj.q_weight",
"shape": [
57344,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "804e7241fb5f7cd95acecd7f8cf84505"
},
{
"dataPath": "params_shard_225.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.38.mlp.gate_up_proj.q_scale",
"shape": [
57344,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "69ea2515ad42524ea0f6d0b689aa29f5"
},
{
"dataPath": "params_shard_226.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.38.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "6a09d87fd61e7bc37c2c170a4c682f66"
},
{
"dataPath": "params_shard_227.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.38.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "dd45f20e963f6ca324381a32410e654e"
},
{
"dataPath": "params_shard_228.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.39.mlp.down_proj.q_weight",
"shape": [
8192,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "d869ec996ddeafeb4c6e1ad5cd3f4354"
},
{
"dataPath": "params_shard_229.bin",
"format": "raw-shard",
"nbytes": 24150016,
"records": [
{
"name": "model.layers.38.mlp.down_proj.q_scale",
"shape": [
8192,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 0
},
{
"name": "model.layers.38.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 14680064
},
{
"name": "model.layers.38.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 14696448
},
{
"name": "model.layers.38.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 19939328
},
{
"name": "model.layers.39.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 24133632
}
],
"md5sum": "87d72c563e3a8704c0086e824bd13196"
},
{
"dataPath": "params_shard_230.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.39.mlp.gate_up_proj.q_weight",
"shape": [
57344,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "4b0f792ffa910ac05225d55161a64631"
},
{
"dataPath": "params_shard_231.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.39.mlp.gate_up_proj.q_scale",
"shape": [
57344,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "e0f4d2a8cf3d4b740e79c57a2fba94e5"
},
{
"dataPath": "params_shard_232.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.39.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "e18f743db30de8d885ba3d54e5dbc544"
},
{
"dataPath": "params_shard_233.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.39.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "2ad1e06047f22963e8e7bb3eb9135902"
},
{
"dataPath": "params_shard_234.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.40.mlp.down_proj.q_weight",
"shape": [
8192,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "44ce8e993d73dacb7e529995d8929f4f"
},
{
"dataPath": "params_shard_235.bin",
"format": "raw-shard",
"nbytes": 24150016,
"records": [
{
"name": "model.layers.39.mlp.down_proj.q_scale",
"shape": [
8192,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 0
},
{
"name": "model.layers.39.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 14680064
},
{
"name": "model.layers.39.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 14696448
},
{
"name": "model.layers.39.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 19939328
},
{
"name": "model.layers.40.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 24133632
}
],
"md5sum": "c4c7e81881778f4b9ddf1e4fab238e07"
},
{
"dataPath": "params_shard_236.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.40.mlp.gate_up_proj.q_weight",
"shape": [
57344,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "11d165984d405c8895cd88ff2237bd90"
},
{
"dataPath": "params_shard_237.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.40.mlp.gate_up_proj.q_scale",
"shape": [
57344,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "002db14e799ed01d6a9951326b4b2f19"
},
{
"dataPath": "params_shard_238.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.40.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "49e395d4971639f3deb49e8a4568ac7c"
},
{
"dataPath": "params_shard_239.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.40.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "8f65359127994fa21f57943a943de4cb"
},
{
"dataPath": "params_shard_240.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.41.mlp.gate_up_proj.q_weight",
"shape": [
57344,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "f728aed0d7b50a5f28ac73e3b0575be9"
},
{
"dataPath": "params_shard_241.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.41.mlp.gate_up_proj.q_scale",
"shape": [
57344,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "793330d0ae186dd415b58f6413a01c4d"
},
{
"dataPath": "params_shard_242.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.41.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "fe0d4b97041b4791539315baef8bfab1"
},
{
"dataPath": "params_shard_243.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.41.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "40ec6174a8480043f883b2b3e313d3b7"
},
{
"dataPath": "params_shard_244.bin",
"format": "raw-shard",
"nbytes": 29376512,
"records": [
{
"name": "model.layers.40.mlp.down_proj.q_scale",
"shape": [
8192,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 0
},
{
"name": "model.layers.40.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 14680064
},
{
"name": "model.layers.40.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 14696448
},
{
"name": "model.layers.40.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 19939328
},
{
"name": "model.layers.41.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 24133632
}
],
"md5sum": "4e0c705dd5ecfcd03dd936a915d0dc66"
},
{
"dataPath": "params_shard_245.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.41.mlp.down_proj.q_weight",
"shape": [
8192,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "dc651df7ebc7a5899fee57549c288df0"
},
{
"dataPath": "params_shard_246.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.42.mlp.down_proj.q_weight",
"shape": [
8192,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "afb7391a5290b93c3db199506b960710"
},
{
"dataPath": "params_shard_247.bin",
"format": "raw-shard",
"nbytes": 18923520,
"records": [
{
"name": "model.layers.41.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 0
},
{
"name": "model.layers.41.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 4194304
},
{
"name": "model.layers.41.mlp.down_proj.q_scale",
"shape": [
8192,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 4210688
},
{
"name": "model.layers.41.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 18890752
},
{
"name": "model.layers.42.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 18907136
}
],
"md5sum": "36ffcb9430b193838d73b95c6561eb94"
},
{
"dataPath": "params_shard_248.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.42.mlp.gate_up_proj.q_weight",
"shape": [
57344,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "4f6c9e82232c60d0e282c54ac3a1959a"
},
{
"dataPath": "params_shard_249.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.42.mlp.gate_up_proj.q_scale",
"shape": [
57344,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "ed7ef2c2a22f756e9db5a5e70f9bb85e"
},
{
"dataPath": "params_shard_250.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.42.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "ed512284421fb9773c7450c64f0c35f0"
},
{
"dataPath": "params_shard_251.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.42.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "b6c806a3a15a07f1728fa66d43cbd97c"
},
{
"dataPath": "params_shard_252.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.43.mlp.down_proj.q_weight",
"shape": [
8192,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "82d146ca4c5edaeabe1f7399e9a157cc"
},
{
"dataPath": "params_shard_253.bin",
"format": "raw-shard",
"nbytes": 24150016,
"records": [
{
"name": "model.layers.42.mlp.down_proj.q_scale",
"shape": [
8192,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 0
},
{
"name": "model.layers.42.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 14680064
},
{
"name": "model.layers.42.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 14696448
},
{
"name": "model.layers.42.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 19939328
},
{
"name": "model.layers.43.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 24133632
}
],
"md5sum": "4483249491dfa74502a6a2208f130d6d"
},
{
"dataPath": "params_shard_254.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.43.mlp.gate_up_proj.q_weight",
"shape": [
57344,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "bd6e8300635ceaed9c6fc1d9112712fa"
},
{
"dataPath": "params_shard_255.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.43.mlp.gate_up_proj.q_scale",
"shape": [
57344,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "89695c2996b8d08a56e2954eb5b4ef63"
},
{
"dataPath": "params_shard_256.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.43.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "029cbd32ee7c02b538cfa2e741c3eee7"
},
{
"dataPath": "params_shard_257.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.43.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "552c739e645e2a4fb30a70d692737dbd"
},
{
"dataPath": "params_shard_258.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.44.mlp.gate_up_proj.q_weight",
"shape": [
57344,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "526e3105099cc848639ecef186235b5d"
},
{
"dataPath": "params_shard_259.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.44.mlp.gate_up_proj.q_scale",
"shape": [
57344,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "35403d5da7e20c5ed357c4fe433e1c46"
},
{
"dataPath": "params_shard_260.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.44.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "1d2e3208d0fc9ce36006604553fd3f5c"
},
{
"dataPath": "params_shard_261.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.44.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "69bf496720a9701e1177c807789c3e9d"
},
{
"dataPath": "params_shard_262.bin",
"format": "raw-shard",
"nbytes": 29376512,
"records": [
{
"name": "model.layers.43.mlp.down_proj.q_scale",
"shape": [
8192,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 0
},
{
"name": "model.layers.43.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 14680064
},
{
"name": "model.layers.43.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 14696448
},
{
"name": "model.layers.43.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 19939328
},
{
"name": "model.layers.44.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 24133632
}
],
"md5sum": "baf73c3e828446ea3d75479abc470a96"
},
{
"dataPath": "params_shard_263.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.44.mlp.down_proj.q_weight",
"shape": [
8192,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "698987035084a1d3806e73d01fa08c26"
},
{
"dataPath": "params_shard_264.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.45.mlp.down_proj.q_weight",
"shape": [
8192,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "60f092a7586bf96ef04d412c54a015b1"
},
{
"dataPath": "params_shard_265.bin",
"format": "raw-shard",
"nbytes": 18923520,
"records": [
{
"name": "model.layers.44.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 0
},
{
"name": "model.layers.44.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 4194304
},
{
"name": "model.layers.44.mlp.down_proj.q_scale",
"shape": [
8192,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 4210688
},
{
"name": "model.layers.44.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 18890752
},
{
"name": "model.layers.45.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 18907136
}
],
"md5sum": "5296e185c6f5c9a8ca897653c2e28bb4"
},
{
"dataPath": "params_shard_266.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.45.mlp.gate_up_proj.q_weight",
"shape": [
57344,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "dd755a3f48564e5f5ad1b2dde38178d5"
},
{
"dataPath": "params_shard_267.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.45.mlp.gate_up_proj.q_scale",
"shape": [
57344,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "6d36b10401a84370dd93d9729a076aa1"
},
{
"dataPath": "params_shard_268.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.45.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "d0600d81e390a7a3a221ceb406ae5ec2"
},
{
"dataPath": "params_shard_269.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.45.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "6f08047e06699c5abb5f8f504504634e"
},
{
"dataPath": "params_shard_270.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.46.mlp.down_proj.q_weight",
"shape": [
8192,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "d41899b6bf0053a7c5213ae185cf4922"
},
{
"dataPath": "params_shard_271.bin",
"format": "raw-shard",
"nbytes": 24150016,
"records": [
{
"name": "model.layers.45.mlp.down_proj.q_scale",
"shape": [
8192,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 0
},
{
"name": "model.layers.45.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 14680064
},
{
"name": "model.layers.45.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 14696448
},
{
"name": "model.layers.45.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 19939328
},
{
"name": "model.layers.46.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 24133632
}
],
"md5sum": "bf92671d3cfe7b10bc94d04265dc2b1f"
},
{
"dataPath": "params_shard_272.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.46.mlp.gate_up_proj.q_weight",
"shape": [
57344,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "63332bce298c37cf8292e8dc32d32a76"
},
{
"dataPath": "params_shard_273.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.46.mlp.gate_up_proj.q_scale",
"shape": [
57344,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "f9ff07bda44a4301df01791d3d5b1c26"
},
{
"dataPath": "params_shard_274.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.46.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "5b43df4e563d65f919a5b1966d9dc4ad"
},
{
"dataPath": "params_shard_275.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.46.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "5d71d5938da1c71eee0df0bb4e731056"
},
{
"dataPath": "params_shard_276.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.47.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "ac7bde4c775e27b059efd26a307cabf3"
},
{
"dataPath": "params_shard_277.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.47.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "037ac8e41160c1995245de416255c03a"
},
{
"dataPath": "params_shard_278.bin",
"format": "raw-shard",
"nbytes": 29376512,
"records": [
{
"name": "model.layers.46.mlp.down_proj.q_scale",
"shape": [
8192,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 0
},
{
"name": "model.layers.46.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 14680064
},
{
"name": "model.layers.46.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 14696448
},
{
"name": "model.layers.46.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 19939328
},
{
"name": "model.layers.47.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 24133632
}
],
"md5sum": "5d7c791ba6c06111d99ea45e9a24b14c"
},
{
"dataPath": "params_shard_279.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.47.mlp.down_proj.q_weight",
"shape": [
8192,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "9cbfeac6d49cc25cdf24f9d498715b07"
},
{
"dataPath": "params_shard_280.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.47.mlp.gate_up_proj.q_weight",
"shape": [
57344,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "8fcf03b2986ebb6da4dd81f09d4a9651"
},
{
"dataPath": "params_shard_281.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.47.mlp.gate_up_proj.q_scale",
"shape": [
57344,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "8436d1bfd0e0ae947c602bdff2b282a7"
},
{
"dataPath": "params_shard_282.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.48.mlp.down_proj.q_weight",
"shape": [
8192,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "321e15d2709f17730c5cad0483e42492"
},
{
"dataPath": "params_shard_283.bin",
"format": "raw-shard",
"nbytes": 18923520,
"records": [
{
"name": "model.layers.47.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 0
},
{
"name": "model.layers.47.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 4194304
},
{
"name": "model.layers.47.mlp.down_proj.q_scale",
"shape": [
8192,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 4210688
},
{
"name": "model.layers.47.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 18890752
},
{
"name": "model.layers.48.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 18907136
}
],
"md5sum": "4ac2f33a9620760b285d03ae6b1ebce5"
},
{
"dataPath": "params_shard_284.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.48.mlp.gate_up_proj.q_weight",
"shape": [
57344,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "3e1b9f00973c5a78f7e0a4c7fa5af73c"
},
{
"dataPath": "params_shard_285.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.48.mlp.gate_up_proj.q_scale",
"shape": [
57344,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "8e4624843314e5d6f0270bdc438f990d"
},
{
"dataPath": "params_shard_286.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.48.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "fe1c4047d2cdade601d1c0b49ba3d3c4"
},
{
"dataPath": "params_shard_287.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.48.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "6cafd54b905995d898eef8103a186776"
},
{
"dataPath": "params_shard_288.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.49.mlp.down_proj.q_weight",
"shape": [
8192,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "3fe82ee8d51b322b244a85adbe521beb"
},
{
"dataPath": "params_shard_289.bin",
"format": "raw-shard",
"nbytes": 24150016,
"records": [
{
"name": "model.layers.48.mlp.down_proj.q_scale",
"shape": [
8192,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 0
},
{
"name": "model.layers.48.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 14680064
},
{
"name": "model.layers.48.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 14696448
},
{
"name": "model.layers.48.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 19939328
},
{
"name": "model.layers.49.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 24133632
}
],
"md5sum": "aeb1d66ebe0c2524ec050e9dbb1bb61f"
},
{
"dataPath": "params_shard_290.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.49.mlp.gate_up_proj.q_weight",
"shape": [
57344,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "1f6c1f1265fab4a5685c727b4f31b20b"
},
{
"dataPath": "params_shard_291.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.49.mlp.gate_up_proj.q_scale",
"shape": [
57344,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "d842b7ed60c5a849f1088d3678a023c4"
},
{
"dataPath": "params_shard_292.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.49.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "8fee3beebbeabfa1fbd8fd5bdf1aa91c"
},
{
"dataPath": "params_shard_293.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.49.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "3404f55c49d403e06aee99bef7d13244"
},
{
"dataPath": "params_shard_294.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.50.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "9fc13e3fd7279c632e644bb724d08970"
},
{
"dataPath": "params_shard_295.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.5.mlp.down_proj.q_weight",
"shape": [
8192,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "0e73a4684ec5937a52f26fddb92d19af"
},
{
"dataPath": "params_shard_296.bin",
"format": "raw-shard",
"nbytes": 29392896,
"records": [
{
"name": "model.layers.49.mlp.down_proj.q_scale",
"shape": [
8192,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 0
},
{
"name": "model.layers.49.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 14680064
},
{
"name": "model.layers.49.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 14696448
},
{
"name": "model.layers.49.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 19939328
},
{
"name": "model.layers.50.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 24133632
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 29376512
}
],
"md5sum": "1fcd8f3d5a5299144170124ac4ed8d7c"
},
{
"dataPath": "params_shard_297.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.q_weight",
"shape": [
57344,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "0c8ad2b846b2d912f367e7d4168b4c25"
},
{
"dataPath": "params_shard_298.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.q_scale",
"shape": [
57344,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "45f2443fc55511b94231a5d0cee6ae90"
},
{
"dataPath": "params_shard_299.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.6.mlp.down_proj.q_weight",
"shape": [
8192,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "2afcb09b7b2820e3feaeba6d562b2cc8"
},
{
"dataPath": "params_shard_300.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.q_weight",
"shape": [
57344,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "203e381525aa74e7cf25dca337eb1216"
},
{
"dataPath": "params_shard_301.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.q_scale",
"shape": [
57344,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "ad74dc7730f5320ee34433abe273e743"
},
{
"dataPath": "params_shard_302.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.6.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "968903d555ac66818a1f67a34f3cb93e"
},
{
"dataPath": "params_shard_303.bin",
"format": "raw-shard",
"nbytes": 29409280,
"records": [
{
"name": "model.layers.5.mlp.down_proj.q_scale",
"shape": [
8192,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 0
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 14680064
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 14696448
},
{
"name": "model.layers.6.mlp.down_proj.q_scale",
"shape": [
8192,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 14712832
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 29392896
}
],
"md5sum": "5cce6eede08ac8098860e4d679aa2d85"
},
{
"dataPath": "params_shard_304.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.6.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "3049f9e84d7fab41561b4af0bb5a00da"
},
{
"dataPath": "params_shard_305.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.7.mlp.down_proj.q_weight",
"shape": [
8192,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "5d84f6c2dfbca41ce5e7355be656b927"
},
{
"dataPath": "params_shard_306.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.q_weight",
"shape": [
57344,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "d715e60b3fdd63068c6222fa1e52f1b1"
},
{
"dataPath": "params_shard_307.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.q_scale",
"shape": [
57344,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "1ceff429826f29524653c4e9ce1f5d93"
},
{
"dataPath": "params_shard_308.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.7.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "751c65072f0d9686b12327e3972302ed"
},
{
"dataPath": "params_shard_309.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.7.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "14cc052ad9bed477ba63e15b29b30790"
},
{
"dataPath": "params_shard_310.bin",
"format": "raw-shard",
"nbytes": 29392896,
"records": [
{
"name": "model.layers.6.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 0
},
{
"name": "model.layers.6.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 5242880
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 9437184
},
{
"name": "model.layers.7.mlp.down_proj.q_scale",
"shape": [
8192,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 9453568
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 24133632
},
{
"name": "model.layers.7.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 24150016
}
],
"md5sum": "e4375dd1295f7ab383f851d21236005d"
},
{
"dataPath": "params_shard_311.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.8.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "401ad77b3d628b88c78e1f0f8db91a01"
},
{
"dataPath": "params_shard_312.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.50.mlp.down_proj.q_weight",
"shape": [
8192,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "d2bbe605ea7ace155205b823ee59ccdb"
},
{
"dataPath": "params_shard_313.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.50.mlp.gate_up_proj.q_weight",
"shape": [
57344,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "cfb7fc2f5b674aeddbaf23a0d26cc240"
},
{
"dataPath": "params_shard_314.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.50.mlp.gate_up_proj.q_scale",
"shape": [
57344,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "44af979c20008e1784ba4eafe0297ad4"
},
{
"dataPath": "params_shard_315.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.50.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "31997e38bdb2a08972fc59f42fab0e28"
},
{
"dataPath": "params_shard_316.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.51.mlp.down_proj.q_weight",
"shape": [
8192,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "df5e9aacb6a647f25d7108935f7a437d"
},
{
"dataPath": "params_shard_317.bin",
"format": "raw-shard",
"nbytes": 28360704,
"records": [
{
"name": "model.layers.7.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 0
},
{
"name": "model.layers.8.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 4194304
},
{
"name": "model.layers.50.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 9437184
},
{
"name": "model.layers.50.mlp.down_proj.q_scale",
"shape": [
8192,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 9453568
},
{
"name": "model.layers.50.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 24133632
},
{
"name": "model.layers.50.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 24150016
},
{
"name": "model.layers.51.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 28344320
}
],
"md5sum": "aab2e35fd2ce81e01fd313b507b9bb92"
},
{
"dataPath": "params_shard_318.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.51.mlp.gate_up_proj.q_weight",
"shape": [
57344,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "8f6f05e20458f750c8c16e5e359eb5d7"
},
{
"dataPath": "params_shard_319.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.51.mlp.gate_up_proj.q_scale",
"shape": [
57344,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "e4de48a9f5be393d7ee8fd7252ed7c1e"
},
{
"dataPath": "params_shard_320.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.51.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "dd66d1571b46f00577bae7178e753275"
},
{
"dataPath": "params_shard_321.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.51.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "69bf75f050a90241e703ce49d2b90c73"
},
{
"dataPath": "params_shard_322.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.52.mlp.down_proj.q_weight",
"shape": [
8192,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "1c769bc461796e2bf4179cab040cc38c"
},
{
"dataPath": "params_shard_323.bin",
"format": "raw-shard",
"nbytes": 24150016,
"records": [
{
"name": "model.layers.51.mlp.down_proj.q_scale",
"shape": [
8192,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 0
},
{
"name": "model.layers.51.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 14680064
},
{
"name": "model.layers.51.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 14696448
},
{
"name": "model.layers.51.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 19939328
},
{
"name": "model.layers.52.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 24133632
}
],
"md5sum": "3a7a8f74ea3a8e6b538d7d44d2143745"
},
{
"dataPath": "params_shard_324.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.52.mlp.gate_up_proj.q_weight",
"shape": [
57344,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "67014a38b11ca8effb6511d94eefa21a"
},
{
"dataPath": "params_shard_325.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.52.mlp.gate_up_proj.q_scale",
"shape": [
57344,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "0e05ff59e562f2856858e54d5ad5b4c5"
},
{
"dataPath": "params_shard_326.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.52.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "d726f5c20c12a5bb85ea428218a352ce"
},
{
"dataPath": "params_shard_327.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.52.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "7f8c7acd55748fcdcd81dd9a5c584541"
},
{
"dataPath": "params_shard_328.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.53.mlp.down_proj.q_weight",
"shape": [
8192,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "23d4fae816b3b48e898f0c83bded579d"
},
{
"dataPath": "params_shard_329.bin",
"format": "raw-shard",
"nbytes": 24150016,
"records": [
{
"name": "model.layers.52.mlp.down_proj.q_scale",
"shape": [
8192,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 0
},
{
"name": "model.layers.52.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 14680064
},
{
"name": "model.layers.52.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 14696448
},
{
"name": "model.layers.52.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 19939328
},
{
"name": "model.layers.53.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 24133632
}
],
"md5sum": "a0ae8944935a11d833a44578456accf7"
},
{
"dataPath": "params_shard_330.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.53.mlp.gate_up_proj.q_weight",
"shape": [
57344,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "ac65f4d6fa1b4d11b797147bdd88f15d"
},
{
"dataPath": "params_shard_331.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.53.mlp.gate_up_proj.q_scale",
"shape": [
57344,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "3ad4d8970ae045ad51ff643f4f1ed41e"
},
{
"dataPath": "params_shard_332.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.53.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "ef9bd631745da21d68b2d32a39a1da36"
},
{
"dataPath": "params_shard_333.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.53.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "f3f5b71786144f7abba659241d02f70b"
},
{
"dataPath": "params_shard_334.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.54.mlp.down_proj.q_weight",
"shape": [
8192,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "e8a0a19194e6fc04d298db1e9f9f8a94"
},
{
"dataPath": "params_shard_335.bin",
"format": "raw-shard",
"nbytes": 24150016,
"records": [
{
"name": "model.layers.53.mlp.down_proj.q_scale",
"shape": [
8192,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 0
},
{
"name": "model.layers.53.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 14680064
},
{
"name": "model.layers.53.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 14696448
},
{
"name": "model.layers.53.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 19939328
},
{
"name": "model.layers.54.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 24133632
}
],
"md5sum": "025d825b9bcaf5c85f71f9b671ba5571"
},
{
"dataPath": "params_shard_336.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.54.mlp.gate_up_proj.q_weight",
"shape": [
57344,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "44e3d1ec8093e9375f0cf5fbb952e6d6"
},
{
"dataPath": "params_shard_337.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.54.mlp.gate_up_proj.q_scale",
"shape": [
57344,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "ec789c4e369728f87d8759dd4256544f"
},
{
"dataPath": "params_shard_338.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.54.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "8a3dfa9d77867781f68a584e92add1f0"
},
{
"dataPath": "params_shard_339.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.54.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "2be83cdba8b75839d3cfa0262fa91954"
},
{
"dataPath": "params_shard_340.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.55.mlp.gate_up_proj.q_weight",
"shape": [
57344,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "c335e04a4c762d84cc04b157c8ba854d"
},
{
"dataPath": "params_shard_341.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.55.mlp.gate_up_proj.q_scale",
"shape": [
57344,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "391ec43b28f08ca9756a8941ef511eba"
},
{
"dataPath": "params_shard_342.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.55.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "5c493dba8205393ac79d0977c1b00662"
},
{
"dataPath": "params_shard_343.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.55.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "563c087bd66dc9f478333c1a312e851f"
},
{
"dataPath": "params_shard_344.bin",
"format": "raw-shard",
"nbytes": 29376512,
"records": [
{
"name": "model.layers.54.mlp.down_proj.q_scale",
"shape": [
8192,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 0
},
{
"name": "model.layers.54.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 14680064
},
{
"name": "model.layers.54.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 14696448
},
{
"name": "model.layers.54.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 19939328
},
{
"name": "model.layers.55.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 24133632
}
],
"md5sum": "16970b5cba856865d3a8acb1928a3ab2"
},
{
"dataPath": "params_shard_345.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.55.mlp.down_proj.q_weight",
"shape": [
8192,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "9239aa5afb9b0eac3d3d9d34d02cd96f"
},
{
"dataPath": "params_shard_346.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.56.mlp.down_proj.q_weight",
"shape": [
8192,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "9f992a94f21827f05cdf462d7a10c294"
},
{
"dataPath": "params_shard_347.bin",
"format": "raw-shard",
"nbytes": 18923520,
"records": [
{
"name": "model.layers.55.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 0
},
{
"name": "model.layers.55.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 4194304
},
{
"name": "model.layers.55.mlp.down_proj.q_scale",
"shape": [
8192,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 4210688
},
{
"name": "model.layers.55.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 18890752
},
{
"name": "model.layers.56.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 18907136
}
],
"md5sum": "3fe34c6a1dae1f4e99d7cc1cd4d7e259"
},
{
"dataPath": "params_shard_348.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.56.mlp.gate_up_proj.q_weight",
"shape": [
57344,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "c0b5993243712394ccded88e3a941ba5"
},
{
"dataPath": "params_shard_349.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.56.mlp.gate_up_proj.q_scale",
"shape": [
57344,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "f535e83dd504bc9c1a914f32e254b2d1"
},
{
"dataPath": "params_shard_350.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.56.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "b251c437273c61600fce62f5cbb6180b"
},
{
"dataPath": "params_shard_351.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.56.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "02153f294c210a32415635da335fe879"
},
{
"dataPath": "params_shard_352.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.57.mlp.down_proj.q_weight",
"shape": [
8192,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "7324a29ad5aeb0c0592521c99a5907a1"
},
{
"dataPath": "params_shard_353.bin",
"format": "raw-shard",
"nbytes": 24150016,
"records": [
{
"name": "model.layers.56.mlp.down_proj.q_scale",
"shape": [
8192,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 0
},
{
"name": "model.layers.56.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 14680064
},
{
"name": "model.layers.56.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 14696448
},
{
"name": "model.layers.56.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 19939328
},
{
"name": "model.layers.57.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 24133632
}
],
"md5sum": "e6ac2310651aa3f3ea0310ec15ecf1a9"
},
{
"dataPath": "params_shard_354.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.57.mlp.gate_up_proj.q_weight",
"shape": [
57344,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "e42807bb0c424fe113646b010cc82a04"
},
{
"dataPath": "params_shard_355.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.57.mlp.gate_up_proj.q_scale",
"shape": [
57344,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "c4317659f75af8c99293daac3fcee077"
},
{
"dataPath": "params_shard_356.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.57.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "2aef841451b7c9064f7e75ef6a323b06"
},
{
"dataPath": "params_shard_357.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.57.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "97d01fca90f60977e8757578a08a540a"
},
{
"dataPath": "params_shard_358.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.58.mlp.gate_up_proj.q_weight",
"shape": [
57344,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "0a2c2a65fc80963f6fd5d6500c1fbef2"
},
{
"dataPath": "params_shard_359.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.58.mlp.gate_up_proj.q_scale",
"shape": [
57344,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "4eb734f674ecd57da7e1e97d432e1a3b"
},
{
"dataPath": "params_shard_360.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.58.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "17923ecc7113a037e2f8120c715ab053"
},
{
"dataPath": "params_shard_361.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.58.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "10bd5bc2311a13488bfc0d466fe44851"
},
{
"dataPath": "params_shard_362.bin",
"format": "raw-shard",
"nbytes": 29376512,
"records": [
{
"name": "model.layers.57.mlp.down_proj.q_scale",
"shape": [
8192,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 0
},
{
"name": "model.layers.57.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 14680064
},
{
"name": "model.layers.57.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 14696448
},
{
"name": "model.layers.57.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 19939328
},
{
"name": "model.layers.58.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 24133632
}
],
"md5sum": "bfe701a46ffb040735cc4dc3b0e56622"
},
{
"dataPath": "params_shard_363.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.58.mlp.down_proj.q_weight",
"shape": [
8192,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "be40d50fd133199741cc76158a9bac3a"
},
{
"dataPath": "params_shard_364.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.59.mlp.down_proj.q_weight",
"shape": [
8192,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "5b3ac56d1151f261f8b13de50c3a1bb0"
},
{
"dataPath": "params_shard_365.bin",
"format": "raw-shard",
"nbytes": 18923520,
"records": [
{
"name": "model.layers.58.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 0
},
{
"name": "model.layers.58.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 4194304
},
{
"name": "model.layers.58.mlp.down_proj.q_scale",
"shape": [
8192,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 4210688
},
{
"name": "model.layers.58.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 18890752
},
{
"name": "model.layers.59.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 18907136
}
],
"md5sum": "ab29a70e495e0f10773bdc77af9e5050"
},
{
"dataPath": "params_shard_366.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.59.mlp.gate_up_proj.q_weight",
"shape": [
57344,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "a855102967e2e7c675b9f01b61113901"
},
{
"dataPath": "params_shard_367.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.59.mlp.gate_up_proj.q_scale",
"shape": [
57344,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "89d1b518c12bda78ed6546b94926147b"
},
{
"dataPath": "params_shard_368.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.59.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "a8c0d5c1399554c15044912964dc0837"
},
{
"dataPath": "params_shard_369.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.59.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "9f7ff5de5bf7b1dbbc0f646caa8c09b7"
},
{
"dataPath": "params_shard_370.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.60.mlp.down_proj.q_weight",
"shape": [
8192,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "00824b79a1fed1107efb31a310caab9a"
},
{
"dataPath": "params_shard_371.bin",
"format": "raw-shard",
"nbytes": 24150016,
"records": [
{
"name": "model.layers.59.mlp.down_proj.q_scale",
"shape": [
8192,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 0
},
{
"name": "model.layers.59.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 14680064
},
{
"name": "model.layers.59.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 14696448
},
{
"name": "model.layers.59.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 19939328
},
{
"name": "model.layers.60.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 24133632
}
],
"md5sum": "dedde11ca27027250a95aa8889318f0f"
},
{
"dataPath": "params_shard_372.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.60.mlp.gate_up_proj.q_weight",
"shape": [
57344,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "f6c207a84cf2f3c2495f40d38bb37a25"
},
{
"dataPath": "params_shard_373.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.60.mlp.gate_up_proj.q_scale",
"shape": [
57344,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "56df80ff7010d09568beccc0698e91f6"
},
{
"dataPath": "params_shard_374.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.60.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "592d51e1e0b4f2d8d045236afc3e20d4"
},
{
"dataPath": "params_shard_375.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.60.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "08ee93c52fa70d7ecc39e35f3f4b1a97"
},
{
"dataPath": "params_shard_376.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.61.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "b5ab28541f4b5729077715d74c9c192c"
},
{
"dataPath": "params_shard_377.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.61.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "5e2273978f7d4c0e6e3dd14d8659b6c1"
},
{
"dataPath": "params_shard_378.bin",
"format": "raw-shard",
"nbytes": 29376512,
"records": [
{
"name": "model.layers.60.mlp.down_proj.q_scale",
"shape": [
8192,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 0
},
{
"name": "model.layers.60.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 14680064
},
{
"name": "model.layers.60.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 14696448
},
{
"name": "model.layers.60.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 19939328
},
{
"name": "model.layers.61.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 24133632
}
],
"md5sum": "6601f32be9f1f561d5132f8031b6be4e"
},
{
"dataPath": "params_shard_379.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.61.mlp.down_proj.q_weight",
"shape": [
8192,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "49bce736287c706f1f52cbe11cdf02ca"
},
{
"dataPath": "params_shard_380.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.61.mlp.gate_up_proj.q_weight",
"shape": [
57344,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "a16e05e247eff8fa6084bd20365b3176"
},
{
"dataPath": "params_shard_381.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.61.mlp.gate_up_proj.q_scale",
"shape": [
57344,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "b8bb7083172e3140101b9d95a909f61e"
},
{
"dataPath": "params_shard_382.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.62.mlp.down_proj.q_weight",
"shape": [
8192,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "47e71dac8e3b417b7574ffff8ee3c18b"
},
{
"dataPath": "params_shard_383.bin",
"format": "raw-shard",
"nbytes": 18923520,
"records": [
{
"name": "model.layers.61.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 0
},
{
"name": "model.layers.61.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 4194304
},
{
"name": "model.layers.61.mlp.down_proj.q_scale",
"shape": [
8192,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 4210688
},
{
"name": "model.layers.61.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 18890752
},
{
"name": "model.layers.62.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 18907136
}
],
"md5sum": "be1e23e98295136b36ac5a6b85b8e866"
},
{
"dataPath": "params_shard_384.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.62.mlp.gate_up_proj.q_weight",
"shape": [
57344,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "12d76e08129de7537386259649e64f89"
},
{
"dataPath": "params_shard_385.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.62.mlp.gate_up_proj.q_scale",
"shape": [
57344,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "1c5dc07c427a91d3a78c3d7a431d3f1e"
},
{
"dataPath": "params_shard_386.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.62.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "aca71a88e3bc80c37a64ba46a73645b7"
},
{
"dataPath": "params_shard_387.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.62.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "fda39b594ef381ea9213d81582d8a3dd"
},
{
"dataPath": "params_shard_388.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.63.mlp.down_proj.q_weight",
"shape": [
8192,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "00703194b62c8fbeaebe882076a64c81"
},
{
"dataPath": "params_shard_389.bin",
"format": "raw-shard",
"nbytes": 24150016,
"records": [
{
"name": "model.layers.62.mlp.down_proj.q_scale",
"shape": [
8192,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 0
},
{
"name": "model.layers.62.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 14680064
},
{
"name": "model.layers.62.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 14696448
},
{
"name": "model.layers.62.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 19939328
},
{
"name": "model.layers.63.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 24133632
}
],
"md5sum": "09d349aaf6384238b351105203616fe0"
},
{
"dataPath": "params_shard_390.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.63.mlp.gate_up_proj.q_weight",
"shape": [
57344,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "4066af21ae8f777e2a0e9d8764cf9518"
},
{
"dataPath": "params_shard_391.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.63.mlp.gate_up_proj.q_scale",
"shape": [
57344,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "32ce691084d99ec7cb90d4db6ef003dc"
},
{
"dataPath": "params_shard_392.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.63.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "f513ec7cf278056c8a4a570c531f1216"
},
{
"dataPath": "params_shard_393.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.63.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "b413d77113869b7b399542ba89e63d29"
},
{
"dataPath": "params_shard_394.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.64.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "394a0d80e62f83b63b74955a0d65d12a"
},
{
"dataPath": "params_shard_395.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.64.mlp.down_proj.q_weight",
"shape": [
8192,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "ad00d28412f886c1e2ee659cd2ef4b53"
},
{
"dataPath": "params_shard_396.bin",
"format": "raw-shard",
"nbytes": 29392896,
"records": [
{
"name": "model.layers.63.mlp.down_proj.q_scale",
"shape": [
8192,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 0
},
{
"name": "model.layers.63.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 14680064
},
{
"name": "model.layers.63.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 14696448
},
{
"name": "model.layers.63.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 19939328
},
{
"name": "model.layers.64.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 24133632
},
{
"name": "model.layers.64.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 29376512
}
],
"md5sum": "4f1c848224278131f60bbe55279ed984"
},
{
"dataPath": "params_shard_397.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.64.mlp.gate_up_proj.q_weight",
"shape": [
57344,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "4b864f11f603dbd5944c5afd160ff3fa"
},
{
"dataPath": "params_shard_398.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.64.mlp.gate_up_proj.q_scale",
"shape": [
57344,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "046e4e1ef2da759c6fa5874e65278046"
},
{
"dataPath": "params_shard_399.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.64.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "58e50ee84ea2d521191fb33bb44f4534"
},
{
"dataPath": "params_shard_400.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.65.mlp.down_proj.q_weight",
"shape": [
8192,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "050fb63dad1abd8c96b9389306a1253c"
},
{
"dataPath": "params_shard_401.bin",
"format": "raw-shard",
"nbytes": 18907136,
"records": [
{
"name": "model.layers.64.mlp.down_proj.q_scale",
"shape": [
8192,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 0
},
{
"name": "model.layers.64.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 14680064
},
{
"name": "model.layers.64.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 14696448
},
{
"name": "model.layers.65.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 18890752
}
],
"md5sum": "442d6303990f7d6f7e08181446492c74"
},
{
"dataPath": "params_shard_402.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.65.mlp.gate_up_proj.q_weight",
"shape": [
57344,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "7eac88442b1c7dbf7c9b01417336fe36"
},
{
"dataPath": "params_shard_403.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.65.mlp.gate_up_proj.q_scale",
"shape": [
57344,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "d68a604a8a7cde720248158baaeb5fb3"
},
{
"dataPath": "params_shard_404.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.65.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "e9933434bf97e5071e7c92dbbf9fa425"
},
{
"dataPath": "params_shard_405.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.65.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "8d6a2a8330c36f34a5e9d5056e3ca3ed"
},
{
"dataPath": "params_shard_406.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.66.mlp.down_proj.q_weight",
"shape": [
8192,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "65164fd163c577e7cb446e3c7de970f5"
},
{
"dataPath": "params_shard_407.bin",
"format": "raw-shard",
"nbytes": 24150016,
"records": [
{
"name": "model.layers.65.mlp.down_proj.q_scale",
"shape": [
8192,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 0
},
{
"name": "model.layers.65.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 14680064
},
{
"name": "model.layers.65.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 14696448
},
{
"name": "model.layers.65.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 19939328
},
{
"name": "model.layers.66.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 24133632
}
],
"md5sum": "2973295a8c9c1b5e503b1a8b39a89df6"
},
{
"dataPath": "params_shard_408.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.66.mlp.gate_up_proj.q_weight",
"shape": [
57344,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "06c773083be5836c24028d53ab3df8ad"
},
{
"dataPath": "params_shard_409.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.66.mlp.gate_up_proj.q_scale",
"shape": [
57344,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "54a15fb267a40c0349303ac2312cd177"
},
{
"dataPath": "params_shard_410.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.66.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "805a5093578f48b31b8707a68fa0e37e"
},
{
"dataPath": "params_shard_411.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.66.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "8d2297f25d5b8c5df5a502d188acc8e7"
},
{
"dataPath": "params_shard_412.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.67.mlp.down_proj.q_weight",
"shape": [
8192,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "438dc3d418fa5e96388dfdef75bca6cb"
},
{
"dataPath": "params_shard_413.bin",
"format": "raw-shard",
"nbytes": 24150016,
"records": [
{
"name": "model.layers.66.mlp.down_proj.q_scale",
"shape": [
8192,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 0
},
{
"name": "model.layers.66.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 14680064
},
{
"name": "model.layers.66.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 14696448
},
{
"name": "model.layers.66.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 19939328
},
{
"name": "model.layers.67.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 24133632
}
],
"md5sum": "69de340841e2e55c2eba5b50e2e2a886"
},
{
"dataPath": "params_shard_414.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.67.mlp.gate_up_proj.q_weight",
"shape": [
57344,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "9aa59f2ee05737e852b05747432f1032"
},
{
"dataPath": "params_shard_415.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.67.mlp.gate_up_proj.q_scale",
"shape": [
57344,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "6db42f1d4cc23bfcf938ec1d8d51ffdd"
},
{
"dataPath": "params_shard_416.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.67.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "baf5a1ada5b29bbe749973dca2fed8e5"
},
{
"dataPath": "params_shard_417.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.67.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "023f811a1e4b0efdf5db89c765650395"
},
{
"dataPath": "params_shard_418.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.68.mlp.down_proj.q_weight",
"shape": [
8192,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "4ce9c2cf06b43af76116d999c80fcdc6"
},
{
"dataPath": "params_shard_419.bin",
"format": "raw-shard",
"nbytes": 24150016,
"records": [
{
"name": "model.layers.67.mlp.down_proj.q_scale",
"shape": [
8192,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 0
},
{
"name": "model.layers.67.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 14680064
},
{
"name": "model.layers.67.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 14696448
},
{
"name": "model.layers.67.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 19939328
},
{
"name": "model.layers.68.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 24133632
}
],
"md5sum": "1f5ac9ecd224b54d29131dc2e061af22"
},
{
"dataPath": "params_shard_420.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.68.mlp.gate_up_proj.q_weight",
"shape": [
57344,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "15840634201b8a6a82d5f331d22bafbc"
},
{
"dataPath": "params_shard_421.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.68.mlp.gate_up_proj.q_scale",
"shape": [
57344,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "898fc1d61c94635ef743f783222c2e8f"
},
{
"dataPath": "params_shard_422.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.68.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "72555825804ddf9cf5d3a2168e35bdd1"
},
{
"dataPath": "params_shard_423.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.68.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "2171e0ab9b457d1dfbca0e13da776615"
},
{
"dataPath": "params_shard_424.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.69.mlp.gate_up_proj.q_weight",
"shape": [
57344,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "534e288c8e3962d9f6323a4b18683a22"
},
{
"dataPath": "params_shard_425.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.69.mlp.gate_up_proj.q_scale",
"shape": [
57344,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "eccd768d0444cd1e2c95eab5601eb66e"
},
{
"dataPath": "params_shard_426.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.69.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "b732286242dc99cbe6fa0dda1805f3ab"
},
{
"dataPath": "params_shard_427.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.69.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "b116087121f8ab62155c9847f410b39b"
},
{
"dataPath": "params_shard_428.bin",
"format": "raw-shard",
"nbytes": 29376512,
"records": [
{
"name": "model.layers.68.mlp.down_proj.q_scale",
"shape": [
8192,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 0
},
{
"name": "model.layers.68.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 14680064
},
{
"name": "model.layers.68.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 14696448
},
{
"name": "model.layers.68.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 19939328
},
{
"name": "model.layers.69.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 24133632
}
],
"md5sum": "8f01a6bb9dd0f24f01d58d4f10429ffb"
},
{
"dataPath": "params_shard_429.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.69.mlp.down_proj.q_weight",
"shape": [
8192,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "44601f73c5e83e0bd42d0b5915d0961a"
},
{
"dataPath": "params_shard_430.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.70.mlp.down_proj.q_weight",
"shape": [
8192,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "2433b56edecdd2e6309d6e5e99ca6f93"
},
{
"dataPath": "params_shard_431.bin",
"format": "raw-shard",
"nbytes": 18923520,
"records": [
{
"name": "model.layers.69.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 0
},
{
"name": "model.layers.69.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 4194304
},
{
"name": "model.layers.69.mlp.down_proj.q_scale",
"shape": [
8192,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 4210688
},
{
"name": "model.layers.69.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 18890752
},
{
"name": "model.layers.70.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 18907136
}
],
"md5sum": "a986fbe9571ba1ceecbb064a5bc74cc9"
},
{
"dataPath": "params_shard_432.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.70.mlp.gate_up_proj.q_weight",
"shape": [
57344,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "cc702c5d495bcd4b4c6210feebde87c3"
},
{
"dataPath": "params_shard_433.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.70.mlp.gate_up_proj.q_scale",
"shape": [
57344,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "050efb2f5488d53e66ae3ae4fffcbb33"
},
{
"dataPath": "params_shard_434.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.70.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "535a3e62db2069aaf45cb50cb42368f3"
},
{
"dataPath": "params_shard_435.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.70.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "66798ab694aace297c1652f088092b25"
},
{
"dataPath": "params_shard_436.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.71.mlp.down_proj.q_weight",
"shape": [
8192,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "119e9d92bff3619213b632e5512af112"
},
{
"dataPath": "params_shard_437.bin",
"format": "raw-shard",
"nbytes": 24150016,
"records": [
{
"name": "model.layers.70.mlp.down_proj.q_scale",
"shape": [
8192,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 0
},
{
"name": "model.layers.70.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 14680064
},
{
"name": "model.layers.70.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 14696448
},
{
"name": "model.layers.70.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 19939328
},
{
"name": "model.layers.71.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 24133632
}
],
"md5sum": "db236da1e37c32527ec46d01ab4ba71b"
},
{
"dataPath": "params_shard_438.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.71.mlp.gate_up_proj.q_weight",
"shape": [
57344,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "11262325d55586ae6249433900bc83b0"
},
{
"dataPath": "params_shard_439.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.71.mlp.gate_up_proj.q_scale",
"shape": [
57344,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "6bacd5203e9f4fc0d13cd120955fae08"
},
{
"dataPath": "params_shard_440.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.71.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "0b8bb948b2cbd88975b57ea8a3730a58"
},
{
"dataPath": "params_shard_441.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.71.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "8d9eba94aa293dcad09f4a24b06cf5e3"
},
{
"dataPath": "params_shard_442.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.72.mlp.gate_up_proj.q_weight",
"shape": [
57344,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "2a4b74532497a69a075db74a6acfccc0"
},
{
"dataPath": "params_shard_443.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.72.mlp.gate_up_proj.q_scale",
"shape": [
57344,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "6cbc77b117c6f866a7d0fcbe372afa0b"
},
{
"dataPath": "params_shard_444.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.72.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "a4ba91f4810331811117225cf9a6b4bc"
},
{
"dataPath": "params_shard_445.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.72.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "f26b2d0d6c5cf8a4e6b1f8b003c91c63"
},
{
"dataPath": "params_shard_446.bin",
"format": "raw-shard",
"nbytes": 29376512,
"records": [
{
"name": "model.layers.71.mlp.down_proj.q_scale",
"shape": [
8192,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 0
},
{
"name": "model.layers.71.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 14680064
},
{
"name": "model.layers.71.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 14696448
},
{
"name": "model.layers.71.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 19939328
},
{
"name": "model.layers.72.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 24133632
}
],
"md5sum": "b82d348656f206b66446d310772a2ebc"
},
{
"dataPath": "params_shard_447.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.72.mlp.down_proj.q_weight",
"shape": [
8192,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "e50eab15ee0e68df2cbd81153bb8a29a"
},
{
"dataPath": "params_shard_448.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.73.mlp.down_proj.q_weight",
"shape": [
8192,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "c7341f0a4372ce0c59b2bcbcb4ee199f"
},
{
"dataPath": "params_shard_449.bin",
"format": "raw-shard",
"nbytes": 18923520,
"records": [
{
"name": "model.layers.72.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 0
},
{
"name": "model.layers.72.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 4194304
},
{
"name": "model.layers.72.mlp.down_proj.q_scale",
"shape": [
8192,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 4210688
},
{
"name": "model.layers.72.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 18890752
},
{
"name": "model.layers.73.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 18907136
}
],
"md5sum": "12864c7cec02d2f9943895c4f4b2bc6a"
},
{
"dataPath": "params_shard_450.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.73.mlp.gate_up_proj.q_weight",
"shape": [
57344,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "b5da6eb5a4f216cdedf1417f733be53d"
},
{
"dataPath": "params_shard_451.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.73.mlp.gate_up_proj.q_scale",
"shape": [
57344,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "c8bc856c4884d26d5c9f433ef458beeb"
},
{
"dataPath": "params_shard_452.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.73.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "9f4a0dcb269e964cc5c29553c13a0e8e"
},
{
"dataPath": "params_shard_453.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.73.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "47778d8afd2b6befc1ed006f9de89cb8"
},
{
"dataPath": "params_shard_454.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.74.mlp.down_proj.q_weight",
"shape": [
8192,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "154314e431ca05c572fa66a601cfaf47"
},
{
"dataPath": "params_shard_455.bin",
"format": "raw-shard",
"nbytes": 24150016,
"records": [
{
"name": "model.layers.73.mlp.down_proj.q_scale",
"shape": [
8192,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 0
},
{
"name": "model.layers.73.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 14680064
},
{
"name": "model.layers.73.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 14696448
},
{
"name": "model.layers.73.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 19939328
},
{
"name": "model.layers.74.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 24133632
}
],
"md5sum": "d3bf76f79425bed262c2583c112e8d6a"
},
{
"dataPath": "params_shard_456.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.74.mlp.gate_up_proj.q_weight",
"shape": [
57344,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "82dbca69ad912eac81047bab63a746d1"
},
{
"dataPath": "params_shard_457.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.74.mlp.gate_up_proj.q_scale",
"shape": [
57344,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "8641e16be2a7a7e37b3f251d24c0bc34"
},
{
"dataPath": "params_shard_458.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.74.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "9954a191167b16593d9f172470e73b48"
},
{
"dataPath": "params_shard_459.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.74.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "999f6611a8bd12c4d5d8cd6711f0b4c1"
},
{
"dataPath": "params_shard_460.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.75.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "a19d02c18221d9318b73419b081bf9e6"
},
{
"dataPath": "params_shard_461.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.75.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "e722ffbd7d016ac8ae7e1a30a96b4835"
},
{
"dataPath": "params_shard_462.bin",
"format": "raw-shard",
"nbytes": 29376512,
"records": [
{
"name": "model.layers.74.mlp.down_proj.q_scale",
"shape": [
8192,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 0
},
{
"name": "model.layers.74.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 14680064
},
{
"name": "model.layers.74.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 14696448
},
{
"name": "model.layers.74.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 19939328
},
{
"name": "model.layers.75.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 24133632
}
],
"md5sum": "2ea4ebc654f82b4fa524860cee4c0a61"
},
{
"dataPath": "params_shard_463.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.75.mlp.down_proj.q_weight",
"shape": [
8192,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "9c1b671ddd666311070c360a4579f7fc"
},
{
"dataPath": "params_shard_464.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.75.mlp.gate_up_proj.q_weight",
"shape": [
57344,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "b28874631b6dd345b47414ef71c995e5"
},
{
"dataPath": "params_shard_465.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.75.mlp.gate_up_proj.q_scale",
"shape": [
57344,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "9742b71abc6a452de69cc2c21895a1a8"
},
{
"dataPath": "params_shard_466.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.76.mlp.down_proj.q_weight",
"shape": [
8192,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "c054ae03a25dcc11cc8778796eec97f7"
},
{
"dataPath": "params_shard_467.bin",
"format": "raw-shard",
"nbytes": 18923520,
"records": [
{
"name": "model.layers.75.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 0
},
{
"name": "model.layers.75.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 4194304
},
{
"name": "model.layers.75.mlp.down_proj.q_scale",
"shape": [
8192,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 4210688
},
{
"name": "model.layers.75.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 18890752
},
{
"name": "model.layers.76.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 18907136
}
],
"md5sum": "ca7a1d0cd7793de7c59e169cc5ba5b31"
},
{
"dataPath": "params_shard_468.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.76.mlp.gate_up_proj.q_weight",
"shape": [
57344,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "d365c4cd3b22eede366c02bbec0a9e86"
},
{
"dataPath": "params_shard_469.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.76.mlp.gate_up_proj.q_scale",
"shape": [
57344,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "2c10aa1530f61fd7b03d638d98b1fbd6"
},
{
"dataPath": "params_shard_470.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.76.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "ba5f2d95dbddfc2f526bb880e57dde3e"
},
{
"dataPath": "params_shard_471.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.76.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "573d5a90189faad6bb6e0e4608d068d0"
},
{
"dataPath": "params_shard_472.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.77.mlp.down_proj.q_weight",
"shape": [
8192,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "07647877f6c7a58586a8a5389f447b80"
},
{
"dataPath": "params_shard_473.bin",
"format": "raw-shard",
"nbytes": 24150016,
"records": [
{
"name": "model.layers.76.mlp.down_proj.q_scale",
"shape": [
8192,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 0
},
{
"name": "model.layers.76.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 14680064
},
{
"name": "model.layers.76.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 14696448
},
{
"name": "model.layers.76.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 19939328
},
{
"name": "model.layers.77.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 24133632
}
],
"md5sum": "69f111a81530e455587a096a935a8c36"
},
{
"dataPath": "params_shard_474.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.77.mlp.gate_up_proj.q_weight",
"shape": [
57344,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "4d0a3f671cdc776386eb9cecc62ea5d9"
},
{
"dataPath": "params_shard_475.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.77.mlp.gate_up_proj.q_scale",
"shape": [
57344,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "882e0e154f1281f339de65507e3e28e2"
},
{
"dataPath": "params_shard_476.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.77.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "4c9936aacfccc30727c4a5bd2526ef89"
},
{
"dataPath": "params_shard_477.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.77.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "259cf83c8338b1a5d768bd9474b36711"
},
{
"dataPath": "params_shard_478.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.78.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "40b55660002bb8c2df6a584cd5f24604"
},
{
"dataPath": "params_shard_479.bin",
"format": "raw-shard",
"nbytes": 29376512,
"records": [
{
"name": "model.layers.77.mlp.down_proj.q_scale",
"shape": [
8192,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14680064,
"byteOffset": 0
},
{
"name": "model.layers.77.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 14680064
},
{
"name": "model.layers.77.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 14696448
},
{
"name": "model.layers.77.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 19939328
},
{
"name": "model.layers.78.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 24133632
}
],
"md5sum": "5e2ff9755c18a4644cfa378d01bd3128"
}
]
}