{ "metadata": { "ParamSize": 325, "ParamBytes": 4526991360.0, "BitsPerParam": 5.000992644251571 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 65540096, "records": [ { "name": "lm_head.q_weight", "shape": [ 32002, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 65540096, "byteOffset": 0 } ], "md5sum": "cf59959503722076993213134db3c956" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.22.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "3f5d9b9ff83129597d7e1523d15e9096" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "dd525b7e9032fbe6026e2b2e7a1e618b" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.23.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "74d6d2ba782f3e5a64f62fa42fcd33ff" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "46bec84313bc6b30f3ecd67b827c4c4d" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 30245376, "records": [ { "name": "lm_head.q_scale", "shape": [ 32002, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192512, "byteOffset": 0 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8192512 }, { "name": "model.layers.22.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 8200704 }, { "name": "model.layers.22.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 11870720 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 19210752 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 19218944 }, { "name": "model.layers.23.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 19227136 }, { "name": "model.layers.23.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 22897152 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 30237184 } ], "md5sum": "5144668a1c589376836fb09307f50d11" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.24.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "4b2aa1c66c8f1ff1adc4445ac9fee365" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "409132739bd7755038ac5e793f69efbd" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 27271168, "records": [ { "name": "model.layers.23.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.23.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.23.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 14155776 }, { "name": "model.layers.23.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22544384 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 23592960 }, { "name": "model.layers.24.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 23601152 } ], "md5sum": "3f44e3cc28ba4b2af5cb168b0a2a9eed" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.25.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "1ca2a23152e9520a8f7a8aa235215a9b" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 30949376, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 7340032 }, { "name": "model.layers.24.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 7348224 }, { "name": "model.layers.24.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 19931136 }, { "name": "model.layers.24.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 21504000 }, { "name": "model.layers.24.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 29892608 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 30941184 } ], "md5sum": "7d0a199657157da875da5d6ed9133998" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "2679d3676108321d83a0dfc9dee28a01" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 25174016, "records": [ { "name": "model.layers.25.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.25.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 3670016 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 11010048 }, { "name": "model.layers.25.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 11018240 }, { "name": "model.layers.25.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 23601152 } ], "md5sum": "a28f0175dc87b4c96000f2b740e58d9a" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.26.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "b1f828c087f34b9fb89737bcac9de093" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "c718b7b91804c1a7d3ef291f0b92c60a" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 33046528, "records": [ { "name": "model.layers.25.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.25.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.26.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 9445376 }, { "name": "model.layers.26.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 13115392 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 20455424 }, { "name": "model.layers.26.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 20463616 } ], "md5sum": "4cef20a94f8af3a6d6e85347615ff677" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.27.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "b7cf6c8aa1d750e01800d04db8033fae" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "0aaa25b92711b93ebe9e61ec96f8cd32" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 22036480, "records": [ { "name": "model.layers.26.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 0 }, { "name": "model.layers.26.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 1572864 }, { "name": "model.layers.26.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 9961472 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 11010048 }, { "name": "model.layers.27.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 11018240 }, { "name": "model.layers.27.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 14688256 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 22028288 } ], "md5sum": "8e509589d8182dbdec45dbd549784632" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.28.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "135cf3c122e5a4b030253349dde3878a" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "4a9d33533ac2ff2f4f6964aca82d7ee9" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 27271168, "records": [ { "name": "model.layers.27.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.27.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.27.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 14155776 }, { "name": "model.layers.27.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22544384 }, { "name": "model.layers.28.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 23592960 }, { "name": "model.layers.28.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 23601152 } ], "md5sum": "67d1307e69eb18073af61837118471d4" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.29.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "10ac84694124adfcbe5ea30ce6fabf35" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 30949376, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.28.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 7340032 }, { "name": "model.layers.28.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 7348224 }, { "name": "model.layers.28.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 19931136 }, { "name": "model.layers.28.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 21504000 }, { "name": "model.layers.28.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 29892608 }, { "name": "model.layers.29.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 30941184 } ], "md5sum": "59e5e720a3ce05e786115208898e097c" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "57476bfafa8b5f99422b976bea05f364" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 25174016, "records": [ { "name": "model.layers.29.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.29.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 3670016 }, { "name": "model.layers.29.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 11010048 }, { "name": "model.layers.29.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 11018240 }, { "name": "model.layers.29.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 23601152 } ], "md5sum": "c30c7eaa88aa69f713e9f83c1142fdd7" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.30.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "45cd6664af225c75050ab7f624a7e5e7" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "12d69cd1d25795cd70bae632fec7f03d" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 33046528, "records": [ { "name": "model.layers.29.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.29.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.30.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.30.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 9445376 }, { "name": "model.layers.30.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 13115392 }, { "name": "model.layers.30.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 20455424 }, { "name": "model.layers.30.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 20463616 } ], "md5sum": "5dfe5db194ba73bd36151e4eb753ff4d" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.31.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "3d84978f592e934af77f79f43f6b798f" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "644968fe88b479bc0c195c76471f8f09" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 22036480, "records": [ { "name": "model.layers.30.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 0 }, { "name": "model.layers.30.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 1572864 }, { "name": "model.layers.30.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 9961472 }, { "name": "model.layers.31.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 11010048 }, { "name": "model.layers.31.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 11018240 }, { "name": "model.layers.31.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 14688256 }, { "name": "model.layers.31.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 22028288 } ], "md5sum": "a447f73d984ffb24be8ca9d566fdd75c" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 65540096, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 32002, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 65540096, "byteOffset": 0 } ], "md5sum": "7f4096dae1c40c9b25aa20370b0e15cb" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "d11344b6c16c5b506718dc2bc3370346" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 31801856, "records": [ { "name": "model.layers.31.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.31.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.31.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 14155776 }, { "name": "model.layers.31.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22544384 }, { "name": "model.norm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 23592960 }, { "name": "model.embed_tokens.q_scale", "shape": [ 32002, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192512, "byteOffset": 23601152 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 31793664 } ], "md5sum": "8baa0e2cdfe896a49a5e9e6ea7b31511" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "74fb56c0beb5096dae569f3d99ee6f51" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 25174016, "records": [ { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 3670016 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 11010048 }, { "name": "model.layers.0.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 11018240 }, { "name": "model.layers.0.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 23601152 } ], "md5sum": "bef51d6f1e302e9f2051be8fb2a14b7b" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "42f6d8f6108b54ff38d2a14e7d26c44b" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "bee1545ebd6d3fc2bfaa51215c304bdd" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 33046528, "records": [ { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 9445376 }, { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 13115392 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 20455424 }, { "name": "model.layers.1.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 20463616 } ], "md5sum": "07511e41619039745408836311877d94" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "0122775814a2ed28c32b2ba8c15f5b14" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "fcc841dd13e54a921574dde9cf32be2d" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 22036480, "records": [ { "name": "model.layers.1.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 0 }, { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 1572864 }, { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 9961472 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 11010048 }, { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 11018240 }, { "name": "model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 14688256 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 22028288 } ], "md5sum": "432f2b7bac8973bdf964d63d5e68009f" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "3403a75f6354fb1944aa55903242e468" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "55ccb580e25988fc4f0f6c2ee23124e8" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 27271168, "records": [ { "name": "model.layers.10.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.10.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 14155776 }, { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22544384 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 23592960 }, { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 23601152 } ], "md5sum": "a1ed35d609e64c6c156688fdb66ffae8" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.12.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "418c6d873a6a0d7aaff77662a47bf888" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 30949376, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 7340032 }, { "name": "model.layers.11.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 7348224 }, { "name": "model.layers.11.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 19931136 }, { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 21504000 }, { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 29892608 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 30941184 } ], "md5sum": "f69c81cba4f580f347e24ba10aec921e" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "45b9b017dd5afa5f120d346f86169060" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 25174016, "records": [ { "name": "model.layers.12.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 3670016 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 11010048 }, { "name": "model.layers.12.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 11018240 }, { "name": "model.layers.12.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 23601152 } ], "md5sum": "dfb6ff0eff7aed37320255301950796e" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.13.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "5401c9c8774f4ad3a38f22ec309b9b40" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "38a50959afba8ab3d93fe1cad950293a" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 33046528, "records": [ { "name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.13.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 9445376 }, { "name": "model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 13115392 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 20455424 }, { "name": "model.layers.13.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 20463616 } ], "md5sum": "1baf70f9b5c823ba03878047807b5a93" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.14.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "e878a7d94d6711d0c118f2a1d8686cfe" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "2f9a02c4d6ed1f17b6bfad55210b0c95" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 22036480, "records": [ { "name": "model.layers.13.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 0 }, { "name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 1572864 }, { "name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 9961472 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 11010048 }, { "name": "model.layers.14.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 11018240 }, { "name": "model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 14688256 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 22028288 } ], "md5sum": "831cb9ae23d8e0ce11d391b85a64f272" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.15.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "1dc81bd249ab839ad75b90b929ffebf6" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "89365265f8a2909ab4cc93de69807d49" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 27271168, "records": [ { "name": "model.layers.14.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.14.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 14155776 }, { "name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22544384 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 23592960 }, { "name": "model.layers.15.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 23601152 } ], "md5sum": "2c5b47cbf7bb9b3af05ec4803c75e3da" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.16.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "6a1203c534a7548c4abfc6c380d02a8d" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 30949376, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 7340032 }, { "name": "model.layers.15.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 7348224 }, { "name": "model.layers.15.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 19931136 }, { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 21504000 }, { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 29892608 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 30941184 } ], "md5sum": "7e77c98ccc4e1bdf6448af6e31e8dc60" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "dfea2ee933658116c6699750cba00e5e" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 25174016, "records": [ { "name": "model.layers.16.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.16.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 3670016 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 11010048 }, { "name": "model.layers.16.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 11018240 }, { "name": "model.layers.16.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 23601152 } ], "md5sum": "d0fbd67bceeda2b7088f2780b6a4060b" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.17.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "90babe4d2f96d7917fa7a091edc0aa88" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "9a1c42be667ab2feb19d4f2f16585631" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 33046528, "records": [ { "name": "model.layers.16.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.16.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.17.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 9445376 }, { "name": "model.layers.17.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 13115392 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 20455424 }, { "name": "model.layers.17.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 20463616 } ], "md5sum": "38507c0cbe54848e1696a2b025c2821c" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.18.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "b9c18b10e1b795e078a44341eb555679" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "e834ac5b740edd2f684e1ce4dead0f96" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 22036480, "records": [ { "name": "model.layers.17.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 0 }, { "name": "model.layers.17.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 1572864 }, { "name": "model.layers.17.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 9961472 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 11010048 }, { "name": "model.layers.18.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 11018240 }, { "name": "model.layers.18.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 14688256 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 22028288 } ], "md5sum": "801765a799c7595e0ea7590258f4c5cb" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.19.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "d10e0276513ae5848b85c78bc720b0bc" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "cdbe2ad3fbd5fb2fdae9b15a9e37bf66" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 27271168, "records": [ { "name": "model.layers.18.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.18.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.18.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 14155776 }, { "name": "model.layers.18.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22544384 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 23592960 }, { "name": "model.layers.19.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 23601152 } ], "md5sum": "d7564e82806fdf92902eb9e2be21e4a8" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "75231a501eb21aff98cc44614a9c582a" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 30949376, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 7340032 }, { "name": "model.layers.19.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 7348224 }, { "name": "model.layers.19.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 19931136 }, { "name": "model.layers.19.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 21504000 }, { "name": "model.layers.19.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 29892608 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 30941184 } ], "md5sum": "1022385bbc1edbc4de730e9c8c35a445" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "6afa8f205cfd6b1b19e385640e716df8" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 25174016, "records": [ { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 3670016 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 11010048 }, { "name": "model.layers.2.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 11018240 }, { "name": "model.layers.2.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 23601152 } ], "md5sum": "8f098eb721b702b536e2bd183f328a73" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.20.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "e6aee8ff187dd67ae36741fca05b552e" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "bf89dc2efd12df235eec2d1c408a3c8b" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 33046528, "records": [ { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.20.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 9445376 }, { "name": "model.layers.20.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 13115392 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 20455424 }, { "name": "model.layers.20.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 20463616 } ], "md5sum": "93ecfa046b906a82019c60178a7afe11" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.21.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "cda305e16f5c58df98a63478e0971a7a" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "b0599f552d4e139ce76c7c72575e9d35" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 22036480, "records": [ { "name": "model.layers.20.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 0 }, { "name": "model.layers.20.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 1572864 }, { "name": "model.layers.20.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 9961472 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 11010048 }, { "name": "model.layers.21.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 11018240 }, { "name": "model.layers.21.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 14688256 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 22028288 } ], "md5sum": "d5a8a6073d7f1aa30d0e5adee2b0058a" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 23592960, "records": [ { "name": "model.layers.21.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.21.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.21.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 14155776 }, { "name": "model.layers.21.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22544384 } ], "md5sum": "1d0d2434f48660f42efbef2920c5d844" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "2c535e0bdb978e78a3d1b5a73cc8f3aa" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "291f9b59260bc8bdfdd6dc30381561f8" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 27271168, "records": [ { "name": "model.layers.22.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.22.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.22.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 14155776 }, { "name": "model.layers.22.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22544384 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 23592960 }, { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 23601152 } ], "md5sum": "74a43f56e76ec22dc7b577ffbd1301a4" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "19a1bc1382381ad908557e06dd6bdf92" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 30949376, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 7340032 }, { "name": "model.layers.3.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 7348224 }, { "name": "model.layers.3.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 19931136 }, { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 21504000 }, { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 29892608 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 30941184 } ], "md5sum": "f9b3ecbd697f7a89228377930e26f3ac" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "89b292b14e807d664dff4b82ac7d79bf" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 25174016, "records": [ { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 3670016 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 11010048 }, { "name": "model.layers.4.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 11018240 }, { "name": "model.layers.4.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 23601152 } ], "md5sum": "f5e6690a3b06369b3d817c9800c3273b" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "ac6f689891ffa4f2f2cdaef1d9255f7b" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "cd88e2b8d7a33100e923ef5be2fc5542" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 33046528, "records": [ { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 9445376 }, { "name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 13115392 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 20455424 }, { "name": "model.layers.5.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 20463616 } ], "md5sum": "346a866cc300ba1506063647627c7fb1" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "daa4c0f349ef036b27d031a0554d8ea1" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "df0245a57de1fcbd74e87a34cf9b4dd6" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 22036480, "records": [ { "name": "model.layers.5.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 0 }, { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 1572864 }, { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 9961472 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 11010048 }, { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 11018240 }, { "name": "model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 14688256 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 22028288 } ], "md5sum": "1d8085f6341a95b442fb8a8f0beb7a95" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "384ce9ce8cfb6c591137be9b05ee9414" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "988a64793d7d86a8008a921d6bb77e2a" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 27271168, "records": [ { "name": "model.layers.6.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.6.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 14155776 }, { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22544384 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 23592960 }, { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 23601152 } ], "md5sum": "5f71915d22e4c3b08fd55125233bb7b0" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "35d975491e572e3ed60a6582b5717541" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 30949376, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 7340032 }, { "name": "model.layers.7.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 7348224 }, { "name": "model.layers.7.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 19931136 }, { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 21504000 }, { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 29892608 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 30941184 } ], "md5sum": "ebcc1ecf3950b827b0e4a9e1a6989850" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "d2f8889c2f41b296b0610901a158e715" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 25174016, "records": [ { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 3670016 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 11010048 }, { "name": "model.layers.8.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 11018240 }, { "name": "model.layers.8.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 23601152 } ], "md5sum": "192680ebc1cb01b5f3f5a56f09503e05" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "cfae739313e5aaba0ee6f1ff6569c627" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "ed695c25f1ce55d7e921cd3a69a5ad21" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 33046528, "records": [ { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 9445376 }, { "name": "model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 13115392 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 20455424 }, { "name": "model.layers.9.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 20463616 } ], "md5sum": "321a842ab12b6feeefc73a50dcf5c45e" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 11010048, "records": [ { "name": "model.layers.9.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 0 }, { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 1572864 }, { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 9961472 } ], "md5sum": "4e879765fdc12f262108e1b2189810ad" } ] }