| { |
| "metadata": { |
| "ParamSize": 63, |
| "ParamBytes": 7862016.0, |
| "BitsPerParam": 4.504111095833906 |
| }, |
| "records": [ |
| { |
| "dataPath": "params_shard_0.bin", |
| "format": "raw-shard", |
| "nbytes": 7862016, |
| "records": [ |
| { |
| "name": "model.embed_tokens.q_weight", |
| "shape": [ |
| 4096, |
| 48 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 786432, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "model.embed_tokens.q_scale", |
| "shape": [ |
| 4096, |
| 12 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 98304, |
| "byteOffset": 786432 |
| }, |
| { |
| "name": "model.layers.0.input_layernorm.weight", |
| "shape": [ |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 768, |
| "byteOffset": 884736 |
| }, |
| { |
| "name": "model.layers.0.mlp.down_proj.q_weight", |
| "shape": [ |
| 384, |
| 96 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 147456, |
| "byteOffset": 885504 |
| }, |
| { |
| "name": "model.layers.0.mlp.down_proj.q_scale", |
| "shape": [ |
| 384, |
| 24 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 18432, |
| "byteOffset": 1032960 |
| }, |
| { |
| "name": "model.layers.0.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 1536, |
| 48 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 294912, |
| "byteOffset": 1051392 |
| }, |
| { |
| "name": "model.layers.0.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 1536, |
| 12 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 36864, |
| "byteOffset": 1346304 |
| }, |
| { |
| "name": "model.layers.0.post_attention_layernorm.weight", |
| "shape": [ |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 768, |
| "byteOffset": 1383168 |
| }, |
| { |
| "name": "model.layers.0.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 2048, |
| 48 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 393216, |
| "byteOffset": 1383936 |
| }, |
| { |
| "name": "model.layers.0.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 2048, |
| 12 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 49152, |
| "byteOffset": 1777152 |
| }, |
| { |
| "name": "model.layers.0.self_attn.o_proj.q_weight", |
| "shape": [ |
| 384, |
| 128 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 196608, |
| "byteOffset": 1826304 |
| }, |
| { |
| "name": "model.layers.0.self_attn.o_proj.q_scale", |
| "shape": [ |
| 384, |
| 32 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 24576, |
| "byteOffset": 2022912 |
| }, |
| { |
| "name": "model.layers.1.input_layernorm.weight", |
| "shape": [ |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 768, |
| "byteOffset": 2047488 |
| }, |
| { |
| "name": "model.layers.1.mlp.down_proj.q_weight", |
| "shape": [ |
| 384, |
| 96 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 147456, |
| "byteOffset": 2048256 |
| }, |
| { |
| "name": "model.layers.1.mlp.down_proj.q_scale", |
| "shape": [ |
| 384, |
| 24 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 18432, |
| "byteOffset": 2195712 |
| }, |
| { |
| "name": "model.layers.1.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 1536, |
| 48 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 294912, |
| "byteOffset": 2214144 |
| }, |
| { |
| "name": "model.layers.1.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 1536, |
| 12 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 36864, |
| "byteOffset": 2509056 |
| }, |
| { |
| "name": "model.layers.1.post_attention_layernorm.weight", |
| "shape": [ |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 768, |
| "byteOffset": 2545920 |
| }, |
| { |
| "name": "model.layers.1.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 2048, |
| 48 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 393216, |
| "byteOffset": 2546688 |
| }, |
| { |
| "name": "model.layers.1.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 2048, |
| 12 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 49152, |
| "byteOffset": 2939904 |
| }, |
| { |
| "name": "model.layers.1.self_attn.o_proj.q_weight", |
| "shape": [ |
| 384, |
| 128 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 196608, |
| "byteOffset": 2989056 |
| }, |
| { |
| "name": "model.layers.1.self_attn.o_proj.q_scale", |
| "shape": [ |
| 384, |
| 32 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 24576, |
| "byteOffset": 3185664 |
| }, |
| { |
| "name": "model.layers.2.input_layernorm.weight", |
| "shape": [ |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 768, |
| "byteOffset": 3210240 |
| }, |
| { |
| "name": "model.layers.2.mlp.down_proj.q_weight", |
| "shape": [ |
| 384, |
| 96 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 147456, |
| "byteOffset": 3211008 |
| }, |
| { |
| "name": "model.layers.2.mlp.down_proj.q_scale", |
| "shape": [ |
| 384, |
| 24 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 18432, |
| "byteOffset": 3358464 |
| }, |
| { |
| "name": "model.layers.2.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 1536, |
| 48 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 294912, |
| "byteOffset": 3376896 |
| }, |
| { |
| "name": "model.layers.2.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 1536, |
| 12 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 36864, |
| "byteOffset": 3671808 |
| }, |
| { |
| "name": "model.layers.2.post_attention_layernorm.weight", |
| "shape": [ |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 768, |
| "byteOffset": 3708672 |
| }, |
| { |
| "name": "model.layers.2.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 2048, |
| 48 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 393216, |
| "byteOffset": 3709440 |
| }, |
| { |
| "name": "model.layers.2.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 2048, |
| 12 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 49152, |
| "byteOffset": 4102656 |
| }, |
| { |
| "name": "model.layers.2.self_attn.o_proj.q_weight", |
| "shape": [ |
| 384, |
| 128 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 196608, |
| "byteOffset": 4151808 |
| }, |
| { |
| "name": "model.layers.2.self_attn.o_proj.q_scale", |
| "shape": [ |
| 384, |
| 32 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 24576, |
| "byteOffset": 4348416 |
| }, |
| { |
| "name": "model.layers.3.input_layernorm.weight", |
| "shape": [ |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 768, |
| "byteOffset": 4372992 |
| }, |
| { |
| "name": "model.layers.3.mlp.down_proj.q_weight", |
| "shape": [ |
| 384, |
| 96 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 147456, |
| "byteOffset": 4373760 |
| }, |
| { |
| "name": "model.layers.3.mlp.down_proj.q_scale", |
| "shape": [ |
| 384, |
| 24 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 18432, |
| "byteOffset": 4521216 |
| }, |
| { |
| "name": "model.layers.3.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 1536, |
| 48 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 294912, |
| "byteOffset": 4539648 |
| }, |
| { |
| "name": "model.layers.3.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 1536, |
| 12 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 36864, |
| "byteOffset": 4834560 |
| }, |
| { |
| "name": "model.layers.3.post_attention_layernorm.weight", |
| "shape": [ |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 768, |
| "byteOffset": 4871424 |
| }, |
| { |
| "name": "model.layers.3.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 2048, |
| 48 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 393216, |
| "byteOffset": 4872192 |
| }, |
| { |
| "name": "model.layers.3.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 2048, |
| 12 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 49152, |
| "byteOffset": 5265408 |
| }, |
| { |
| "name": "model.layers.3.self_attn.o_proj.q_weight", |
| "shape": [ |
| 384, |
| 128 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 196608, |
| "byteOffset": 5314560 |
| }, |
| { |
| "name": "model.layers.3.self_attn.o_proj.q_scale", |
| "shape": [ |
| 384, |
| 32 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 24576, |
| "byteOffset": 5511168 |
| }, |
| { |
| "name": "model.layers.4.input_layernorm.weight", |
| "shape": [ |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 768, |
| "byteOffset": 5535744 |
| }, |
| { |
| "name": "model.layers.4.mlp.down_proj.q_weight", |
| "shape": [ |
| 384, |
| 96 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 147456, |
| "byteOffset": 5536512 |
| }, |
| { |
| "name": "model.layers.4.mlp.down_proj.q_scale", |
| "shape": [ |
| 384, |
| 24 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 18432, |
| "byteOffset": 5683968 |
| }, |
| { |
| "name": "model.layers.4.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 1536, |
| 48 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 294912, |
| "byteOffset": 5702400 |
| }, |
| { |
| "name": "model.layers.4.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 1536, |
| 12 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 36864, |
| "byteOffset": 5997312 |
| }, |
| { |
| "name": "model.layers.4.post_attention_layernorm.weight", |
| "shape": [ |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 768, |
| "byteOffset": 6034176 |
| }, |
| { |
| "name": "model.layers.4.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 2048, |
| 48 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 393216, |
| "byteOffset": 6034944 |
| }, |
| { |
| "name": "model.layers.4.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 2048, |
| 12 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 49152, |
| "byteOffset": 6428160 |
| }, |
| { |
| "name": "model.layers.4.self_attn.o_proj.q_weight", |
| "shape": [ |
| 384, |
| 128 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 196608, |
| "byteOffset": 6477312 |
| }, |
| { |
| "name": "model.layers.4.self_attn.o_proj.q_scale", |
| "shape": [ |
| 384, |
| 32 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 24576, |
| "byteOffset": 6673920 |
| }, |
| { |
| "name": "model.layers.5.input_layernorm.weight", |
| "shape": [ |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 768, |
| "byteOffset": 6698496 |
| }, |
| { |
| "name": "model.layers.5.mlp.down_proj.q_weight", |
| "shape": [ |
| 384, |
| 96 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 147456, |
| "byteOffset": 6699264 |
| }, |
| { |
| "name": "model.layers.5.mlp.down_proj.q_scale", |
| "shape": [ |
| 384, |
| 24 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 18432, |
| "byteOffset": 6846720 |
| }, |
| { |
| "name": "model.layers.5.mlp.gate_up_proj.q_weight", |
| "shape": [ |
| 1536, |
| 48 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 294912, |
| "byteOffset": 6865152 |
| }, |
| { |
| "name": "model.layers.5.mlp.gate_up_proj.q_scale", |
| "shape": [ |
| 1536, |
| 12 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 36864, |
| "byteOffset": 7160064 |
| }, |
| { |
| "name": "model.layers.5.post_attention_layernorm.weight", |
| "shape": [ |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 768, |
| "byteOffset": 7196928 |
| }, |
| { |
| "name": "model.layers.5.self_attn.qkv_proj.q_weight", |
| "shape": [ |
| 2048, |
| 48 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 393216, |
| "byteOffset": 7197696 |
| }, |
| { |
| "name": "model.layers.5.self_attn.qkv_proj.q_scale", |
| "shape": [ |
| 2048, |
| 12 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 49152, |
| "byteOffset": 7590912 |
| }, |
| { |
| "name": "model.layers.5.self_attn.o_proj.q_weight", |
| "shape": [ |
| 384, |
| 128 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 196608, |
| "byteOffset": 7640064 |
| }, |
| { |
| "name": "model.layers.5.self_attn.o_proj.q_scale", |
| "shape": [ |
| 384, |
| 32 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 24576, |
| "byteOffset": 7836672 |
| }, |
| { |
| "name": "model.norm.weight", |
| "shape": [ |
| 384 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 768, |
| "byteOffset": 7861248 |
| } |
| ], |
| "md5sum": "c4a7c141bffb4bf77b662a6b32138dc1" |
| } |
| ] |
| } |