WpythonW committed on
Commit 6a6913c · verified · 1 Parent(s): 056face

Upload folder using huggingface_hub

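For context, a commit like this one is typically produced with huggingface_hub's upload_folder API. A minimal sketch follows; the folder path and repo id are illustrative assumptions, not taken from this commit:

import os
from huggingface_hub import HfApi

api = HfApi(token=os.environ.get("HF_TOKEN"))

# Upload every file in the local engine folder to the model repo.
# folder_path and repo_id are hypothetical placeholders.
api.upload_folder(
    folder_path="./Qwen3-14B-FP8-modelopt-pp1",
    repo_id="WpythonW/Qwen3-14B-FP8",  # assumed repo id
    commit_message="Upload folder using huggingface_hub",
)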
Qwen3-14B-FP8-modelopt-pp1/config.json ADDED
@@ -0,0 +1,85 @@
+ {
+   "producer": {
+     "name": "modelopt",
+     "version": "0.33.1"
+   },
+   "architecture": "Qwen3ForCausalLM",
+   "dtype": "bfloat16",
+   "logits_dtype": "float16",
+   "num_hidden_layers": 40,
+   "num_attention_heads": 40,
+   "num_key_value_heads": 8,
+   "hidden_size": 5120,
+   "norm_epsilon": 1e-06,
+   "vocab_size": 151936,
+   "max_position_embeddings": 40960,
+   "hidden_act": "silu",
+   "use_parallel_embedding": true,
+   "embedding_sharding_dim": 0,
+   "head_size": 128,
+   "intermediate_size": 17408,
+   "position_embedding_type": "rope_gpt_neox",
+   "share_embedding_table": false,
+   "residual_mlp": false,
+   "bias": false,
+   "rotary_pct": 1.0,
+   "rank": 0,
+   "decoder": "qwen",
+   "rmsnorm": true,
+   "lm_head_bias": false,
+   "mlp_bias": false,
+   "attn_bias": false,
+   "rotary_base": 1000000,
+   "rotary_scaling": null,
+   "disable_weight_only_quant_plugin": false,
+   "num_labels": 1,
+   "use_logn_attn": false,
+   "mlp_only_layers": [],
+   "decoder_sparse_step": 1,
+   "moe": {
+     "num_experts": 0,
+     "shared_expert_intermediate_size": 0,
+     "top_k": 0,
+     "normalization_mode": 0,
+     "sparse_mixer_epsilon": 0.01,
+     "tp_mode": 0,
+     "device_limited_n_group": 0,
+     "device_limited_topk_group": 0,
+     "device_limited_routed_scaling_factor": 1.0
+   },
+   "runtime_defaults": null,
+   "mapping": {
+     "world_size": 1,
+     "gpus_per_node": 8,
+     "cp_size": 1,
+     "tp_size": 1,
+     "pp_size": 1,
+     "moe_tp_size": 1,
+     "moe_cluster_size": 1,
+     "moe_ep_size": 1,
+     "attn_tp_size": 1,
+     "attn_cp_size": 1,
+     "auto_parallel": false
+   },
+   "quantization": {
+     "quant_algo": "FP8",
+     "kv_cache_quant_algo": "FP8",
+     "group_size": 128,
+     "smoothquant_val": 0.5,
+     "clamp_val": null,
+     "use_meta_recipe": false,
+     "has_zero_point": false,
+     "pre_quant_scale": false,
+     "exclude_modules": [
+       "lm_head"
+     ]
+   },
+   "qk_layernorm": false,
+   "rotary_embedding_dim": 128,
+   "seq_length": 8192,
+   "qwen_type": "qwen3",
+   "moe_intermediate_size": 0,
+   "moe_shared_expert_intermediate_size": 0,
+   "tie_word_embeddings": false,
+   "model_type": "qwen"
+ }
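The fields that characterize this checkpoint are the quantization and parallelism settings. A minimal sketch of inspecting them with plain json, assuming the folder layout shown in this commit:

import json

with open("Qwen3-14B-FP8-modelopt-pp1/config.json") as f:
    cfg = json.load(f)

quant = cfg["quantization"]
mapping = cfg["mapping"]

# FP8 weights and FP8 KV cache, with lm_head left unquantized.
print(quant["quant_algo"], quant["kv_cache_quant_algo"], quant["exclude_modules"])

# Single-rank checkpoint: world_size == tp_size == pp_size == 1.
print(mapping["world_size"], mapping["tp_size"], mapping["pp_size"])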
Qwen3-14B-FP8-modelopt-pp1/rank0.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ffd18f98a5e05fea710212c68a767866bdb2f8aefac77f4f964b3b8c3fb6625d
+ size 16324661448
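The weights file is stored as a Git LFS pointer: the repo tracks only the sha256 oid and byte size, not the 16 GB payload. After fetching the real file, both values can be checked locally. A minimal sketch, assuming the file was downloaded to the path shown in this commit:

import hashlib
from pathlib import Path

path = Path("Qwen3-14B-FP8-modelopt-pp1/rank0.safetensors")

# The byte size must match the pointer's "size" field exactly.
assert path.stat().st_size == 16324661448

# Stream the file in 1 MiB chunks to compute the sha256 oid.
h = hashlib.sha256()
with path.open("rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        h.update(chunk)
assert h.hexdigest() == "ffd18f98a5e05fea710212c68a767866bdb2f8aefac77f4f964b3b8c3fb6625d"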