WpythonW committed
Commit 056face · verified · 1 parent: cb7e2b2

Delete Qwen3-14B-FP8-modelopt-tp2

Qwen3-14B-FP8-modelopt-tp2/config.json DELETED
@@ -1,85 +0,0 @@
- {
-   "producer": {
-     "name": "modelopt",
-     "version": "0.33.1"
-   },
-   "architecture": "Qwen3ForCausalLM",
-   "dtype": "bfloat16",
-   "logits_dtype": "float16",
-   "num_hidden_layers": 40,
-   "num_attention_heads": 40,
-   "num_key_value_heads": 8,
-   "hidden_size": 5120,
-   "norm_epsilon": 1e-06,
-   "vocab_size": 151936,
-   "max_position_embeddings": 40960,
-   "hidden_act": "silu",
-   "use_parallel_embedding": true,
-   "embedding_sharding_dim": 0,
-   "head_size": 128,
-   "intermediate_size": 17408,
-   "position_embedding_type": "rope_gpt_neox",
-   "share_embedding_table": false,
-   "residual_mlp": false,
-   "bias": false,
-   "rotary_pct": 1.0,
-   "rank": 1,
-   "decoder": "qwen",
-   "rmsnorm": true,
-   "lm_head_bias": false,
-   "mlp_bias": false,
-   "attn_bias": false,
-   "rotary_base": 1000000,
-   "rotary_scaling": null,
-   "disable_weight_only_quant_plugin": false,
-   "num_labels": 1,
-   "use_logn_attn": false,
-   "mlp_only_layers": [],
-   "decoder_sparse_step": 1,
-   "moe": {
-     "num_experts": 0,
-     "shared_expert_intermediate_size": 0,
-     "top_k": 0,
-     "normalization_mode": 0,
-     "sparse_mixer_epsilon": 0.01,
-     "tp_mode": 0,
-     "device_limited_n_group": 0,
-     "device_limited_topk_group": 0,
-     "device_limited_routed_scaling_factor": 1.0
-   },
-   "runtime_defaults": null,
-   "mapping": {
-     "world_size": 2,
-     "gpus_per_node": 8,
-     "cp_size": 1,
-     "tp_size": 1,
-     "pp_size": 2,
-     "moe_tp_size": 1,
-     "moe_cluster_size": 1,
-     "moe_ep_size": 1,
-     "attn_tp_size": 1,
-     "attn_cp_size": 1,
-     "auto_parallel": false
-   },
-   "quantization": {
-     "quant_algo": "FP8",
-     "kv_cache_quant_algo": "FP8",
-     "group_size": 128,
-     "smoothquant_val": 0.5,
-     "clamp_val": null,
-     "use_meta_recipe": false,
-     "has_zero_point": false,
-     "pre_quant_scale": false,
-     "exclude_modules": [
-       "lm_head"
-     ]
-   },
-   "qk_layernorm": false,
-   "rotary_embedding_dim": 128,
-   "seq_length": 8192,
-   "qwen_type": "qwen3",
-   "moe_intermediate_size": 0,
-   "moe_shared_expert_intermediate_size": 0,
-   "tie_word_embeddings": false,
-   "model_type": "qwen"
- }
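For context on what the deleted config described, here is a minimal sketch of how its recorded parallel mapping can be sanity-checked. The path refers to the repo state before this commit, and the `world_size = cp_size * tp_size * pp_size` identity is an assumption based on the usual TensorRT-LLM mapping convention:

```python
import json

# Load the config as it existed before this deletion.
with open("Qwen3-14B-FP8-modelopt-tp2/config.json") as f:
    cfg = json.load(f)

m = cfg["mapping"]
# Assumed TensorRT-LLM convention: world_size = cp_size * tp_size * pp_size.
assert m["world_size"] == m["cp_size"] * m["tp_size"] * m["pp_size"]  # 2 == 1*1*2

# Note: despite the "tp2" directory name, the mapping records tensor
# parallelism 1 and pipeline parallelism 2, i.e. the two rank files
# below would be pipeline-parallel shards.
print(m["tp_size"], m["pp_size"])  # -> 1 2
```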
Qwen3-14B-FP8-modelopt-tp2/rank0.safetensors DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:2f6088622870fe1ef98be6c4cf4a8f301e2ee8ef5815a8106f50fe3ea97375fc
- size 8162324408
Qwen3-14B-FP8-modelopt-tp2/rank1.safetensors DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:c4da55ec56f69d9454c10f6630589a2bcedcc6a7b92df02e28be11dce5d5333b
- size 8162335408
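The two deleted weight files are git-lfs pointers rather than the tensors themselves; the `oid` is the sha256 of the actual payload. A minimal sketch for verifying a downloaded shard against its pointer (the local filename is a hypothetical assumption):

```python
import hashlib

# sha256 from the rank0.safetensors LFS pointer deleted above.
EXPECTED_OID = "2f6088622870fe1ef98be6c4cf4a8f301e2ee8ef5815a8106f50fe3ea97375fc"

h = hashlib.sha256()
with open("rank0.safetensors", "rb") as f:  # hypothetical local download
    for chunk in iter(lambda: f.read(1 << 20), b""):  # stream in 1 MiB chunks
        h.update(chunk)

assert h.hexdigest() == EXPECTED_OID
```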