brandonbeiler committed on
Commit
6046303
·
verified ·
1 Parent(s): 33fd097

Update config with default values that vLLM requires

Browse files
Files changed (1) hide show
  1. config.json +115 -1
config.json CHANGED
@@ -15,28 +15,84 @@
15
  "freeze_vision": false,
16
  "hidden_size": 5120,
17
  "llm_config": {
 
 
 
18
  "architectures": [
19
  "Qwen2ForCausalLM"
20
  ],
21
  "attention_dropout": 0.0,
22
  "attn_implementation": "flash_attention_2",
 
 
23
  "bos_token_id": 151643,
 
 
 
 
 
 
 
24
  "eos_token_id": 151645,
 
 
 
 
25
  "hidden_act": "silu",
26
  "hidden_size": 5120,
 
 
 
 
27
  "initializer_range": 0.02,
28
  "intermediate_size": 27648,
 
 
 
 
 
 
 
 
29
  "max_position_embeddings": 32768,
30
  "max_window_layers": 70,
 
31
  "model_type": "qwen2",
 
32
  "num_attention_heads": 40,
 
 
33
  "num_hidden_layers": 64,
34
  "num_key_value_heads": 8,
 
 
 
 
 
 
 
 
 
 
 
 
35
  "rms_norm_eps": 1e-06,
36
  "rope_scaling": null,
37
  "rope_theta": 1000000.0,
 
38
  "sliding_window": 131072,
 
 
 
 
 
 
 
 
 
39
  "torch_dtype": "bfloat16",
 
 
40
  "use_bfloat16": true,
41
  "use_cache": false,
42
  "use_sliding_window": false,
@@ -280,28 +336,86 @@
280
  "use_llm_lora": 0,
281
  "use_thumbnail": true,
282
  "vision_config": {
 
 
 
283
  "architectures": [
284
  "InternVisionModel"
285
  ],
286
  "attention_dropout": 0.0,
 
 
 
 
 
 
 
 
287
  "drop_path_rate": 0.1,
288
  "dropout": 0.0,
 
 
 
 
 
 
 
289
  "hidden_act": "gelu",
290
  "hidden_size": 3200,
 
 
 
 
291
  "image_size": 448,
292
  "initializer_factor": 0.1,
293
  "initializer_range": 1e-10,
294
  "intermediate_size": 12800,
 
 
 
 
 
 
295
  "layer_norm_eps": 1e-06,
296
- "model_type": "",
 
 
 
 
297
  "norm_type": "rms_norm",
298
  "num_attention_heads": 25,
 
 
299
  "num_channels": 3,
300
  "num_hidden_layers": 45,
 
 
 
 
 
301
  "patch_size": 14,
 
 
 
302
  "qk_normalization": true,
303
  "qkv_bias": false,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
304
  "torch_dtype": "bfloat16",
 
 
305
  "use_bfloat16": true,
306
  "use_flash_attn": true
307
  }
 
15
  "freeze_vision": false,
16
  "hidden_size": 5120,
17
  "llm_config": {
18
+ "_attn_implementation_autoset": true,
19
+ "_name_or_path": "",
20
+ "add_cross_attention": false,
21
  "architectures": [
22
  "Qwen2ForCausalLM"
23
  ],
24
  "attention_dropout": 0.0,
25
  "attn_implementation": "flash_attention_2",
26
+ "bad_words_ids": null,
27
+ "begin_suppress_tokens": null,
28
  "bos_token_id": 151643,
29
+ "chunk_size_feed_forward": 0,
30
+ "cross_attention_hidden_size": null,
31
+ "decoder_start_token_id": null,
32
+ "diversity_penalty": 0.0,
33
+ "do_sample": false,
34
+ "early_stopping": false,
35
+ "encoder_no_repeat_ngram_size": 0,
36
  "eos_token_id": 151645,
37
+ "exponential_decay_length_penalty": null,
38
+ "finetuning_task": null,
39
+ "forced_bos_token_id": null,
40
+ "forced_eos_token_id": null,
41
  "hidden_act": "silu",
42
  "hidden_size": 5120,
43
+ "id2label": {
44
+ "0": "LABEL_0",
45
+ "1": "LABEL_1"
46
+ },
47
  "initializer_range": 0.02,
48
  "intermediate_size": 27648,
49
+ "is_decoder": false,
50
+ "is_encoder_decoder": false,
51
+ "label2id": {
52
+ "LABEL_0": 0,
53
+ "LABEL_1": 1
54
+ },
55
+ "length_penalty": 1.0,
56
+ "max_length": 20,
57
  "max_position_embeddings": 32768,
58
  "max_window_layers": 70,
59
+ "min_length": 0,
60
  "model_type": "qwen2",
61
+ "no_repeat_ngram_size": 0,
62
  "num_attention_heads": 40,
63
+ "num_beam_groups": 1,
64
+ "num_beams": 1,
65
  "num_hidden_layers": 64,
66
  "num_key_value_heads": 8,
67
+ "num_return_sequences": 1,
68
+ "output_attentions": false,
69
+ "output_hidden_states": false,
70
+ "output_scores": false,
71
+ "pad_token_id": null,
72
+ "prefix": null,
73
+ "problem_type": null,
74
+ "pruned_heads": {},
75
+ "remove_invalid_values": false,
76
+ "repetition_penalty": 1.0,
77
+ "return_dict": true,
78
+ "return_dict_in_generate": false,
79
  "rms_norm_eps": 1e-06,
80
  "rope_scaling": null,
81
  "rope_theta": 1000000.0,
82
+ "sep_token_id": null,
83
  "sliding_window": 131072,
84
+ "suppress_tokens": null,
85
+ "task_specific_params": null,
86
+ "temperature": 1.0,
87
+ "tf_legacy_loss": false,
88
+ "tie_encoder_decoder": false,
89
+ "tie_word_embeddings": false,
90
+ "tokenizer_class": null,
91
+ "top_k": 50,
92
+ "top_p": 1.0,
93
  "torch_dtype": "bfloat16",
94
+ "torchscript": false,
95
+ "typical_p": 1.0,
96
  "use_bfloat16": true,
97
  "use_cache": false,
98
  "use_sliding_window": false,
 
336
  "use_llm_lora": 0,
337
  "use_thumbnail": true,
338
  "vision_config": {
339
+ "_attn_implementation_autoset": true,
340
+ "_name_or_path": "",
341
+ "add_cross_attention": false,
342
  "architectures": [
343
  "InternVisionModel"
344
  ],
345
  "attention_dropout": 0.0,
346
+ "bad_words_ids": null,
347
+ "begin_suppress_tokens": null,
348
+ "bos_token_id": null,
349
+ "chunk_size_feed_forward": 0,
350
+ "cross_attention_hidden_size": null,
351
+ "decoder_start_token_id": null,
352
+ "diversity_penalty": 0.0,
353
+ "do_sample": false,
354
  "drop_path_rate": 0.1,
355
  "dropout": 0.0,
356
+ "early_stopping": false,
357
+ "encoder_no_repeat_ngram_size": 0,
358
+ "eos_token_id": null,
359
+ "exponential_decay_length_penalty": null,
360
+ "finetuning_task": null,
361
+ "forced_bos_token_id": null,
362
+ "forced_eos_token_id": null,
363
  "hidden_act": "gelu",
364
  "hidden_size": 3200,
365
+ "id2label": {
366
+ "0": "LABEL_0",
367
+ "1": "LABEL_1"
368
+ },
369
  "image_size": 448,
370
  "initializer_factor": 0.1,
371
  "initializer_range": 1e-10,
372
  "intermediate_size": 12800,
373
+ "is_decoder": false,
374
+ "is_encoder_decoder": false,
375
+ "label2id": {
376
+ "LABEL_0": 0,
377
+ "LABEL_1": 1
378
+ },
379
  "layer_norm_eps": 1e-06,
380
+ "length_penalty": 1.0,
381
+ "max_length": 20,
382
+ "min_length": 0,
383
+ "model_type": "intern_vit_6b",
384
+ "no_repeat_ngram_size": 0,
385
  "norm_type": "rms_norm",
386
  "num_attention_heads": 25,
387
+ "num_beam_groups": 1,
388
+ "num_beams": 1,
389
  "num_channels": 3,
390
  "num_hidden_layers": 45,
391
+ "num_return_sequences": 1,
392
+ "output_attentions": false,
393
+ "output_hidden_states": false,
394
+ "output_scores": false,
395
+ "pad_token_id": null,
396
  "patch_size": 14,
397
+ "prefix": null,
398
+ "problem_type": null,
399
+ "pruned_heads": {},
400
  "qk_normalization": true,
401
  "qkv_bias": false,
402
+ "remove_invalid_values": false,
403
+ "repetition_penalty": 1.0,
404
+ "return_dict": true,
405
+ "return_dict_in_generate": false,
406
+ "sep_token_id": null,
407
+ "suppress_tokens": null,
408
+ "task_specific_params": null,
409
+ "temperature": 1.0,
410
+ "tf_legacy_loss": false,
411
+ "tie_encoder_decoder": false,
412
+ "tie_word_embeddings": true,
413
+ "tokenizer_class": null,
414
+ "top_k": 50,
415
+ "top_p": 1.0,
416
  "torch_dtype": "bfloat16",
417
+ "torchscript": false,
418
+ "typical_p": 1.0,
419
  "use_bfloat16": true,
420
  "use_flash_attn": true
421
  }