| config: configs/vq_8k_siglip_b_res_p02_pw15_enc.yaml | |
| exp_index: | |
| data_path: /mnt/bn/cloud-project-lq/code/liuyh/data/vq_data/train | |
| cloud_save_path: experiments/tokenizer | |
| no_local_save: true | |
| vq_model: VQ-16 | |
| vq_ckpt: | |
| finetune: false | |
| finetune_decoder: false | |
| model_weight_strict: true | |
| ema: false | |
| codebook_size: 8192 | |
| codebook_embed_dim: 8 | |
| codebook_l2_norm: true | |
| codebook_weight: 1.0 | |
| entropy_loss_ratio: 0.0 | |
| vq_loss_ratio: 1.0 | |
| commit_loss_beta: 0.25 | |
| reconstruction_weight: 1.0 | |
| reconstruction_loss: l2 | |
| kl_loss_weight: 1e-06 | |
| tau: 0.07 | |
| num_codebooks: 1 | |
| perceptual_weight: 1.0 | |
| perceptual_loss: vgg | |
| perceptual_model: vgg | |
| perceptual_dino_variants: depth12_no_train | |
| perceptual_intermediate_loss: false | |
| perceptual_logit_loss: false | |
| perceptual_resize: false | |
| perceptual_warmup: 10000 | |
| disc_weight: 0.2 | |
| disc_start: 40000 | |
| disc_dim: 64 | |
| disc_type: dino | |
| disc_loss: hinge | |
| gen_loss: hinge | |
| lecam_loss_weight: 0.001 | |
| use_diff_aug: true | |
| disc_cr_loss_weight: 4.0 | |
| disc_adaptive_weight: false | |
| compile: false | |
| dropout_p: 0.0 | |
| results_dir: ./logs/task/detailflow_demo_task_256token | |
| dataset: imagenet | |
| image_size: 256 | |
| epochs: 250 | |
| optimizer: adam | |
| lr: 1.0e-4 | |
| lr_warmup_epochs: 1 | |
| lr_scheduler: cosine | |
| weight_decay: 0.0001 | |
| beta1: 0.9 | |
| beta2: 0.95 | |
| max_grad_norm: 1.0 | |
| global_batch_size: 256 | |
| global_seed: 42 | |
| num_workers: 16 | |
| log_every: 50 | |
| vis_every: 5000 | |
| ckpt_every: 5000 | |
| save_epochs: 1 | |
| gradient_accumulation_steps: 1 | |
| mixed_precision: bf16 | |
| enc_type: siglip2 | |
| dec_type: siglip2 | |
| num_latent_tokens: 256 | |
| encoder_model: siglip2_base | |
| decoder_model: siglip2_base | |
| encoder_tuning_method: full | |
| decoder_tuning_method: full | |
| encoder_pretrained: true | |
| decoder_pretrained: false | |
| encoder_patch_size: 16 | |
| decoder_patch_size: 16 | |
| repa: false | |
| repa_model: siglip2 | |
| repa_patch_size: 16 | |
| repa_proj_dim: 1024 | |
| repa_loss_weight: 0.5 | |
| repa_align: global | |
| repa_layer_indices: 1 | |
| resume_from_newest_ckpt: true | |
| gradient_checkpointing_encoder: false | |
| gradient_checkpointing_decoder: false | |
| debug_mode: false | |
| content_degradation: resolution_power | |
| degradation_prob: 0.2 | |
| degradation_loss_res: 224 | |
| degradation_power: 1.5 | |
| causal_encoder: true | |
| causal_decoder: false | |
| max_image_size: 256 | |
| min_image_size: 256 | |
| dynamic_max_image_size: | |
| dynamic_resolution_prob: 0 | |
| max_resolution_prob: 0 | |
| adjust_bs_by_resolution: false | |
| group_size: 8 | |
| global_token_loss_weight: 1.0 | |
| correction_training: true | |
| causal_num: | |
| rank: 0 | |
| world_size: 16 | |
| gpu: 0 | |
| dist_url: env:// | |
| distributed: true | |
| dist_backend: nccl | |