{
  "architectures": [
    "TRMForCausalLM"
  ],
  "dtype": "bfloat16",
  "model_type": "trm",
  "pad_token_id": 50283,
  "transformers_version": "4.57.1",
  "trm_config_dict": {
    "H_layers": 0,
    "batch_size": 1,
    "causal": true,
    "dropout": 0.2,
    "expansion": 4,
    "forward_dtype": "bfloat16",
    "freeze_embeddings": false,
    "halt_exploration_prob": 0.1,
    "halt_max_steps": 8,
    "hidden_size": 768,
    "loss": {
      "loss_type": "stablemax_cross_entropy",
      "name": "losses@ACTLossHead"
    },
    "name": "recursive_reasoning.trm@TinyRecursiveModel",
    "no_ACT_continue": true,
    "num_heads": 12,
    "num_layers": 8,
    "num_puzzle_identifiers": 1,
    "pos_encodings": "rope",
    "pretrained_embeddings_model": "Alibaba-NLP/gte-modernbert-base",
    "puzzle_emb_len": 0,
    "puzzle_emb_ndim": 0,
    "residual_recursion": false,
    "seq_len": 512,
    "vocab_size": 50368,
    "y_cycles": 2,
    "z_cycles": 3
  }
}
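For context, a minimal loading sketch, not taken from the source: it assumes this config.json sits in a Hugging Face model repo whose custom "trm" model and config classes are exposed via trust_remote_code, and "your-org/trm-model" is a placeholder repo id, not the actual model.

import torch
from transformers import AutoConfig, AutoModelForCausalLM

# Placeholder repo id; replace with the repository that contains this config.json.
repo_id = "your-org/trm-model"

# trust_remote_code is assumed to be required because "trm" is not a built-in
# transformers architecture; the custom config should expose trm_config_dict.
config = AutoConfig.from_pretrained(repo_id, trust_remote_code=True)

model = AutoModelForCausalLM.from_pretrained(
    repo_id,
    trust_remote_code=True,
    torch_dtype=torch.bfloat16,  # matches the "dtype" field in the config above
)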