recursive1 / config.json
{
  "architectures": [
    "TRMForCausalLM"
  ],
  "dtype": "bfloat16",
  "model_type": "trm",
  "pad_token_id": 50283,
  "transformers_version": "4.57.1",
  "trm_config_dict": {
    "H_layers": 0,
    "batch_size": 1,
    "causal": true,
    "dropout": 0.2,
    "expansion": 4,
    "forward_dtype": "bfloat16",
    "freeze_embeddings": false,
    "halt_exploration_prob": 0.1,
    "halt_max_steps": 8,
    "hidden_size": 768,
    "loss": {
      "loss_type": "stablemax_cross_entropy",
      "name": "losses@ACTLossHead"
    },
    "name": "recursive_reasoning.trm@TinyRecursiveModel",
    "no_ACT_continue": true,
    "num_heads": 12,
    "num_layers": 8,
    "num_puzzle_identifiers": 1,
    "pos_encodings": "rope",
    "pretrained_embeddings_model": "Alibaba-NLP/gte-modernbert-base",
    "puzzle_emb_len": 0,
    "puzzle_emb_ndim": 0,
    "residual_recursion": false,
    "seq_len": 512,
    "vocab_size": 50368,
    "y_cycles": 2,
    "z_cycles": 3
  }
}
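
Below is a minimal sketch of how a config like this could be loaded with the transformers library. The repository id ("m-ric/recursive1") is only an assumption inferred from the file path, and because "trm" is not a built-in model_type, loading is assumed to require trust_remote_code=True (or an installed package that registers the TRM classes); adjust both to the actual setup.

from transformers import AutoConfig, AutoModelForCausalLM

repo_id = "m-ric/recursive1"  # assumed repository id, not confirmed by the config itself

# Read config.json; trust_remote_code lets transformers resolve the custom
# TRMForCausalLM architecture if the repository ships its own modeling code.
config = AutoConfig.from_pretrained(repo_id, trust_remote_code=True)
print(config.model_type)                      # "trm"
print(config.trm_config_dict["hidden_size"])  # 768
print(config.trm_config_dict["seq_len"])      # 512

# Instantiate the model; weights are cast to bfloat16 to match the "dtype"
# field in config.json.
model = AutoModelForCausalLM.from_pretrained(
    repo_id,
    trust_remote_code=True,
    dtype="bfloat16",  # "dtype" kwarg in recent transformers; older releases use torch_dtype
)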