m-ric committed on
Commit
a1c4483
·
verified ·
1 Parent(s): 5991ae0

Upload TRMForCausalLM

Browse files
Files changed (2) hide show
  1. config.json +7 -9
  2. model.safetensors +2 -2
config.json CHANGED
@@ -10,32 +10,30 @@
10
  "H_layers": 0,
11
  "batch_size": 1,
12
  "causal": true,
13
- "dropout": 0.1,
14
  "expansion": 4,
15
  "forward_dtype": "bfloat16",
16
  "freeze_embeddings": false,
17
- "full_grad_recursion": true,
18
  "halt_exploration_prob": 0.1,
19
  "halt_max_steps": 8,
20
  "hidden_size": 768,
21
  "loss": {
22
- "loss_type": "softmax_cross_entropy",
23
  "name": "losses@ACTLossHead"
24
  },
25
- "mlp_t": false,
26
  "name": "recursive_reasoning.trm@TinyRecursiveModel",
27
  "no_ACT_continue": true,
28
- "num_heads": 8,
29
- "num_layers": 4,
30
  "num_puzzle_identifiers": 1,
31
  "pos_encodings": "rope",
32
  "pretrained_embeddings_model": "Alibaba-NLP/gte-modernbert-base",
33
  "puzzle_emb_len": 0,
34
  "puzzle_emb_ndim": 0,
35
- "residual_recursion": true,
36
- "seq_len": 256,
37
  "vocab_size": 50368,
38
  "y_cycles": 2,
39
- "z_cycles": 2
40
  }
41
  }
 
10
  "H_layers": 0,
11
  "batch_size": 1,
12
  "causal": true,
13
+ "dropout": 0.2,
14
  "expansion": 4,
15
  "forward_dtype": "bfloat16",
16
  "freeze_embeddings": false,
 
17
  "halt_exploration_prob": 0.1,
18
  "halt_max_steps": 8,
19
  "hidden_size": 768,
20
  "loss": {
21
+ "loss_type": "stablemax_cross_entropy",
22
  "name": "losses@ACTLossHead"
23
  },
 
24
  "name": "recursive_reasoning.trm@TinyRecursiveModel",
25
  "no_ACT_continue": true,
26
+ "num_heads": 12,
27
+ "num_layers": 8,
28
  "num_puzzle_identifiers": 1,
29
  "pos_encodings": "rope",
30
  "pretrained_embeddings_model": "Alibaba-NLP/gte-modernbert-base",
31
  "puzzle_emb_len": 0,
32
  "puzzle_emb_ndim": 0,
33
+ "residual_recursion": false,
34
+ "seq_len": 512,
35
  "vocab_size": 50368,
36
  "y_cycles": 2,
37
+ "z_cycles": 3
38
  }
39
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cf1b29f3e306475a0f20b9ea682611acabb21f017ce4e20fc4e348162d9ed862
3
- size 422719008
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a90414327a4218c51b6ad370d10fdb3719d49fb9c7ddc7750b42f119ec5cfadf
3
+ size 535967176