ArthurConmy committed on
Commit
868f01b
1 Parent(s): 614a7d2

Update config.json

Browse files
Files changed (1) hide show
  1. config.json +38 -38
config.json CHANGED
@@ -1,38 +1,38 @@
1
- {'act_fn': None,
2
- 'attention_dir': 'causal',
3
- 'attn_only': True,
4
- 'attn_types': None,
5
- 'checkpoint_index': None,
6
- 'checkpoint_label_type': None,
7
- 'checkpoint_value': None,
8
- 'd_head': 32,
9
- 'd_mlp': None,
10
- 'd_model': 256,
11
- 'd_vocab': 50259,
12
- 'd_vocab_out': 50259,
13
- 'device': 'cuda',
14
- 'eps': 1e-05,
15
- 'final_rms': False,
16
- 'from_checkpoint': False,
17
- 'init_mode': 'gpt2',
18
- 'init_weights': True,
19
- 'initializer_range': 0.05,
20
- 'model_name': 'custom',
21
- 'n_ctx': 2048,
22
- 'n_heads': 8,
23
- 'n_layers': 2,
24
- 'n_params': 524288,
25
- 'normalization_type': 'LN',
26
- 'original_architecture': None,
27
- 'parallel_attn_mlp': False,
28
- 'positional_embedding_type': 'shortformer',
29
- 'rotary_dim': None,
30
- 'scale_attn_by_inverse_layer_idx': False,
31
- 'seed': None,
32
- 'tokenizer_name': None,
33
- 'use_attn_result': True,
34
- 'use_attn_scale': True,
35
- 'use_hook_tokens': False,
36
- 'use_local_attn': False,
37
- 'use_split_qkv_input': False,
38
- 'window_size': None}
 
1
+ {"act_fn": None,
2
+ "attention_dir": "causal",
3
+ "attn_only": True,
4
+ "attn_types": None,
5
+ "checkpoint_index": None,
6
+ "checkpoint_label_type": None,
7
+ "checkpoint_value": None,
8
+ "d_head": 32,
9
+ "d_mlp": None,
10
+ "d_model": 256,
11
+ "d_vocab": 50259,
12
+ "d_vocab_out": 50259,
13
+ "device": "cuda",
14
+ "eps": 1e-05,
15
+ "final_rms": False,
16
+ "from_checkpoint": False,
17
+ "init_mode": "gpt2",
18
+ "init_weights": True,
19
+ "initializer_range": 0.05,
20
+ "model_name": "custom",
21
+ "n_ctx": 2048,
22
+ "n_heads": 8,
23
+ "n_layers": 2,
24
+ "n_params": 524288,
25
+ "normalization_type": "LN",
26
+ "original_architecture": None,
27
+ "parallel_attn_mlp": False,
28
+ "positional_embedding_type": "shortformer",
29
+ "rotary_dim": None,
30
+ "scale_attn_by_inverse_layer_idx": False,
31
+ "seed": None,
32
+ "tokenizer_name": None,
33
+ "use_attn_result": True,
34
+ "use_attn_scale": True,
35
+ "use_hook_tokens": False,
36
+ "use_local_attn": False,
37
+ "use_split_qkv_input": False,
38
+ "window_size": None}