tommyp111 committed on
Commit
8107362
1 Parent(s): 09cea7d

Upload folder using huggingface_hub

Browse files
Files changed (28) hide show
  1. dense_h_to_4h/latents-768/k-12/base_model.model.gpt_neox.layers.0.mlp.dense_h_to_4h/cfg.json +1 -0
  2. dense_h_to_4h/latents-768/k-12/base_model.model.gpt_neox.layers.0.mlp.dense_h_to_4h/sae.safetensors +3 -0
  3. dense_h_to_4h/latents-768/k-12/base_model.model.gpt_neox.layers.1.mlp.dense_h_to_4h/cfg.json +1 -0
  4. dense_h_to_4h/latents-768/k-12/base_model.model.gpt_neox.layers.1.mlp.dense_h_to_4h/sae.safetensors +3 -0
  5. dense_h_to_4h/latents-768/k-12/base_model.model.gpt_neox.layers.10.mlp.dense_h_to_4h/cfg.json +1 -0
  6. dense_h_to_4h/latents-768/k-12/base_model.model.gpt_neox.layers.10.mlp.dense_h_to_4h/sae.safetensors +3 -0
  7. dense_h_to_4h/latents-768/k-12/base_model.model.gpt_neox.layers.11.mlp.dense_h_to_4h/cfg.json +1 -0
  8. dense_h_to_4h/latents-768/k-12/base_model.model.gpt_neox.layers.11.mlp.dense_h_to_4h/sae.safetensors +3 -0
  9. dense_h_to_4h/latents-768/k-12/base_model.model.gpt_neox.layers.2.mlp.dense_h_to_4h/cfg.json +1 -0
  10. dense_h_to_4h/latents-768/k-12/base_model.model.gpt_neox.layers.2.mlp.dense_h_to_4h/sae.safetensors +3 -0
  11. dense_h_to_4h/latents-768/k-12/base_model.model.gpt_neox.layers.3.mlp.dense_h_to_4h/cfg.json +1 -0
  12. dense_h_to_4h/latents-768/k-12/base_model.model.gpt_neox.layers.3.mlp.dense_h_to_4h/sae.safetensors +3 -0
  13. dense_h_to_4h/latents-768/k-12/base_model.model.gpt_neox.layers.4.mlp.dense_h_to_4h/cfg.json +1 -0
  14. dense_h_to_4h/latents-768/k-12/base_model.model.gpt_neox.layers.4.mlp.dense_h_to_4h/sae.safetensors +3 -0
  15. dense_h_to_4h/latents-768/k-12/base_model.model.gpt_neox.layers.5.mlp.dense_h_to_4h/cfg.json +1 -0
  16. dense_h_to_4h/latents-768/k-12/base_model.model.gpt_neox.layers.5.mlp.dense_h_to_4h/sae.safetensors +3 -0
  17. dense_h_to_4h/latents-768/k-12/base_model.model.gpt_neox.layers.6.mlp.dense_h_to_4h/cfg.json +1 -0
  18. dense_h_to_4h/latents-768/k-12/base_model.model.gpt_neox.layers.6.mlp.dense_h_to_4h/sae.safetensors +3 -0
  19. dense_h_to_4h/latents-768/k-12/base_model.model.gpt_neox.layers.7.mlp.dense_h_to_4h/cfg.json +1 -0
  20. dense_h_to_4h/latents-768/k-12/base_model.model.gpt_neox.layers.7.mlp.dense_h_to_4h/sae.safetensors +3 -0
  21. dense_h_to_4h/latents-768/k-12/base_model.model.gpt_neox.layers.8.mlp.dense_h_to_4h/cfg.json +1 -0
  22. dense_h_to_4h/latents-768/k-12/base_model.model.gpt_neox.layers.8.mlp.dense_h_to_4h/sae.safetensors +3 -0
  23. dense_h_to_4h/latents-768/k-12/base_model.model.gpt_neox.layers.9.mlp.dense_h_to_4h/cfg.json +1 -0
  24. dense_h_to_4h/latents-768/k-12/base_model.model.gpt_neox.layers.9.mlp.dense_h_to_4h/sae.safetensors +3 -0
  25. dense_h_to_4h/latents-768/k-12/config.json +1 -0
  26. dense_h_to_4h/latents-768/k-12/lr_scheduler.pt +3 -0
  27. dense_h_to_4h/latents-768/k-12/optimizer.pt +3 -0
  28. dense_h_to_4h/latents-768/k-12/state.pt +3 -0
dense_h_to_4h/latents-768/k-12/base_model.model.gpt_neox.layers.0.mlp.dense_h_to_4h/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"expansion_factor": 32, "normalize_decoder": true, "num_latents": 768, "k": 12, "multi_topk": false, "d_in": 3072}
dense_h_to_4h/latents-768/k-12/base_model.model.gpt_neox.layers.0.mlp.dense_h_to_4h/sae.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a168c23b04bc19cd749ebab416d65ef8aa5ac1c7073dfe93abe181a021e98f72
3
+ size 18890048
dense_h_to_4h/latents-768/k-12/base_model.model.gpt_neox.layers.1.mlp.dense_h_to_4h/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"expansion_factor": 32, "normalize_decoder": true, "num_latents": 768, "k": 12, "multi_topk": false, "d_in": 3072}
dense_h_to_4h/latents-768/k-12/base_model.model.gpt_neox.layers.1.mlp.dense_h_to_4h/sae.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bd4c0517f72d79c0273102cc00a437e16c8303b67a6cacb408f13a63c4d47295
3
+ size 18890048
dense_h_to_4h/latents-768/k-12/base_model.model.gpt_neox.layers.10.mlp.dense_h_to_4h/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"expansion_factor": 32, "normalize_decoder": true, "num_latents": 768, "k": 12, "multi_topk": false, "d_in": 3072}
dense_h_to_4h/latents-768/k-12/base_model.model.gpt_neox.layers.10.mlp.dense_h_to_4h/sae.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:791826fe73bbd9071425a9fd6bf48eb45fa55f9b80bac8dcf9e94c93d97b5459
3
+ size 18890048
dense_h_to_4h/latents-768/k-12/base_model.model.gpt_neox.layers.11.mlp.dense_h_to_4h/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"expansion_factor": 32, "normalize_decoder": true, "num_latents": 768, "k": 12, "multi_topk": false, "d_in": 3072}
dense_h_to_4h/latents-768/k-12/base_model.model.gpt_neox.layers.11.mlp.dense_h_to_4h/sae.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:96208b4ff17da2a23e2ec095722ffaf4b2f3e9deff6ef514e4d3009b4d47f7ec
3
+ size 18890048
dense_h_to_4h/latents-768/k-12/base_model.model.gpt_neox.layers.2.mlp.dense_h_to_4h/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"expansion_factor": 32, "normalize_decoder": true, "num_latents": 768, "k": 12, "multi_topk": false, "d_in": 3072}
dense_h_to_4h/latents-768/k-12/base_model.model.gpt_neox.layers.2.mlp.dense_h_to_4h/sae.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:065fc18c033fb7dcbafa4eb10688261b88966b1551de80626dcb32ed343ad16a
3
+ size 18890048
dense_h_to_4h/latents-768/k-12/base_model.model.gpt_neox.layers.3.mlp.dense_h_to_4h/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"expansion_factor": 32, "normalize_decoder": true, "num_latents": 768, "k": 12, "multi_topk": false, "d_in": 3072}
dense_h_to_4h/latents-768/k-12/base_model.model.gpt_neox.layers.3.mlp.dense_h_to_4h/sae.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f2207ae0762aa8258adbca0cddc97aa1318231da6fdff26b534b3efc5ddc0f03
3
+ size 18890048
dense_h_to_4h/latents-768/k-12/base_model.model.gpt_neox.layers.4.mlp.dense_h_to_4h/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"expansion_factor": 32, "normalize_decoder": true, "num_latents": 768, "k": 12, "multi_topk": false, "d_in": 3072}
dense_h_to_4h/latents-768/k-12/base_model.model.gpt_neox.layers.4.mlp.dense_h_to_4h/sae.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:04dc4b9c17b8d1fa926edac98c4fa6446cad22fb2a4d7a8693b4c4aa9548f540
3
+ size 18890048
dense_h_to_4h/latents-768/k-12/base_model.model.gpt_neox.layers.5.mlp.dense_h_to_4h/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"expansion_factor": 32, "normalize_decoder": true, "num_latents": 768, "k": 12, "multi_topk": false, "d_in": 3072}
dense_h_to_4h/latents-768/k-12/base_model.model.gpt_neox.layers.5.mlp.dense_h_to_4h/sae.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:040059070594b40c345f30a9f1d828a4ee0c867962db25bfb9f313d42f445548
3
+ size 18890048
dense_h_to_4h/latents-768/k-12/base_model.model.gpt_neox.layers.6.mlp.dense_h_to_4h/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"expansion_factor": 32, "normalize_decoder": true, "num_latents": 768, "k": 12, "multi_topk": false, "d_in": 3072}
dense_h_to_4h/latents-768/k-12/base_model.model.gpt_neox.layers.6.mlp.dense_h_to_4h/sae.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:63d704e31588d1cf55524ff86bce66d4c11e8e6a61a1840473d587c056a5b314
3
+ size 18890048
dense_h_to_4h/latents-768/k-12/base_model.model.gpt_neox.layers.7.mlp.dense_h_to_4h/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"expansion_factor": 32, "normalize_decoder": true, "num_latents": 768, "k": 12, "multi_topk": false, "d_in": 3072}
dense_h_to_4h/latents-768/k-12/base_model.model.gpt_neox.layers.7.mlp.dense_h_to_4h/sae.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f3a8c295ad3e2a4c447b2abdbc3e4928a58892e2db0a6e7e75424a7d8227fcff
3
+ size 18890048
dense_h_to_4h/latents-768/k-12/base_model.model.gpt_neox.layers.8.mlp.dense_h_to_4h/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"expansion_factor": 32, "normalize_decoder": true, "num_latents": 768, "k": 12, "multi_topk": false, "d_in": 3072}
dense_h_to_4h/latents-768/k-12/base_model.model.gpt_neox.layers.8.mlp.dense_h_to_4h/sae.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c334661f85b626642d56d472cbcc28b9a1b8f8c62ae4b1f848b70113fff9195
3
+ size 18890048
dense_h_to_4h/latents-768/k-12/base_model.model.gpt_neox.layers.9.mlp.dense_h_to_4h/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"expansion_factor": 32, "normalize_decoder": true, "num_latents": 768, "k": 12, "multi_topk": false, "d_in": 3072}
dense_h_to_4h/latents-768/k-12/base_model.model.gpt_neox.layers.9.mlp.dense_h_to_4h/sae.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a61d8fd476326b05d348d872d73de7730ad2a5783b586ad1fce3e84cf3c18de0
3
+ size 18890048
dense_h_to_4h/latents-768/k-12/config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"sae": {"expansion_factor": 32, "normalize_decoder": true, "num_latents": 768, "k": 12, "multi_topk": false}, "batch_size": 8, "grad_acc_steps": 1, "micro_acc_steps": 1, "lr": null, "lr_warmup_steps": 1000, "auxk_alpha": 0.0, "dead_feature_threshold": 500000, "hookpoints": ["base_model.model.gpt_neox.layers.0.mlp.dense_h_to_4h", "base_model.model.gpt_neox.layers.1.mlp.dense_h_to_4h", "base_model.model.gpt_neox.layers.2.mlp.dense_h_to_4h", "base_model.model.gpt_neox.layers.3.mlp.dense_h_to_4h", "base_model.model.gpt_neox.layers.4.mlp.dense_h_to_4h", "base_model.model.gpt_neox.layers.5.mlp.dense_h_to_4h", "base_model.model.gpt_neox.layers.6.mlp.dense_h_to_4h", "base_model.model.gpt_neox.layers.7.mlp.dense_h_to_4h", "base_model.model.gpt_neox.layers.8.mlp.dense_h_to_4h", "base_model.model.gpt_neox.layers.9.mlp.dense_h_to_4h", "base_model.model.gpt_neox.layers.10.mlp.dense_h_to_4h", "base_model.model.gpt_neox.layers.11.mlp.dense_h_to_4h"], "layers": [], "layer_stride": 1, "distribute_modules": false, "save_every": 500, "log_to_wandb": true, "run_name": null, "wandb_log_frequency": 1}
dense_h_to_4h/latents-768/k-12/lr_scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4d3f570964fc26975747719e773754f4abdad3ed7eb9e03c926d52ecfb1557ae
3
+ size 1268
dense_h_to_4h/latents-768/k-12/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:206c1c92a8dde0869f75b82888763fa1d7fcd228ff6c12b10add37da06738e24
3
+ size 113881850
dense_h_to_4h/latents-768/k-12/state.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b71cffbf5751635cd15aea6fe47f206c4b8f7ba51c401cdf54c9ebdb2c85344e
3
+ size 77650