diff --git a/backwards_TL/22ninmut/100003840/cfg.json b/backwards_TL/22ninmut/100003840/cfg.json deleted file mode 100644 index 481b05ab5e8c5d28de9186eb3048b303437b3bbf..0000000000000000000000000000000000000000 --- a/backwards_TL/22ninmut/100003840/cfg.json +++ /dev/null @@ -1 +0,0 @@ -{"model_name": "pythia-160m-deduped", "model_class_name": "HookedTransformer", "hook_name": "blocks.0.hook_resid_post", "hook_eval": "NOT_IN_USE", "hook_layer": 0, "hook_head_index": null, "dataset_path": "NeelNanda/pile-small-tokenized-2b", "dataset_trust_remote_code": true, "streaming": true, "is_dataset_tokenized": true, "context_size": 1024, "use_cached_activations": false, "cached_activations_path": null, "architecture": "jumprelu", "d_in": 768, "d_sae": 6144, "b_dec_init_method": "zeros", "expansion_factor": 8, "activation_fn": "relu", "activation_fn_kwargs": {}, "normalize_sae_decoder": true, "noise_scale": 0.0, "from_pretrained_path": "./hub/models--mech-interp--pythia-160m-deduped-rs-post/snapshots/ad21dc2dd4070805dbeb842dcbfa14e6ad74e2b9/L1", "apply_b_dec_to_input": false, "decoder_orthogonal_init": false, "decoder_heuristic_init": false, "init_encoder_as_decoder_transpose": false, "n_batches_in_buffer": 128, "training_tokens": 500000000, "finetuning_tokens": 0, "store_batch_size_prompts": 8, "train_batch_size_tokens": 4096, "normalize_activations": "none", "device": "cuda", "act_store_device": "cuda", "seed": 42, "dtype": "float32", "prepend_bos": false, "autocast": false, "autocast_lm": false, "compile_llm": false, "llm_compilation_mode": null, "compile_sae": false, "sae_compilation_mode": null, "adam_beta1": 0, "adam_beta2": 0.999, "mse_loss_normalization": null, "l1_coefficient": 1.0, "lp_norm": 1, "scale_sparsity_penalty_by_decoder_norm": false, "l1_warm_up_steps": 6103, "lr": 1e-05, "lr_scheduler_name": "constant", "lr_warm_up_steps": 0, "lr_end": 1.0000000000000002e-06, "lr_decay_steps": 24414, "n_restart_cycles": 1, "finetuning_method": null, "use_ghost_grads": false, "feature_sampling_window": 2000, "dead_feature_window": 1000, "dead_feature_threshold": 1e-06, "n_eval_batches": 10, "eval_batch_size_prompts": null, "log_to_wandb": true, "log_activations_store_to_wandb": false, "log_optimizer_state_to_wandb": false, "wandb_project": "sae-transfer-learning", "wandb_id": null, "run_name": "FT_L0_hook_resid_post_L1_1_0", "wandb_entity": null, "wandb_log_frequency": 30, "eval_every_n_wandb_logs": 100, "resume": false, "n_checkpoints": 5, "checkpoint_path": "checkpoints/22ninmut", "verbose": true, "model_kwargs": {}, "model_from_pretrained_kwargs": {}, "sae_lens_version": "3.14.0", "sae_lens_training_version": "3.14.0", "tokens_per_buffer": 536870912} \ No newline at end of file diff --git a/backwards_TL/22ninmut/100003840/sae_weights.safetensors b/backwards_TL/22ninmut/100003840/sae_weights.safetensors deleted file mode 100644 index aa456fad807f177cd3a80da4e4a025d2f66ec847..0000000000000000000000000000000000000000 --- a/backwards_TL/22ninmut/100003840/sae_weights.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:eaf688bedd969130bc40dabef5ecd4d20d1650527d890da1486a3009cc52703e -size 37801344 diff --git a/backwards_TL/22ninmut/100003840/sparsity.safetensors b/backwards_TL/22ninmut/100003840/sparsity.safetensors deleted file mode 100644 index 6a80499a2f05d43d21b14979046bce3847550b7b..0000000000000000000000000000000000000000 --- a/backwards_TL/22ninmut/100003840/sparsity.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3eccde1a921dbea545c2838d42de3e05112412e5ef0ff625013f1152fc54f41d -size 24656 diff --git a/backwards_TL/22ninmut/200003584/cfg.json b/backwards_TL/22ninmut/200003584/cfg.json deleted file mode 100644 index 481b05ab5e8c5d28de9186eb3048b303437b3bbf..0000000000000000000000000000000000000000 --- a/backwards_TL/22ninmut/200003584/cfg.json +++ /dev/null @@ -1 +0,0 @@ -{"model_name": "pythia-160m-deduped", "model_class_name": "HookedTransformer", "hook_name": "blocks.0.hook_resid_post", "hook_eval": "NOT_IN_USE", "hook_layer": 0, "hook_head_index": null, "dataset_path": "NeelNanda/pile-small-tokenized-2b", "dataset_trust_remote_code": true, "streaming": true, "is_dataset_tokenized": true, "context_size": 1024, "use_cached_activations": false, "cached_activations_path": null, "architecture": "jumprelu", "d_in": 768, "d_sae": 6144, "b_dec_init_method": "zeros", "expansion_factor": 8, "activation_fn": "relu", "activation_fn_kwargs": {}, "normalize_sae_decoder": true, "noise_scale": 0.0, "from_pretrained_path": "./hub/models--mech-interp--pythia-160m-deduped-rs-post/snapshots/ad21dc2dd4070805dbeb842dcbfa14e6ad74e2b9/L1", "apply_b_dec_to_input": false, "decoder_orthogonal_init": false, "decoder_heuristic_init": false, "init_encoder_as_decoder_transpose": false, "n_batches_in_buffer": 128, "training_tokens": 500000000, "finetuning_tokens": 0, "store_batch_size_prompts": 8, "train_batch_size_tokens": 4096, "normalize_activations": "none", "device": "cuda", "act_store_device": "cuda", "seed": 42, "dtype": "float32", "prepend_bos": false, "autocast": false, "autocast_lm": false, "compile_llm": false, "llm_compilation_mode": null, "compile_sae": false, "sae_compilation_mode": null, "adam_beta1": 0, "adam_beta2": 0.999, "mse_loss_normalization": null, "l1_coefficient": 1.0, "lp_norm": 1, "scale_sparsity_penalty_by_decoder_norm": false, "l1_warm_up_steps": 6103, "lr": 1e-05, "lr_scheduler_name": "constant", "lr_warm_up_steps": 0, "lr_end": 1.0000000000000002e-06, "lr_decay_steps": 24414, "n_restart_cycles": 1, "finetuning_method": null, "use_ghost_grads": false, "feature_sampling_window": 2000, "dead_feature_window": 1000, "dead_feature_threshold": 1e-06, "n_eval_batches": 10, "eval_batch_size_prompts": null, "log_to_wandb": true, "log_activations_store_to_wandb": false, "log_optimizer_state_to_wandb": false, "wandb_project": "sae-transfer-learning", "wandb_id": null, "run_name": "FT_L0_hook_resid_post_L1_1_0", "wandb_entity": null, "wandb_log_frequency": 30, "eval_every_n_wandb_logs": 100, "resume": false, "n_checkpoints": 5, "checkpoint_path": "checkpoints/22ninmut", "verbose": true, "model_kwargs": {}, "model_from_pretrained_kwargs": {}, "sae_lens_version": "3.14.0", "sae_lens_training_version": "3.14.0", "tokens_per_buffer": 536870912} \ No newline at end of file diff --git a/backwards_TL/22ninmut/200003584/sae_weights.safetensors b/backwards_TL/22ninmut/200003584/sae_weights.safetensors deleted file mode 100644 index 52492436b9b1189a55c00cd02dca8471ed62e6d4..0000000000000000000000000000000000000000 --- a/backwards_TL/22ninmut/200003584/sae_weights.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b7e2e760369b076b49810ebb2e9856dbfb0cba2a555021dc7b1f4b181056d125 -size 37801344 diff --git a/backwards_TL/22ninmut/200003584/sparsity.safetensors b/backwards_TL/22ninmut/200003584/sparsity.safetensors deleted file mode 100644 index 4cdbaeebcb461eba5bfa95342bd2fbde0f0ca9f1..0000000000000000000000000000000000000000 --- a/backwards_TL/22ninmut/200003584/sparsity.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:174f1e22cce1b01e437592772e23c4c46d594bdd06361197c1f1d13c2de0adbe -size 24656 diff --git a/backwards_TL/22ninmut/300003328/cfg.json b/backwards_TL/22ninmut/300003328/cfg.json deleted file mode 100644 index 481b05ab5e8c5d28de9186eb3048b303437b3bbf..0000000000000000000000000000000000000000 --- a/backwards_TL/22ninmut/300003328/cfg.json +++ /dev/null @@ -1 +0,0 @@ -{"model_name": "pythia-160m-deduped", "model_class_name": "HookedTransformer", "hook_name": "blocks.0.hook_resid_post", "hook_eval": "NOT_IN_USE", "hook_layer": 0, "hook_head_index": null, "dataset_path": "NeelNanda/pile-small-tokenized-2b", "dataset_trust_remote_code": true, "streaming": true, "is_dataset_tokenized": true, "context_size": 1024, "use_cached_activations": false, "cached_activations_path": null, "architecture": "jumprelu", "d_in": 768, "d_sae": 6144, "b_dec_init_method": "zeros", "expansion_factor": 8, "activation_fn": "relu", "activation_fn_kwargs": {}, "normalize_sae_decoder": true, "noise_scale": 0.0, "from_pretrained_path": "./hub/models--mech-interp--pythia-160m-deduped-rs-post/snapshots/ad21dc2dd4070805dbeb842dcbfa14e6ad74e2b9/L1", "apply_b_dec_to_input": false, "decoder_orthogonal_init": false, "decoder_heuristic_init": false, "init_encoder_as_decoder_transpose": false, "n_batches_in_buffer": 128, "training_tokens": 500000000, "finetuning_tokens": 0, "store_batch_size_prompts": 8, "train_batch_size_tokens": 4096, "normalize_activations": "none", "device": "cuda", "act_store_device": "cuda", "seed": 42, "dtype": "float32", "prepend_bos": false, "autocast": false, "autocast_lm": false, "compile_llm": false, "llm_compilation_mode": null, "compile_sae": false, "sae_compilation_mode": null, "adam_beta1": 0, "adam_beta2": 0.999, "mse_loss_normalization": null, "l1_coefficient": 1.0, "lp_norm": 1, "scale_sparsity_penalty_by_decoder_norm": false, "l1_warm_up_steps": 6103, "lr": 1e-05, "lr_scheduler_name": "constant", "lr_warm_up_steps": 0, "lr_end": 1.0000000000000002e-06, "lr_decay_steps": 24414, "n_restart_cycles": 1, "finetuning_method": null, "use_ghost_grads": false, "feature_sampling_window": 2000, "dead_feature_window": 1000, "dead_feature_threshold": 1e-06, "n_eval_batches": 10, "eval_batch_size_prompts": null, "log_to_wandb": true, "log_activations_store_to_wandb": false, "log_optimizer_state_to_wandb": false, "wandb_project": "sae-transfer-learning", "wandb_id": null, "run_name": "FT_L0_hook_resid_post_L1_1_0", "wandb_entity": null, "wandb_log_frequency": 30, "eval_every_n_wandb_logs": 100, "resume": false, "n_checkpoints": 5, "checkpoint_path": "checkpoints/22ninmut", "verbose": true, "model_kwargs": {}, "model_from_pretrained_kwargs": {}, "sae_lens_version": "3.14.0", "sae_lens_training_version": "3.14.0", "tokens_per_buffer": 536870912} \ No newline at end of file diff --git a/backwards_TL/22ninmut/300003328/sae_weights.safetensors b/backwards_TL/22ninmut/300003328/sae_weights.safetensors deleted file mode 100644 index 3d69d2dde3bd09da95b104c22e16a7087efc656d..0000000000000000000000000000000000000000 --- a/backwards_TL/22ninmut/300003328/sae_weights.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:90c6a2bc0fb1221f1a297e78f914dcd4431d0594d665b9e6971ad53ad1e3a4f9 -size 37801344 diff --git a/backwards_TL/22ninmut/300003328/sparsity.safetensors b/backwards_TL/22ninmut/300003328/sparsity.safetensors deleted file mode 100644 index d63a442f19ab5dec0951ac8813348a08f41c2ce2..0000000000000000000000000000000000000000 --- a/backwards_TL/22ninmut/300003328/sparsity.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:82ab0163c40693019a4f9971942ad6b2bc7864077175efae7d2b48bc2772af5d -size 24656 diff --git a/backwards_TL/22ninmut/400003072/cfg.json b/backwards_TL/22ninmut/400003072/cfg.json deleted file mode 100644 index 481b05ab5e8c5d28de9186eb3048b303437b3bbf..0000000000000000000000000000000000000000 --- a/backwards_TL/22ninmut/400003072/cfg.json +++ /dev/null @@ -1 +0,0 @@ -{"model_name": "pythia-160m-deduped", "model_class_name": "HookedTransformer", "hook_name": "blocks.0.hook_resid_post", "hook_eval": "NOT_IN_USE", "hook_layer": 0, "hook_head_index": null, "dataset_path": "NeelNanda/pile-small-tokenized-2b", "dataset_trust_remote_code": true, "streaming": true, "is_dataset_tokenized": true, "context_size": 1024, "use_cached_activations": false, "cached_activations_path": null, "architecture": "jumprelu", "d_in": 768, "d_sae": 6144, "b_dec_init_method": "zeros", "expansion_factor": 8, "activation_fn": "relu", "activation_fn_kwargs": {}, "normalize_sae_decoder": true, "noise_scale": 0.0, "from_pretrained_path": "./hub/models--mech-interp--pythia-160m-deduped-rs-post/snapshots/ad21dc2dd4070805dbeb842dcbfa14e6ad74e2b9/L1", "apply_b_dec_to_input": false, "decoder_orthogonal_init": false, "decoder_heuristic_init": false, "init_encoder_as_decoder_transpose": false, "n_batches_in_buffer": 128, "training_tokens": 500000000, "finetuning_tokens": 0, "store_batch_size_prompts": 8, "train_batch_size_tokens": 4096, "normalize_activations": "none", "device": "cuda", "act_store_device": "cuda", "seed": 42, "dtype": "float32", "prepend_bos": false, "autocast": false, "autocast_lm": false, "compile_llm": false, "llm_compilation_mode": null, "compile_sae": false, "sae_compilation_mode": null, "adam_beta1": 0, "adam_beta2": 0.999, "mse_loss_normalization": null, "l1_coefficient": 1.0, "lp_norm": 1, "scale_sparsity_penalty_by_decoder_norm": false, "l1_warm_up_steps": 6103, "lr": 1e-05, "lr_scheduler_name": "constant", "lr_warm_up_steps": 0, "lr_end": 1.0000000000000002e-06, "lr_decay_steps": 24414, "n_restart_cycles": 1, "finetuning_method": null, "use_ghost_grads": false, "feature_sampling_window": 2000, "dead_feature_window": 1000, "dead_feature_threshold": 1e-06, "n_eval_batches": 10, "eval_batch_size_prompts": null, "log_to_wandb": true, "log_activations_store_to_wandb": false, "log_optimizer_state_to_wandb": false, "wandb_project": "sae-transfer-learning", "wandb_id": null, "run_name": "FT_L0_hook_resid_post_L1_1_0", "wandb_entity": null, "wandb_log_frequency": 30, "eval_every_n_wandb_logs": 100, "resume": false, "n_checkpoints": 5, "checkpoint_path": "checkpoints/22ninmut", "verbose": true, "model_kwargs": {}, "model_from_pretrained_kwargs": {}, "sae_lens_version": "3.14.0", "sae_lens_training_version": "3.14.0", "tokens_per_buffer": 536870912} \ No newline at end of file diff --git a/backwards_TL/22ninmut/400003072/sae_weights.safetensors b/backwards_TL/22ninmut/400003072/sae_weights.safetensors deleted file mode 100644 index 1179962c3a12d3e8cfe747aa314d2618ffd16f08..0000000000000000000000000000000000000000 --- a/backwards_TL/22ninmut/400003072/sae_weights.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3523d121fcf5fba450df3a43ec1d64c3175d40673953b7525d740e02c61d1470 -size 37801344 diff --git a/backwards_TL/22ninmut/400003072/sparsity.safetensors b/backwards_TL/22ninmut/400003072/sparsity.safetensors deleted file mode 100644 index a0c89388d6c4f398877f5b23a99a97141d4ffe41..0000000000000000000000000000000000000000 --- a/backwards_TL/22ninmut/400003072/sparsity.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ed8bdac9956cf39cd59d7407ca55c418010f28ef8e30c02221bcc3d2bac90e88 -size 24656 diff --git a/backwards_TL/22ninmut/final_500002816/.cfg.json.swp b/backwards_TL/22ninmut/final_500002816/.cfg.json.swp deleted file mode 100644 index 0ee340d20ffa189c1b170e84c0f2752c08db2560..0000000000000000000000000000000000000000 Binary files a/backwards_TL/22ninmut/final_500002816/.cfg.json.swp and /dev/null differ diff --git a/backwards_TL/22ninmut/final_500002816/cfg.json b/backwards_TL/22ninmut/final_500002816/cfg.json deleted file mode 100644 index 481b05ab5e8c5d28de9186eb3048b303437b3bbf..0000000000000000000000000000000000000000 --- a/backwards_TL/22ninmut/final_500002816/cfg.json +++ /dev/null @@ -1 +0,0 @@ -{"model_name": "pythia-160m-deduped", "model_class_name": "HookedTransformer", "hook_name": "blocks.0.hook_resid_post", "hook_eval": "NOT_IN_USE", "hook_layer": 0, "hook_head_index": null, "dataset_path": "NeelNanda/pile-small-tokenized-2b", "dataset_trust_remote_code": true, "streaming": true, "is_dataset_tokenized": true, "context_size": 1024, "use_cached_activations": false, "cached_activations_path": null, "architecture": "jumprelu", "d_in": 768, "d_sae": 6144, "b_dec_init_method": "zeros", "expansion_factor": 8, "activation_fn": "relu", "activation_fn_kwargs": {}, "normalize_sae_decoder": true, "noise_scale": 0.0, "from_pretrained_path": "./hub/models--mech-interp--pythia-160m-deduped-rs-post/snapshots/ad21dc2dd4070805dbeb842dcbfa14e6ad74e2b9/L1", "apply_b_dec_to_input": false, "decoder_orthogonal_init": false, "decoder_heuristic_init": false, "init_encoder_as_decoder_transpose": false, "n_batches_in_buffer": 128, "training_tokens": 500000000, "finetuning_tokens": 0, "store_batch_size_prompts": 8, "train_batch_size_tokens": 4096, "normalize_activations": "none", "device": "cuda", "act_store_device": "cuda", "seed": 42, "dtype": "float32", "prepend_bos": false, "autocast": false, "autocast_lm": false, "compile_llm": false, "llm_compilation_mode": null, "compile_sae": false, "sae_compilation_mode": null, "adam_beta1": 0, "adam_beta2": 0.999, "mse_loss_normalization": null, "l1_coefficient": 1.0, "lp_norm": 1, "scale_sparsity_penalty_by_decoder_norm": false, "l1_warm_up_steps": 6103, "lr": 1e-05, "lr_scheduler_name": "constant", "lr_warm_up_steps": 0, "lr_end": 1.0000000000000002e-06, "lr_decay_steps": 24414, "n_restart_cycles": 1, "finetuning_method": null, "use_ghost_grads": false, "feature_sampling_window": 2000, "dead_feature_window": 1000, "dead_feature_threshold": 1e-06, "n_eval_batches": 10, "eval_batch_size_prompts": null, "log_to_wandb": true, "log_activations_store_to_wandb": false, "log_optimizer_state_to_wandb": false, "wandb_project": "sae-transfer-learning", "wandb_id": null, "run_name": "FT_L0_hook_resid_post_L1_1_0", "wandb_entity": null, "wandb_log_frequency": 30, "eval_every_n_wandb_logs": 100, "resume": false, "n_checkpoints": 5, "checkpoint_path": "checkpoints/22ninmut", "verbose": true, "model_kwargs": {}, "model_from_pretrained_kwargs": {}, "sae_lens_version": "3.14.0", "sae_lens_training_version": "3.14.0", "tokens_per_buffer": 536870912} \ No newline at end of file diff --git a/backwards_TL/22ninmut/final_500002816/sae_weights.safetensors b/backwards_TL/22ninmut/final_500002816/sae_weights.safetensors deleted file mode 100644 index 8ce597711368306cc3d966ec309aa82851c242b0..0000000000000000000000000000000000000000 --- a/backwards_TL/22ninmut/final_500002816/sae_weights.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3f1f5172f0847c6a15fb993ea9c10fea00470bdfdb436eb9f5f3cd60c746e11d -size 37801344 diff --git a/backwards_TL/22ninmut/final_500002816/sparsity.safetensors b/backwards_TL/22ninmut/final_500002816/sparsity.safetensors deleted file mode 100644 index 620847a5f98dcb4fa1c4247afb0d399d7d0ebb90..0000000000000000000000000000000000000000 --- a/backwards_TL/22ninmut/final_500002816/sparsity.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:53bb6ce0541567e2888b72807252ecd39b2d7763ce2dd9041f362a8913659437 -size 24656 diff --git a/backwards_TL/7oj8g6c4/100003840/cfg.json b/backwards_TL/7oj8g6c4/100003840/cfg.json deleted file mode 100644 index ae54992ea0863c60482a816835eec98835f52c5e..0000000000000000000000000000000000000000 --- a/backwards_TL/7oj8g6c4/100003840/cfg.json +++ /dev/null @@ -1 +0,0 @@ -{"model_name": "pythia-160m-deduped", "model_class_name": "HookedTransformer", "hook_name": "blocks.10.hook_resid_post", "hook_eval": "NOT_IN_USE", "hook_layer": 10, "hook_head_index": null, "dataset_path": "NeelNanda/pile-small-tokenized-2b", "dataset_trust_remote_code": true, "streaming": true, "is_dataset_tokenized": true, "context_size": 1024, "use_cached_activations": false, "cached_activations_path": null, "architecture": "jumprelu", "d_in": 768, "d_sae": 6144, "b_dec_init_method": "zeros", "expansion_factor": 8, "activation_fn": "relu", "activation_fn_kwargs": {}, "normalize_sae_decoder": true, "noise_scale": 0.0, "from_pretrained_path": "./hub/models--mech-interp--pythia-160m-deduped-rs-post/snapshots/ad21dc2dd4070805dbeb842dcbfa14e6ad74e2b9/L11", "apply_b_dec_to_input": false, "decoder_orthogonal_init": false, "decoder_heuristic_init": false, "init_encoder_as_decoder_transpose": false, "n_batches_in_buffer": 128, "training_tokens": 500000000, "finetuning_tokens": 0, "store_batch_size_prompts": 8, "train_batch_size_tokens": 4096, "normalize_activations": "none", "device": "cuda", "act_store_device": "cuda", "seed": 42, "dtype": "float32", "prepend_bos": false, "autocast": false, "autocast_lm": false, "compile_llm": false, "llm_compilation_mode": null, "compile_sae": false, "sae_compilation_mode": null, "adam_beta1": 0, "adam_beta2": 0.999, "mse_loss_normalization": null, "l1_coefficient": 1, "lp_norm": 1, "scale_sparsity_penalty_by_decoder_norm": false, "l1_warm_up_steps": 6103, "lr": 1e-05, "lr_scheduler_name": "constant", "lr_warm_up_steps": 0, "lr_end": 1.0000000000000002e-06, "lr_decay_steps": 24414, "n_restart_cycles": 1, "finetuning_method": null, "use_ghost_grads": false, "feature_sampling_window": 2000, "dead_feature_window": 1000, "dead_feature_threshold": 1e-06, "n_eval_batches": 10, "eval_batch_size_prompts": null, "log_to_wandb": true, "log_activations_store_to_wandb": false, "log_optimizer_state_to_wandb": false, "wandb_project": "sae-transfer-learning", "wandb_id": null, "run_name": "FT_L10_hook_resid_post_L1_1", "wandb_entity": null, "wandb_log_frequency": 30, "eval_every_n_wandb_logs": 100, "resume": false, "n_checkpoints": 5, "checkpoint_path": "checkpoints/7oj8g6c4", "verbose": true, "model_kwargs": {}, "model_from_pretrained_kwargs": {}, "sae_lens_version": "3.14.0", "sae_lens_training_version": "3.14.0", "tokens_per_buffer": 536870912} \ No newline at end of file diff --git a/backwards_TL/7oj8g6c4/100003840/sae_weights.safetensors b/backwards_TL/7oj8g6c4/100003840/sae_weights.safetensors deleted file mode 100644 index 9484257f06984efb4db5bbfd406553ff7449dd4a..0000000000000000000000000000000000000000 --- a/backwards_TL/7oj8g6c4/100003840/sae_weights.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:06c7f1afee5174577ebf48c4ba2b2623c6702f5efca1f78a234ad93fc2b430f9 -size 37801344 diff --git a/backwards_TL/7oj8g6c4/100003840/sparsity.safetensors b/backwards_TL/7oj8g6c4/100003840/sparsity.safetensors deleted file mode 100644 index 3af436766accbf713b34951f57fc548458c2926c..0000000000000000000000000000000000000000 --- a/backwards_TL/7oj8g6c4/100003840/sparsity.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0df1c1f57067e205693519d17ea3ef8475d08e59d6fba88702ccaba5eba7e9de -size 24656 diff --git a/backwards_TL/7oj8g6c4/200003584/cfg.json b/backwards_TL/7oj8g6c4/200003584/cfg.json deleted file mode 100644 index ae54992ea0863c60482a816835eec98835f52c5e..0000000000000000000000000000000000000000 --- a/backwards_TL/7oj8g6c4/200003584/cfg.json +++ /dev/null @@ -1 +0,0 @@ -{"model_name": "pythia-160m-deduped", "model_class_name": "HookedTransformer", "hook_name": "blocks.10.hook_resid_post", "hook_eval": "NOT_IN_USE", "hook_layer": 10, "hook_head_index": null, "dataset_path": "NeelNanda/pile-small-tokenized-2b", "dataset_trust_remote_code": true, "streaming": true, "is_dataset_tokenized": true, "context_size": 1024, "use_cached_activations": false, "cached_activations_path": null, "architecture": "jumprelu", "d_in": 768, "d_sae": 6144, "b_dec_init_method": "zeros", "expansion_factor": 8, "activation_fn": "relu", "activation_fn_kwargs": {}, "normalize_sae_decoder": true, "noise_scale": 0.0, "from_pretrained_path": "./hub/models--mech-interp--pythia-160m-deduped-rs-post/snapshots/ad21dc2dd4070805dbeb842dcbfa14e6ad74e2b9/L11", "apply_b_dec_to_input": false, "decoder_orthogonal_init": false, "decoder_heuristic_init": false, "init_encoder_as_decoder_transpose": false, "n_batches_in_buffer": 128, "training_tokens": 500000000, "finetuning_tokens": 0, "store_batch_size_prompts": 8, "train_batch_size_tokens": 4096, "normalize_activations": "none", "device": "cuda", "act_store_device": "cuda", "seed": 42, "dtype": "float32", "prepend_bos": false, "autocast": false, "autocast_lm": false, "compile_llm": false, "llm_compilation_mode": null, "compile_sae": false, "sae_compilation_mode": null, "adam_beta1": 0, "adam_beta2": 0.999, "mse_loss_normalization": null, "l1_coefficient": 1, "lp_norm": 1, "scale_sparsity_penalty_by_decoder_norm": false, "l1_warm_up_steps": 6103, "lr": 1e-05, "lr_scheduler_name": "constant", "lr_warm_up_steps": 0, "lr_end": 1.0000000000000002e-06, "lr_decay_steps": 24414, "n_restart_cycles": 1, "finetuning_method": null, "use_ghost_grads": false, "feature_sampling_window": 2000, "dead_feature_window": 1000, "dead_feature_threshold": 1e-06, "n_eval_batches": 10, "eval_batch_size_prompts": null, "log_to_wandb": true, "log_activations_store_to_wandb": false, "log_optimizer_state_to_wandb": false, "wandb_project": "sae-transfer-learning", "wandb_id": null, "run_name": "FT_L10_hook_resid_post_L1_1", "wandb_entity": null, "wandb_log_frequency": 30, "eval_every_n_wandb_logs": 100, "resume": false, "n_checkpoints": 5, "checkpoint_path": "checkpoints/7oj8g6c4", "verbose": true, "model_kwargs": {}, "model_from_pretrained_kwargs": {}, "sae_lens_version": "3.14.0", "sae_lens_training_version": "3.14.0", "tokens_per_buffer": 536870912} \ No newline at end of file diff --git a/backwards_TL/7oj8g6c4/200003584/sae_weights.safetensors b/backwards_TL/7oj8g6c4/200003584/sae_weights.safetensors deleted file mode 100644 index 9c4b77d183eb1435d158fea15bb4284312b4389c..0000000000000000000000000000000000000000 --- a/backwards_TL/7oj8g6c4/200003584/sae_weights.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2469b8f471cef975f3bb34af4032fe84028d291fe2e21caa5eb413c9decab866 -size 37801344 diff --git a/backwards_TL/7oj8g6c4/200003584/sparsity.safetensors b/backwards_TL/7oj8g6c4/200003584/sparsity.safetensors deleted file mode 100644 index 293930d39718bbf55bc5fb8ce013b6ba9fa43f01..0000000000000000000000000000000000000000 --- a/backwards_TL/7oj8g6c4/200003584/sparsity.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9cf0a539cc13b22142a55193ddd9cf21e650016917f95265017723d483907c46 -size 24656 diff --git a/backwards_TL/7oj8g6c4/300003328/cfg.json b/backwards_TL/7oj8g6c4/300003328/cfg.json deleted file mode 100644 index ae54992ea0863c60482a816835eec98835f52c5e..0000000000000000000000000000000000000000 --- a/backwards_TL/7oj8g6c4/300003328/cfg.json +++ /dev/null @@ -1 +0,0 @@ -{"model_name": "pythia-160m-deduped", "model_class_name": "HookedTransformer", "hook_name": "blocks.10.hook_resid_post", "hook_eval": "NOT_IN_USE", "hook_layer": 10, "hook_head_index": null, "dataset_path": "NeelNanda/pile-small-tokenized-2b", "dataset_trust_remote_code": true, "streaming": true, "is_dataset_tokenized": true, "context_size": 1024, "use_cached_activations": false, "cached_activations_path": null, "architecture": "jumprelu", "d_in": 768, "d_sae": 6144, "b_dec_init_method": "zeros", "expansion_factor": 8, "activation_fn": "relu", "activation_fn_kwargs": {}, "normalize_sae_decoder": true, "noise_scale": 0.0, "from_pretrained_path": "./hub/models--mech-interp--pythia-160m-deduped-rs-post/snapshots/ad21dc2dd4070805dbeb842dcbfa14e6ad74e2b9/L11", "apply_b_dec_to_input": false, "decoder_orthogonal_init": false, "decoder_heuristic_init": false, "init_encoder_as_decoder_transpose": false, "n_batches_in_buffer": 128, "training_tokens": 500000000, "finetuning_tokens": 0, "store_batch_size_prompts": 8, "train_batch_size_tokens": 4096, "normalize_activations": "none", "device": "cuda", "act_store_device": "cuda", "seed": 42, "dtype": "float32", "prepend_bos": false, "autocast": false, "autocast_lm": false, "compile_llm": false, "llm_compilation_mode": null, "compile_sae": false, "sae_compilation_mode": null, "adam_beta1": 0, "adam_beta2": 0.999, "mse_loss_normalization": null, "l1_coefficient": 1, "lp_norm": 1, "scale_sparsity_penalty_by_decoder_norm": false, "l1_warm_up_steps": 6103, "lr": 1e-05, "lr_scheduler_name": "constant", "lr_warm_up_steps": 0, "lr_end": 1.0000000000000002e-06, "lr_decay_steps": 24414, "n_restart_cycles": 1, "finetuning_method": null, "use_ghost_grads": false, "feature_sampling_window": 2000, "dead_feature_window": 1000, "dead_feature_threshold": 1e-06, "n_eval_batches": 10, "eval_batch_size_prompts": null, "log_to_wandb": true, "log_activations_store_to_wandb": false, "log_optimizer_state_to_wandb": false, "wandb_project": "sae-transfer-learning", "wandb_id": null, "run_name": "FT_L10_hook_resid_post_L1_1", "wandb_entity": null, "wandb_log_frequency": 30, "eval_every_n_wandb_logs": 100, "resume": false, "n_checkpoints": 5, "checkpoint_path": "checkpoints/7oj8g6c4", "verbose": true, "model_kwargs": {}, "model_from_pretrained_kwargs": {}, "sae_lens_version": "3.14.0", "sae_lens_training_version": "3.14.0", "tokens_per_buffer": 536870912} \ No newline at end of file diff --git a/backwards_TL/7oj8g6c4/300003328/sae_weights.safetensors b/backwards_TL/7oj8g6c4/300003328/sae_weights.safetensors deleted file mode 100644 index 24b2412ed1a5b990ee07175df203f7283cb4361e..0000000000000000000000000000000000000000 --- a/backwards_TL/7oj8g6c4/300003328/sae_weights.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:5108e31bc436352cee16984fd02e9694fee65469604e79827fd67c96e5577cbb -size 37801344 diff --git a/backwards_TL/7oj8g6c4/300003328/sparsity.safetensors b/backwards_TL/7oj8g6c4/300003328/sparsity.safetensors deleted file mode 100644 index f1364217c3d8217f609a87e39f9f6e61a1d63a7f..0000000000000000000000000000000000000000 --- a/backwards_TL/7oj8g6c4/300003328/sparsity.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:564c97f8a8520504b79659fb02ee5ce0a8171ad865184fe7c61e17aedf6593dd -size 24656 diff --git a/backwards_TL/7oj8g6c4/400003072/cfg.json b/backwards_TL/7oj8g6c4/400003072/cfg.json deleted file mode 100644 index ae54992ea0863c60482a816835eec98835f52c5e..0000000000000000000000000000000000000000 --- a/backwards_TL/7oj8g6c4/400003072/cfg.json +++ /dev/null @@ -1 +0,0 @@ -{"model_name": "pythia-160m-deduped", "model_class_name": "HookedTransformer", "hook_name": "blocks.10.hook_resid_post", "hook_eval": "NOT_IN_USE", "hook_layer": 10, "hook_head_index": null, "dataset_path": "NeelNanda/pile-small-tokenized-2b", "dataset_trust_remote_code": true, "streaming": true, "is_dataset_tokenized": true, "context_size": 1024, "use_cached_activations": false, "cached_activations_path": null, "architecture": "jumprelu", "d_in": 768, "d_sae": 6144, "b_dec_init_method": "zeros", "expansion_factor": 8, "activation_fn": "relu", "activation_fn_kwargs": {}, "normalize_sae_decoder": true, "noise_scale": 0.0, "from_pretrained_path": "./hub/models--mech-interp--pythia-160m-deduped-rs-post/snapshots/ad21dc2dd4070805dbeb842dcbfa14e6ad74e2b9/L11", "apply_b_dec_to_input": false, "decoder_orthogonal_init": false, "decoder_heuristic_init": false, "init_encoder_as_decoder_transpose": false, "n_batches_in_buffer": 128, "training_tokens": 500000000, "finetuning_tokens": 0, "store_batch_size_prompts": 8, "train_batch_size_tokens": 4096, "normalize_activations": "none", "device": "cuda", "act_store_device": "cuda", "seed": 42, "dtype": "float32", "prepend_bos": false, "autocast": false, "autocast_lm": false, "compile_llm": false, "llm_compilation_mode": null, "compile_sae": false, "sae_compilation_mode": null, "adam_beta1": 0, "adam_beta2": 0.999, "mse_loss_normalization": null, "l1_coefficient": 1, "lp_norm": 1, "scale_sparsity_penalty_by_decoder_norm": false, "l1_warm_up_steps": 6103, "lr": 1e-05, "lr_scheduler_name": "constant", "lr_warm_up_steps": 0, "lr_end": 1.0000000000000002e-06, "lr_decay_steps": 24414, "n_restart_cycles": 1, "finetuning_method": null, "use_ghost_grads": false, "feature_sampling_window": 2000, "dead_feature_window": 1000, "dead_feature_threshold": 1e-06, "n_eval_batches": 10, "eval_batch_size_prompts": null, "log_to_wandb": true, "log_activations_store_to_wandb": false, "log_optimizer_state_to_wandb": false, "wandb_project": "sae-transfer-learning", "wandb_id": null, "run_name": "FT_L10_hook_resid_post_L1_1", "wandb_entity": null, "wandb_log_frequency": 30, "eval_every_n_wandb_logs": 100, "resume": false, "n_checkpoints": 5, "checkpoint_path": "checkpoints/7oj8g6c4", "verbose": true, "model_kwargs": {}, "model_from_pretrained_kwargs": {}, "sae_lens_version": "3.14.0", "sae_lens_training_version": "3.14.0", "tokens_per_buffer": 536870912} \ No newline at end of file diff --git a/backwards_TL/7oj8g6c4/400003072/sae_weights.safetensors b/backwards_TL/7oj8g6c4/400003072/sae_weights.safetensors deleted file mode 100644 index 6f7e216ecda0fa9c112aabce98dc4c5ce6340edd..0000000000000000000000000000000000000000 --- a/backwards_TL/7oj8g6c4/400003072/sae_weights.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:4f1faf430cb13561449cdd2c921a6f3f1c3208e786954644403c6c502629d630 -size 37801344 diff --git a/backwards_TL/7oj8g6c4/400003072/sparsity.safetensors b/backwards_TL/7oj8g6c4/400003072/sparsity.safetensors deleted file mode 100644 index 950dfcc73e173af4b2bb37325b9bbfc72008e201..0000000000000000000000000000000000000000 --- a/backwards_TL/7oj8g6c4/400003072/sparsity.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:1746ddb7c85e37d01196f381bf4c52cd3f4438e7814aa985205fadfc8bac887d -size 24656 diff --git a/backwards_TL/7oj8g6c4/final_500002816/cfg.json b/backwards_TL/7oj8g6c4/final_500002816/cfg.json deleted file mode 100644 index ae54992ea0863c60482a816835eec98835f52c5e..0000000000000000000000000000000000000000 --- a/backwards_TL/7oj8g6c4/final_500002816/cfg.json +++ /dev/null @@ -1 +0,0 @@ -{"model_name": "pythia-160m-deduped", "model_class_name": "HookedTransformer", "hook_name": "blocks.10.hook_resid_post", "hook_eval": "NOT_IN_USE", "hook_layer": 10, "hook_head_index": null, "dataset_path": "NeelNanda/pile-small-tokenized-2b", "dataset_trust_remote_code": true, "streaming": true, "is_dataset_tokenized": true, "context_size": 1024, "use_cached_activations": false, "cached_activations_path": null, "architecture": "jumprelu", "d_in": 768, "d_sae": 6144, "b_dec_init_method": "zeros", "expansion_factor": 8, "activation_fn": "relu", "activation_fn_kwargs": {}, "normalize_sae_decoder": true, "noise_scale": 0.0, "from_pretrained_path": "./hub/models--mech-interp--pythia-160m-deduped-rs-post/snapshots/ad21dc2dd4070805dbeb842dcbfa14e6ad74e2b9/L11", "apply_b_dec_to_input": false, "decoder_orthogonal_init": false, "decoder_heuristic_init": false, "init_encoder_as_decoder_transpose": false, "n_batches_in_buffer": 128, "training_tokens": 500000000, "finetuning_tokens": 0, "store_batch_size_prompts": 8, "train_batch_size_tokens": 4096, "normalize_activations": "none", "device": "cuda", "act_store_device": "cuda", "seed": 42, "dtype": "float32", "prepend_bos": false, "autocast": false, "autocast_lm": false, "compile_llm": false, "llm_compilation_mode": null, "compile_sae": false, "sae_compilation_mode": null, "adam_beta1": 0, "adam_beta2": 0.999, "mse_loss_normalization": null, "l1_coefficient": 1, "lp_norm": 1, "scale_sparsity_penalty_by_decoder_norm": false, "l1_warm_up_steps": 6103, "lr": 1e-05, "lr_scheduler_name": "constant", "lr_warm_up_steps": 0, "lr_end": 1.0000000000000002e-06, "lr_decay_steps": 24414, "n_restart_cycles": 1, "finetuning_method": null, "use_ghost_grads": false, "feature_sampling_window": 2000, "dead_feature_window": 1000, "dead_feature_threshold": 1e-06, "n_eval_batches": 10, "eval_batch_size_prompts": null, "log_to_wandb": true, "log_activations_store_to_wandb": false, "log_optimizer_state_to_wandb": false, "wandb_project": "sae-transfer-learning", "wandb_id": null, "run_name": "FT_L10_hook_resid_post_L1_1", "wandb_entity": null, "wandb_log_frequency": 30, "eval_every_n_wandb_logs": 100, "resume": false, "n_checkpoints": 5, "checkpoint_path": "checkpoints/7oj8g6c4", "verbose": true, "model_kwargs": {}, "model_from_pretrained_kwargs": {}, "sae_lens_version": "3.14.0", "sae_lens_training_version": "3.14.0", "tokens_per_buffer": 536870912} \ No newline at end of file diff --git a/backwards_TL/7oj8g6c4/final_500002816/sae_weights.safetensors b/backwards_TL/7oj8g6c4/final_500002816/sae_weights.safetensors deleted file mode 100644 index 7a83eec2466cf02efce13d49b9584727c63d1e59..0000000000000000000000000000000000000000 --- a/backwards_TL/7oj8g6c4/final_500002816/sae_weights.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:46b26bfa7fd1c40589596c2635fb7b8f9f66733d37106eceef2ad086cee13357 -size 37801344 diff --git a/backwards_TL/7oj8g6c4/final_500002816/sparsity.safetensors b/backwards_TL/7oj8g6c4/final_500002816/sparsity.safetensors deleted file mode 100644 index 950605063bf04b0db75ae5f494a08e4fb7561375..0000000000000000000000000000000000000000 --- a/backwards_TL/7oj8g6c4/final_500002816/sparsity.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:664f11b2ae8eb9e56008562107a2e88435b2985b96f83e0934c16005cf7dc7e8 -size 24656 diff --git a/backwards_TL/djw9vsut/100003840/cfg.json b/backwards_TL/djw9vsut/100003840/cfg.json deleted file mode 100644 index 7a31d30ec1d468bdacdb4b32d12c742f8842e980..0000000000000000000000000000000000000000 --- a/backwards_TL/djw9vsut/100003840/cfg.json +++ /dev/null @@ -1 +0,0 @@ -{"model_name": "pythia-160m-deduped", "model_class_name": "HookedTransformer", "hook_name": "blocks.8.hook_resid_post", "hook_eval": "NOT_IN_USE", "hook_layer": 8, "hook_head_index": null, "dataset_path": "NeelNanda/pile-small-tokenized-2b", "dataset_trust_remote_code": true, "streaming": true, "is_dataset_tokenized": true, "context_size": 1024, "use_cached_activations": false, "cached_activations_path": null, "architecture": "jumprelu", "d_in": 768, "d_sae": 6144, "b_dec_init_method": "zeros", "expansion_factor": 8, "activation_fn": "relu", "activation_fn_kwargs": {}, "normalize_sae_decoder": true, "noise_scale": 0.0, "from_pretrained_path": "./hub/models--mech-interp--pythia-160m-deduped-rs-post/snapshots/ad21dc2dd4070805dbeb842dcbfa14e6ad74e2b9/L9", "apply_b_dec_to_input": false, "decoder_orthogonal_init": false, "decoder_heuristic_init": false, "init_encoder_as_decoder_transpose": false, "n_batches_in_buffer": 128, "training_tokens": 500000000, "finetuning_tokens": 0, "store_batch_size_prompts": 8, "train_batch_size_tokens": 4096, "normalize_activations": "none", "device": "cuda", "act_store_device": "cuda", "seed": 42, "dtype": "float32", "prepend_bos": false, "autocast": false, "autocast_lm": false, "compile_llm": false, "llm_compilation_mode": null, "compile_sae": false, "sae_compilation_mode": null, "adam_beta1": 0, "adam_beta2": 0.999, "mse_loss_normalization": null, "l1_coefficient": 1, "lp_norm": 1, "scale_sparsity_penalty_by_decoder_norm": false, "l1_warm_up_steps": 6103, "lr": 1e-05, "lr_scheduler_name": "constant", "lr_warm_up_steps": 0, "lr_end": 1.0000000000000002e-06, "lr_decay_steps": 24414, "n_restart_cycles": 1, "finetuning_method": null, "use_ghost_grads": false, "feature_sampling_window": 2000, "dead_feature_window": 1000, "dead_feature_threshold": 1e-06, "n_eval_batches": 10, "eval_batch_size_prompts": null, "log_to_wandb": true, "log_activations_store_to_wandb": false, "log_optimizer_state_to_wandb": false, "wandb_project": "sae-transfer-learning", "wandb_id": null, "run_name": "FT_L8_hook_resid_post_L1_1", "wandb_entity": null, "wandb_log_frequency": 30, "eval_every_n_wandb_logs": 100, "resume": false, "n_checkpoints": 5, "checkpoint_path": "checkpoints/djw9vsut", "verbose": true, "model_kwargs": {}, "model_from_pretrained_kwargs": {}, "sae_lens_version": "3.14.0", "sae_lens_training_version": "3.14.0", "tokens_per_buffer": 536870912} \ No newline at end of file diff --git a/backwards_TL/djw9vsut/100003840/sae_weights.safetensors b/backwards_TL/djw9vsut/100003840/sae_weights.safetensors deleted file mode 100644 index 640bfea3ac5b64e63907341e50ec7ec081670b02..0000000000000000000000000000000000000000 --- a/backwards_TL/djw9vsut/100003840/sae_weights.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c88a94f94fa76d2443334202b792bcb2b58cc09fba0d518254abc0d6150ec262 -size 37801344 diff --git a/backwards_TL/djw9vsut/100003840/sparsity.safetensors b/backwards_TL/djw9vsut/100003840/sparsity.safetensors deleted file mode 100644 index dd66a17df660aced8ff6cf40807a0ed445a5cf55..0000000000000000000000000000000000000000 --- a/backwards_TL/djw9vsut/100003840/sparsity.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8b992501a5f990ac63c38f3d1056c26dbd76feb5be2965d9a48981d77940215b -size 24656 diff --git a/backwards_TL/djw9vsut/200003584/cfg.json b/backwards_TL/djw9vsut/200003584/cfg.json deleted file mode 100644 index 7a31d30ec1d468bdacdb4b32d12c742f8842e980..0000000000000000000000000000000000000000 --- a/backwards_TL/djw9vsut/200003584/cfg.json +++ /dev/null @@ -1 +0,0 @@ -{"model_name": "pythia-160m-deduped", "model_class_name": "HookedTransformer", "hook_name": "blocks.8.hook_resid_post", "hook_eval": "NOT_IN_USE", "hook_layer": 8, "hook_head_index": null, "dataset_path": "NeelNanda/pile-small-tokenized-2b", "dataset_trust_remote_code": true, "streaming": true, "is_dataset_tokenized": true, "context_size": 1024, "use_cached_activations": false, "cached_activations_path": null, "architecture": "jumprelu", "d_in": 768, "d_sae": 6144, "b_dec_init_method": "zeros", "expansion_factor": 8, "activation_fn": "relu", "activation_fn_kwargs": {}, "normalize_sae_decoder": true, "noise_scale": 0.0, "from_pretrained_path": "./hub/models--mech-interp--pythia-160m-deduped-rs-post/snapshots/ad21dc2dd4070805dbeb842dcbfa14e6ad74e2b9/L9", "apply_b_dec_to_input": false, "decoder_orthogonal_init": false, "decoder_heuristic_init": false, "init_encoder_as_decoder_transpose": false, "n_batches_in_buffer": 128, "training_tokens": 500000000, "finetuning_tokens": 0, "store_batch_size_prompts": 8, "train_batch_size_tokens": 4096, "normalize_activations": "none", "device": "cuda", "act_store_device": "cuda", "seed": 42, "dtype": "float32", "prepend_bos": false, "autocast": false, "autocast_lm": false, "compile_llm": false, "llm_compilation_mode": null, "compile_sae": false, "sae_compilation_mode": null, "adam_beta1": 0, "adam_beta2": 0.999, "mse_loss_normalization": null, "l1_coefficient": 1, "lp_norm": 1, "scale_sparsity_penalty_by_decoder_norm": false, "l1_warm_up_steps": 6103, "lr": 1e-05, "lr_scheduler_name": "constant", "lr_warm_up_steps": 0, "lr_end": 1.0000000000000002e-06, "lr_decay_steps": 24414, "n_restart_cycles": 1, "finetuning_method": null, "use_ghost_grads": false, "feature_sampling_window": 2000, "dead_feature_window": 1000, "dead_feature_threshold": 1e-06, "n_eval_batches": 10, "eval_batch_size_prompts": null, "log_to_wandb": true, "log_activations_store_to_wandb": false, "log_optimizer_state_to_wandb": false, "wandb_project": "sae-transfer-learning", "wandb_id": null, "run_name": "FT_L8_hook_resid_post_L1_1", "wandb_entity": null, "wandb_log_frequency": 30, "eval_every_n_wandb_logs": 100, "resume": false, "n_checkpoints": 5, "checkpoint_path": "checkpoints/djw9vsut", "verbose": true, "model_kwargs": {}, "model_from_pretrained_kwargs": {}, "sae_lens_version": "3.14.0", "sae_lens_training_version": "3.14.0", "tokens_per_buffer": 536870912} \ No newline at end of file diff --git a/backwards_TL/djw9vsut/200003584/sae_weights.safetensors b/backwards_TL/djw9vsut/200003584/sae_weights.safetensors deleted file mode 100644 index 70a59f514c3c7147c80324601b8120026ed686fb..0000000000000000000000000000000000000000 --- a/backwards_TL/djw9vsut/200003584/sae_weights.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3bfdab5cd4df36ad506e336e3fe8b54514a3870a4ce5c2425994e5bb50207b4d -size 37801344 diff --git a/backwards_TL/djw9vsut/200003584/sparsity.safetensors b/backwards_TL/djw9vsut/200003584/sparsity.safetensors deleted file mode 100644 index 87b024d02d2cb4b3a060a2c163c31b6bcb5f81cc..0000000000000000000000000000000000000000 --- a/backwards_TL/djw9vsut/200003584/sparsity.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:4c187b98557ddbc032ad09c447ddb0f77c2fe6d542e843386277bc82cca99118 -size 24656 diff --git a/backwards_TL/djw9vsut/300003328/cfg.json b/backwards_TL/djw9vsut/300003328/cfg.json deleted file mode 100644 index 7a31d30ec1d468bdacdb4b32d12c742f8842e980..0000000000000000000000000000000000000000 --- a/backwards_TL/djw9vsut/300003328/cfg.json +++ /dev/null @@ -1 +0,0 @@ -{"model_name": "pythia-160m-deduped", "model_class_name": "HookedTransformer", "hook_name": "blocks.8.hook_resid_post", "hook_eval": "NOT_IN_USE", "hook_layer": 8, "hook_head_index": null, "dataset_path": "NeelNanda/pile-small-tokenized-2b", "dataset_trust_remote_code": true, "streaming": true, "is_dataset_tokenized": true, "context_size": 1024, "use_cached_activations": false, "cached_activations_path": null, "architecture": "jumprelu", "d_in": 768, "d_sae": 6144, "b_dec_init_method": "zeros", "expansion_factor": 8, "activation_fn": "relu", "activation_fn_kwargs": {}, "normalize_sae_decoder": true, "noise_scale": 0.0, "from_pretrained_path": "./hub/models--mech-interp--pythia-160m-deduped-rs-post/snapshots/ad21dc2dd4070805dbeb842dcbfa14e6ad74e2b9/L9", "apply_b_dec_to_input": false, "decoder_orthogonal_init": false, "decoder_heuristic_init": false, "init_encoder_as_decoder_transpose": false, "n_batches_in_buffer": 128, "training_tokens": 500000000, "finetuning_tokens": 0, "store_batch_size_prompts": 8, "train_batch_size_tokens": 4096, "normalize_activations": "none", "device": "cuda", "act_store_device": "cuda", "seed": 42, "dtype": "float32", "prepend_bos": false, "autocast": false, "autocast_lm": false, "compile_llm": false, "llm_compilation_mode": null, "compile_sae": false, "sae_compilation_mode": null, "adam_beta1": 0, "adam_beta2": 0.999, "mse_loss_normalization": null, "l1_coefficient": 1, "lp_norm": 1, "scale_sparsity_penalty_by_decoder_norm": false, "l1_warm_up_steps": 6103, "lr": 1e-05, "lr_scheduler_name": "constant", "lr_warm_up_steps": 0, "lr_end": 1.0000000000000002e-06, "lr_decay_steps": 24414, "n_restart_cycles": 1, "finetuning_method": null, "use_ghost_grads": false, "feature_sampling_window": 2000, "dead_feature_window": 1000, "dead_feature_threshold": 1e-06, "n_eval_batches": 10, "eval_batch_size_prompts": null, "log_to_wandb": true, "log_activations_store_to_wandb": false, "log_optimizer_state_to_wandb": false, "wandb_project": "sae-transfer-learning", "wandb_id": null, "run_name": "FT_L8_hook_resid_post_L1_1", "wandb_entity": null, "wandb_log_frequency": 30, "eval_every_n_wandb_logs": 100, "resume": false, "n_checkpoints": 5, "checkpoint_path": "checkpoints/djw9vsut", "verbose": true, "model_kwargs": {}, "model_from_pretrained_kwargs": {}, "sae_lens_version": "3.14.0", "sae_lens_training_version": "3.14.0", "tokens_per_buffer": 536870912} \ No newline at end of file diff --git a/backwards_TL/djw9vsut/300003328/sae_weights.safetensors b/backwards_TL/djw9vsut/300003328/sae_weights.safetensors deleted file mode 100644 index 5d73d5e4553c0b4bab764083ea87914da804e4e9..0000000000000000000000000000000000000000 --- a/backwards_TL/djw9vsut/300003328/sae_weights.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3ac0d2b9f0886da1b393d1b148c06643d75206bc106c77a5201a98e22e5c2e06 -size 37801344 diff --git a/backwards_TL/djw9vsut/300003328/sparsity.safetensors b/backwards_TL/djw9vsut/300003328/sparsity.safetensors deleted file mode 100644 index 8b7b66d9441543c2658917b7a1c93d6cb17ade7e..0000000000000000000000000000000000000000 --- a/backwards_TL/djw9vsut/300003328/sparsity.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:057e839a6f93fa3905a99dc4dd65357de53b5ad3884c032e8d3723cd89c47ff8 -size 24656 diff --git a/backwards_TL/djw9vsut/400003072/cfg.json b/backwards_TL/djw9vsut/400003072/cfg.json deleted file mode 100644 index 7a31d30ec1d468bdacdb4b32d12c742f8842e980..0000000000000000000000000000000000000000 --- a/backwards_TL/djw9vsut/400003072/cfg.json +++ /dev/null @@ -1 +0,0 @@ -{"model_name": "pythia-160m-deduped", "model_class_name": "HookedTransformer", "hook_name": "blocks.8.hook_resid_post", "hook_eval": "NOT_IN_USE", "hook_layer": 8, "hook_head_index": null, "dataset_path": "NeelNanda/pile-small-tokenized-2b", "dataset_trust_remote_code": true, "streaming": true, "is_dataset_tokenized": true, "context_size": 1024, "use_cached_activations": false, "cached_activations_path": null, "architecture": "jumprelu", "d_in": 768, "d_sae": 6144, "b_dec_init_method": "zeros", "expansion_factor": 8, "activation_fn": "relu", "activation_fn_kwargs": {}, "normalize_sae_decoder": true, "noise_scale": 0.0, "from_pretrained_path": "./hub/models--mech-interp--pythia-160m-deduped-rs-post/snapshots/ad21dc2dd4070805dbeb842dcbfa14e6ad74e2b9/L9", "apply_b_dec_to_input": false, "decoder_orthogonal_init": false, "decoder_heuristic_init": false, "init_encoder_as_decoder_transpose": false, "n_batches_in_buffer": 128, "training_tokens": 500000000, "finetuning_tokens": 0, "store_batch_size_prompts": 8, "train_batch_size_tokens": 4096, "normalize_activations": "none", "device": "cuda", "act_store_device": "cuda", "seed": 42, "dtype": "float32", "prepend_bos": false, "autocast": false, "autocast_lm": false, "compile_llm": false, "llm_compilation_mode": null, "compile_sae": false, "sae_compilation_mode": null, "adam_beta1": 0, "adam_beta2": 0.999, "mse_loss_normalization": null, "l1_coefficient": 1, "lp_norm": 1, "scale_sparsity_penalty_by_decoder_norm": false, "l1_warm_up_steps": 6103, "lr": 1e-05, "lr_scheduler_name": "constant", "lr_warm_up_steps": 0, "lr_end": 1.0000000000000002e-06, "lr_decay_steps": 24414, "n_restart_cycles": 1, "finetuning_method": null, "use_ghost_grads": false, "feature_sampling_window": 2000, "dead_feature_window": 1000, "dead_feature_threshold": 1e-06, "n_eval_batches": 10, "eval_batch_size_prompts": null, "log_to_wandb": true, "log_activations_store_to_wandb": false, "log_optimizer_state_to_wandb": false, "wandb_project": "sae-transfer-learning", "wandb_id": null, "run_name": "FT_L8_hook_resid_post_L1_1", "wandb_entity": null, "wandb_log_frequency": 30, "eval_every_n_wandb_logs": 100, "resume": false, "n_checkpoints": 5, "checkpoint_path": "checkpoints/djw9vsut", "verbose": true, "model_kwargs": {}, "model_from_pretrained_kwargs": {}, "sae_lens_version": "3.14.0", "sae_lens_training_version": "3.14.0", "tokens_per_buffer": 536870912} \ No newline at end of file diff --git a/backwards_TL/djw9vsut/400003072/sae_weights.safetensors b/backwards_TL/djw9vsut/400003072/sae_weights.safetensors deleted file mode 100644 index 496d6ab0aaae92ec58de37c54dcb4944d10e1a25..0000000000000000000000000000000000000000 --- a/backwards_TL/djw9vsut/400003072/sae_weights.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c8d17604d5fb6be2b153dc52f6ad34fac7ab51eced26c5c059d395b88d582365 -size 37801344 diff --git a/backwards_TL/djw9vsut/400003072/sparsity.safetensors b/backwards_TL/djw9vsut/400003072/sparsity.safetensors deleted file mode 100644 index a73621cdd853a493fde11ef5106c00752f66c9cb..0000000000000000000000000000000000000000 --- a/backwards_TL/djw9vsut/400003072/sparsity.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b9baca68defa7b370c8f44378abc7cfc1c800cc75ca3649cb30a703346ced688 -size 24656 diff --git a/backwards_TL/djw9vsut/final_500002816/cfg.json b/backwards_TL/djw9vsut/final_500002816/cfg.json deleted file mode 100644 index 7a31d30ec1d468bdacdb4b32d12c742f8842e980..0000000000000000000000000000000000000000 --- a/backwards_TL/djw9vsut/final_500002816/cfg.json +++ /dev/null @@ -1 +0,0 @@ -{"model_name": "pythia-160m-deduped", "model_class_name": "HookedTransformer", "hook_name": "blocks.8.hook_resid_post", "hook_eval": "NOT_IN_USE", "hook_layer": 8, "hook_head_index": null, "dataset_path": "NeelNanda/pile-small-tokenized-2b", "dataset_trust_remote_code": true, "streaming": true, "is_dataset_tokenized": true, "context_size": 1024, "use_cached_activations": false, "cached_activations_path": null, "architecture": "jumprelu", "d_in": 768, "d_sae": 6144, "b_dec_init_method": "zeros", "expansion_factor": 8, "activation_fn": "relu", "activation_fn_kwargs": {}, "normalize_sae_decoder": true, "noise_scale": 0.0, "from_pretrained_path": "./hub/models--mech-interp--pythia-160m-deduped-rs-post/snapshots/ad21dc2dd4070805dbeb842dcbfa14e6ad74e2b9/L9", "apply_b_dec_to_input": false, "decoder_orthogonal_init": false, "decoder_heuristic_init": false, "init_encoder_as_decoder_transpose": false, "n_batches_in_buffer": 128, "training_tokens": 500000000, "finetuning_tokens": 0, "store_batch_size_prompts": 8, "train_batch_size_tokens": 4096, "normalize_activations": "none", "device": "cuda", "act_store_device": "cuda", "seed": 42, "dtype": "float32", "prepend_bos": false, "autocast": false, "autocast_lm": false, "compile_llm": false, "llm_compilation_mode": null, "compile_sae": false, "sae_compilation_mode": null, "adam_beta1": 0, "adam_beta2": 0.999, "mse_loss_normalization": null, "l1_coefficient": 1, "lp_norm": 1, "scale_sparsity_penalty_by_decoder_norm": false, "l1_warm_up_steps": 6103, "lr": 1e-05, "lr_scheduler_name": "constant", "lr_warm_up_steps": 0, "lr_end": 1.0000000000000002e-06, "lr_decay_steps": 24414, "n_restart_cycles": 1, "finetuning_method": null, "use_ghost_grads": false, "feature_sampling_window": 2000, "dead_feature_window": 1000, "dead_feature_threshold": 1e-06, "n_eval_batches": 10, "eval_batch_size_prompts": null, "log_to_wandb": true, "log_activations_store_to_wandb": false, "log_optimizer_state_to_wandb": false, "wandb_project": "sae-transfer-learning", "wandb_id": null, "run_name": "FT_L8_hook_resid_post_L1_1", "wandb_entity": null, "wandb_log_frequency": 30, "eval_every_n_wandb_logs": 100, "resume": false, "n_checkpoints": 5, "checkpoint_path": "checkpoints/djw9vsut", "verbose": true, "model_kwargs": {}, "model_from_pretrained_kwargs": {}, "sae_lens_version": "3.14.0", "sae_lens_training_version": "3.14.0", "tokens_per_buffer": 536870912} \ No newline at end of file diff --git a/backwards_TL/djw9vsut/final_500002816/sae_weights.safetensors b/backwards_TL/djw9vsut/final_500002816/sae_weights.safetensors deleted file mode 100644 index 2c03e761ff3144c9360b63a0fc0e8cedae482c41..0000000000000000000000000000000000000000 --- a/backwards_TL/djw9vsut/final_500002816/sae_weights.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:57d935477552535a76aa373274260a92af693dacb7b41e8fd7c7b74b6c69fbd1 -size 37801344 diff --git a/backwards_TL/djw9vsut/final_500002816/sparsity.safetensors b/backwards_TL/djw9vsut/final_500002816/sparsity.safetensors deleted file mode 100644 index af6c84a6a61e32000e202dc80e5e2e86d62ca995..0000000000000000000000000000000000000000 --- a/backwards_TL/djw9vsut/final_500002816/sparsity.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c6c2c2577594fdda2908b675580ad41cd6f4672a7dfbcf94f7da6159553bfb1e -size 24656 diff --git a/backwards_TL/drbi3oow/100003840/cfg.json b/backwards_TL/drbi3oow/100003840/cfg.json deleted file mode 100644 index b135ac4313f8fe9ad24b8838e09d726e6a34354f..0000000000000000000000000000000000000000 --- a/backwards_TL/drbi3oow/100003840/cfg.json +++ /dev/null @@ -1 +0,0 @@ -{"model_name": "pythia-160m-deduped", "model_class_name": "HookedTransformer", "hook_name": "blocks.9.hook_resid_post", "hook_eval": "NOT_IN_USE", "hook_layer": 9, "hook_head_index": null, "dataset_path": "NeelNanda/pile-small-tokenized-2b", "dataset_trust_remote_code": true, "streaming": true, "is_dataset_tokenized": true, "context_size": 1024, "use_cached_activations": false, "cached_activations_path": null, "architecture": "jumprelu", "d_in": 768, "d_sae": 6144, "b_dec_init_method": "zeros", "expansion_factor": 8, "activation_fn": "relu", "activation_fn_kwargs": {}, "normalize_sae_decoder": true, "noise_scale": 0.0, "from_pretrained_path": "./hub/models--mech-interp--pythia-160m-deduped-rs-post/snapshots/ad21dc2dd4070805dbeb842dcbfa14e6ad74e2b9/L10", "apply_b_dec_to_input": false, "decoder_orthogonal_init": false, "decoder_heuristic_init": false, "init_encoder_as_decoder_transpose": false, "n_batches_in_buffer": 128, "training_tokens": 500000000, "finetuning_tokens": 0, "store_batch_size_prompts": 8, "train_batch_size_tokens": 4096, "normalize_activations": "none", "device": "cuda", "act_store_device": "cuda", "seed": 42, "dtype": "float32", "prepend_bos": false, "autocast": false, "autocast_lm": false, "compile_llm": false, "llm_compilation_mode": null, "compile_sae": false, "sae_compilation_mode": null, "adam_beta1": 0, "adam_beta2": 0.999, "mse_loss_normalization": null, "l1_coefficient": 1, "lp_norm": 1, "scale_sparsity_penalty_by_decoder_norm": false, "l1_warm_up_steps": 6103, "lr": 1e-05, "lr_scheduler_name": "constant", "lr_warm_up_steps": 0, "lr_end": 1.0000000000000002e-06, "lr_decay_steps": 24414, "n_restart_cycles": 1, "finetuning_method": null, "use_ghost_grads": false, "feature_sampling_window": 2000, "dead_feature_window": 1000, "dead_feature_threshold": 1e-06, "n_eval_batches": 10, "eval_batch_size_prompts": null, "log_to_wandb": true, "log_activations_store_to_wandb": false, "log_optimizer_state_to_wandb": false, "wandb_project": "sae-transfer-learning", "wandb_id": null, "run_name": "FT_L9_hook_resid_post_L1_1", "wandb_entity": null, "wandb_log_frequency": 30, "eval_every_n_wandb_logs": 100, "resume": false, "n_checkpoints": 5, "checkpoint_path": "checkpoints/drbi3oow", "verbose": true, "model_kwargs": {}, "model_from_pretrained_kwargs": {}, "sae_lens_version": "3.14.0", "sae_lens_training_version": "3.14.0", "tokens_per_buffer": 536870912} \ No newline at end of file diff --git a/backwards_TL/drbi3oow/100003840/sae_weights.safetensors b/backwards_TL/drbi3oow/100003840/sae_weights.safetensors deleted file mode 100644 index b5e0d3f1dab17b5ce836bbfce22ff2e1c6308e30..0000000000000000000000000000000000000000 --- a/backwards_TL/drbi3oow/100003840/sae_weights.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:240a5ea1acd913b121e20e76ed4713fe80fa907c8d69178ca37ea9a2f9e21329 -size 37801344 diff --git a/backwards_TL/drbi3oow/100003840/sparsity.safetensors b/backwards_TL/drbi3oow/100003840/sparsity.safetensors deleted file mode 100644 index 310d23b6e43d32a78d9afda69b67f2f9030c29d0..0000000000000000000000000000000000000000 --- a/backwards_TL/drbi3oow/100003840/sparsity.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c8bd076cb5293df80bf5138602ddb0e898ed59500252f5346460a37a0f234813 -size 24656 diff --git a/backwards_TL/drbi3oow/200003584/cfg.json b/backwards_TL/drbi3oow/200003584/cfg.json deleted file mode 100644 index b135ac4313f8fe9ad24b8838e09d726e6a34354f..0000000000000000000000000000000000000000 --- a/backwards_TL/drbi3oow/200003584/cfg.json +++ /dev/null @@ -1 +0,0 @@ -{"model_name": "pythia-160m-deduped", "model_class_name": "HookedTransformer", "hook_name": "blocks.9.hook_resid_post", "hook_eval": "NOT_IN_USE", "hook_layer": 9, "hook_head_index": null, "dataset_path": "NeelNanda/pile-small-tokenized-2b", "dataset_trust_remote_code": true, "streaming": true, "is_dataset_tokenized": true, "context_size": 1024, "use_cached_activations": false, "cached_activations_path": null, "architecture": "jumprelu", "d_in": 768, "d_sae": 6144, "b_dec_init_method": "zeros", "expansion_factor": 8, "activation_fn": "relu", "activation_fn_kwargs": {}, "normalize_sae_decoder": true, "noise_scale": 0.0, "from_pretrained_path": "./hub/models--mech-interp--pythia-160m-deduped-rs-post/snapshots/ad21dc2dd4070805dbeb842dcbfa14e6ad74e2b9/L10", "apply_b_dec_to_input": false, "decoder_orthogonal_init": false, "decoder_heuristic_init": false, "init_encoder_as_decoder_transpose": false, "n_batches_in_buffer": 128, "training_tokens": 500000000, "finetuning_tokens": 0, "store_batch_size_prompts": 8, "train_batch_size_tokens": 4096, "normalize_activations": "none", "device": "cuda", "act_store_device": "cuda", "seed": 42, "dtype": "float32", "prepend_bos": false, "autocast": false, "autocast_lm": false, "compile_llm": false, "llm_compilation_mode": null, "compile_sae": false, "sae_compilation_mode": null, "adam_beta1": 0, "adam_beta2": 0.999, "mse_loss_normalization": null, "l1_coefficient": 1, "lp_norm": 1, "scale_sparsity_penalty_by_decoder_norm": false, "l1_warm_up_steps": 6103, "lr": 1e-05, "lr_scheduler_name": "constant", "lr_warm_up_steps": 0, "lr_end": 1.0000000000000002e-06, "lr_decay_steps": 24414, "n_restart_cycles": 1, "finetuning_method": null, "use_ghost_grads": false, "feature_sampling_window": 2000, "dead_feature_window": 1000, "dead_feature_threshold": 1e-06, "n_eval_batches": 10, "eval_batch_size_prompts": null, "log_to_wandb": true, "log_activations_store_to_wandb": false, "log_optimizer_state_to_wandb": false, "wandb_project": "sae-transfer-learning", "wandb_id": null, "run_name": "FT_L9_hook_resid_post_L1_1", "wandb_entity": null, "wandb_log_frequency": 30, "eval_every_n_wandb_logs": 100, "resume": false, "n_checkpoints": 5, "checkpoint_path": "checkpoints/drbi3oow", "verbose": true, "model_kwargs": {}, "model_from_pretrained_kwargs": {}, "sae_lens_version": "3.14.0", "sae_lens_training_version": "3.14.0", "tokens_per_buffer": 536870912} \ No newline at end of file diff --git a/backwards_TL/drbi3oow/200003584/sae_weights.safetensors b/backwards_TL/drbi3oow/200003584/sae_weights.safetensors deleted file mode 100644 index 53b75f8bd98927bd0025a0e2d5d54878743f4765..0000000000000000000000000000000000000000 --- a/backwards_TL/drbi3oow/200003584/sae_weights.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:5a080bb6c343a16bb1a19f7bdb1d8c4e271a29754e6540ba49df08746ed0bb2e -size 37801344 diff --git a/backwards_TL/drbi3oow/200003584/sparsity.safetensors b/backwards_TL/drbi3oow/200003584/sparsity.safetensors deleted file mode 100644 index 9d89573855aac289b1f0a918e3d2a0bf6217cb75..0000000000000000000000000000000000000000 --- a/backwards_TL/drbi3oow/200003584/sparsity.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8ec38a833b90d61e9e6aede615c0a34c5b34deb6ba5b896689193e33887788cd -size 24656 diff --git a/backwards_TL/drbi3oow/300003328/cfg.json b/backwards_TL/drbi3oow/300003328/cfg.json deleted file mode 100644 index b135ac4313f8fe9ad24b8838e09d726e6a34354f..0000000000000000000000000000000000000000 --- a/backwards_TL/drbi3oow/300003328/cfg.json +++ /dev/null @@ -1 +0,0 @@ -{"model_name": "pythia-160m-deduped", "model_class_name": "HookedTransformer", "hook_name": "blocks.9.hook_resid_post", "hook_eval": "NOT_IN_USE", "hook_layer": 9, "hook_head_index": null, "dataset_path": "NeelNanda/pile-small-tokenized-2b", "dataset_trust_remote_code": true, "streaming": true, "is_dataset_tokenized": true, "context_size": 1024, "use_cached_activations": false, "cached_activations_path": null, "architecture": "jumprelu", "d_in": 768, "d_sae": 6144, "b_dec_init_method": "zeros", "expansion_factor": 8, "activation_fn": "relu", "activation_fn_kwargs": {}, "normalize_sae_decoder": true, "noise_scale": 0.0, "from_pretrained_path": "./hub/models--mech-interp--pythia-160m-deduped-rs-post/snapshots/ad21dc2dd4070805dbeb842dcbfa14e6ad74e2b9/L10", "apply_b_dec_to_input": false, "decoder_orthogonal_init": false, "decoder_heuristic_init": false, "init_encoder_as_decoder_transpose": false, "n_batches_in_buffer": 128, "training_tokens": 500000000, "finetuning_tokens": 0, "store_batch_size_prompts": 8, "train_batch_size_tokens": 4096, "normalize_activations": "none", "device": "cuda", "act_store_device": "cuda", "seed": 42, "dtype": "float32", "prepend_bos": false, "autocast": false, "autocast_lm": false, "compile_llm": false, "llm_compilation_mode": null, "compile_sae": false, "sae_compilation_mode": null, "adam_beta1": 0, "adam_beta2": 0.999, "mse_loss_normalization": null, "l1_coefficient": 1, "lp_norm": 1, "scale_sparsity_penalty_by_decoder_norm": false, "l1_warm_up_steps": 6103, "lr": 1e-05, "lr_scheduler_name": "constant", "lr_warm_up_steps": 0, "lr_end": 1.0000000000000002e-06, "lr_decay_steps": 24414, "n_restart_cycles": 1, "finetuning_method": null, "use_ghost_grads": false, "feature_sampling_window": 2000, "dead_feature_window": 1000, "dead_feature_threshold": 1e-06, "n_eval_batches": 10, "eval_batch_size_prompts": null, "log_to_wandb": true, "log_activations_store_to_wandb": false, "log_optimizer_state_to_wandb": false, "wandb_project": "sae-transfer-learning", "wandb_id": null, "run_name": "FT_L9_hook_resid_post_L1_1", "wandb_entity": null, "wandb_log_frequency": 30, "eval_every_n_wandb_logs": 100, "resume": false, "n_checkpoints": 5, "checkpoint_path": "checkpoints/drbi3oow", "verbose": true, "model_kwargs": {}, "model_from_pretrained_kwargs": {}, "sae_lens_version": "3.14.0", "sae_lens_training_version": "3.14.0", "tokens_per_buffer": 536870912} \ No newline at end of file diff --git a/backwards_TL/drbi3oow/300003328/sae_weights.safetensors b/backwards_TL/drbi3oow/300003328/sae_weights.safetensors deleted file mode 100644 index 8794c543ee0de30271dcf42a5e63ae02649bda92..0000000000000000000000000000000000000000 --- a/backwards_TL/drbi3oow/300003328/sae_weights.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d663f5d6adbce65785a58b21600505269afe9aacd7052753f8f5d26af74b07e3 -size 37801344 diff --git a/backwards_TL/drbi3oow/300003328/sparsity.safetensors b/backwards_TL/drbi3oow/300003328/sparsity.safetensors deleted file mode 100644 index 8ccaf2daa62f866131cd25e5f55736cc17d24ee2..0000000000000000000000000000000000000000 --- a/backwards_TL/drbi3oow/300003328/sparsity.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e9924396a639a0b40eb0cd5821c14f1403e3902d93187ed14c090666a41cb9ec -size 24656 diff --git a/backwards_TL/drbi3oow/400003072/cfg.json b/backwards_TL/drbi3oow/400003072/cfg.json deleted file mode 100644 index b135ac4313f8fe9ad24b8838e09d726e6a34354f..0000000000000000000000000000000000000000 --- a/backwards_TL/drbi3oow/400003072/cfg.json +++ /dev/null @@ -1 +0,0 @@ -{"model_name": "pythia-160m-deduped", "model_class_name": "HookedTransformer", "hook_name": "blocks.9.hook_resid_post", "hook_eval": "NOT_IN_USE", "hook_layer": 9, "hook_head_index": null, "dataset_path": "NeelNanda/pile-small-tokenized-2b", "dataset_trust_remote_code": true, "streaming": true, "is_dataset_tokenized": true, "context_size": 1024, "use_cached_activations": false, "cached_activations_path": null, "architecture": "jumprelu", "d_in": 768, "d_sae": 6144, "b_dec_init_method": "zeros", "expansion_factor": 8, "activation_fn": "relu", "activation_fn_kwargs": {}, "normalize_sae_decoder": true, "noise_scale": 0.0, "from_pretrained_path": "./hub/models--mech-interp--pythia-160m-deduped-rs-post/snapshots/ad21dc2dd4070805dbeb842dcbfa14e6ad74e2b9/L10", "apply_b_dec_to_input": false, "decoder_orthogonal_init": false, "decoder_heuristic_init": false, "init_encoder_as_decoder_transpose": false, "n_batches_in_buffer": 128, "training_tokens": 500000000, "finetuning_tokens": 0, "store_batch_size_prompts": 8, "train_batch_size_tokens": 4096, "normalize_activations": "none", "device": "cuda", "act_store_device": "cuda", "seed": 42, "dtype": "float32", "prepend_bos": false, "autocast": false, "autocast_lm": false, "compile_llm": false, "llm_compilation_mode": null, "compile_sae": false, "sae_compilation_mode": null, "adam_beta1": 0, "adam_beta2": 0.999, "mse_loss_normalization": null, "l1_coefficient": 1, "lp_norm": 1, "scale_sparsity_penalty_by_decoder_norm": false, "l1_warm_up_steps": 6103, "lr": 1e-05, "lr_scheduler_name": "constant", "lr_warm_up_steps": 0, "lr_end": 1.0000000000000002e-06, "lr_decay_steps": 24414, "n_restart_cycles": 1, "finetuning_method": null, "use_ghost_grads": false, "feature_sampling_window": 2000, "dead_feature_window": 1000, "dead_feature_threshold": 1e-06, "n_eval_batches": 10, "eval_batch_size_prompts": null, "log_to_wandb": true, "log_activations_store_to_wandb": false, "log_optimizer_state_to_wandb": false, "wandb_project": "sae-transfer-learning", "wandb_id": null, "run_name": "FT_L9_hook_resid_post_L1_1", "wandb_entity": null, "wandb_log_frequency": 30, "eval_every_n_wandb_logs": 100, "resume": false, "n_checkpoints": 5, "checkpoint_path": "checkpoints/drbi3oow", "verbose": true, "model_kwargs": {}, "model_from_pretrained_kwargs": {}, "sae_lens_version": "3.14.0", "sae_lens_training_version": "3.14.0", "tokens_per_buffer": 536870912} \ No newline at end of file diff --git a/backwards_TL/drbi3oow/400003072/sae_weights.safetensors b/backwards_TL/drbi3oow/400003072/sae_weights.safetensors deleted file mode 100644 index a03f9b3e1799e7bf46ae336a0f24a4c5c42dc54e..0000000000000000000000000000000000000000 --- a/backwards_TL/drbi3oow/400003072/sae_weights.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:1927ac7c1727f991bf36c5754bee0b69db35e5c59b44fa8aef749a66d7c29612 -size 37801344 diff --git a/backwards_TL/drbi3oow/400003072/sparsity.safetensors b/backwards_TL/drbi3oow/400003072/sparsity.safetensors deleted file mode 100644 index 1a4f3180097db72e5ec1f876b246c8dcc733bf08..0000000000000000000000000000000000000000 --- a/backwards_TL/drbi3oow/400003072/sparsity.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6ec24b0de2e36541fb6d0918004cb1112990e3fe0968433be5ac9376db70b244 -size 24656 diff --git a/backwards_TL/drbi3oow/final_500002816/cfg.json b/backwards_TL/drbi3oow/final_500002816/cfg.json deleted file mode 100644 index b135ac4313f8fe9ad24b8838e09d726e6a34354f..0000000000000000000000000000000000000000 --- a/backwards_TL/drbi3oow/final_500002816/cfg.json +++ /dev/null @@ -1 +0,0 @@ -{"model_name": "pythia-160m-deduped", "model_class_name": "HookedTransformer", "hook_name": "blocks.9.hook_resid_post", "hook_eval": "NOT_IN_USE", "hook_layer": 9, "hook_head_index": null, "dataset_path": "NeelNanda/pile-small-tokenized-2b", "dataset_trust_remote_code": true, "streaming": true, "is_dataset_tokenized": true, "context_size": 1024, "use_cached_activations": false, "cached_activations_path": null, "architecture": "jumprelu", "d_in": 768, "d_sae": 6144, "b_dec_init_method": "zeros", "expansion_factor": 8, "activation_fn": "relu", "activation_fn_kwargs": {}, "normalize_sae_decoder": true, "noise_scale": 0.0, "from_pretrained_path": "./hub/models--mech-interp--pythia-160m-deduped-rs-post/snapshots/ad21dc2dd4070805dbeb842dcbfa14e6ad74e2b9/L10", "apply_b_dec_to_input": false, "decoder_orthogonal_init": false, "decoder_heuristic_init": false, "init_encoder_as_decoder_transpose": false, "n_batches_in_buffer": 128, "training_tokens": 500000000, "finetuning_tokens": 0, "store_batch_size_prompts": 8, "train_batch_size_tokens": 4096, "normalize_activations": "none", "device": "cuda", "act_store_device": "cuda", "seed": 42, "dtype": "float32", "prepend_bos": false, "autocast": false, "autocast_lm": false, "compile_llm": false, "llm_compilation_mode": null, "compile_sae": false, "sae_compilation_mode": null, "adam_beta1": 0, "adam_beta2": 0.999, "mse_loss_normalization": null, "l1_coefficient": 1, "lp_norm": 1, "scale_sparsity_penalty_by_decoder_norm": false, "l1_warm_up_steps": 6103, "lr": 1e-05, "lr_scheduler_name": "constant", "lr_warm_up_steps": 0, "lr_end": 1.0000000000000002e-06, "lr_decay_steps": 24414, "n_restart_cycles": 1, "finetuning_method": null, "use_ghost_grads": false, "feature_sampling_window": 2000, "dead_feature_window": 1000, "dead_feature_threshold": 1e-06, "n_eval_batches": 10, "eval_batch_size_prompts": null, "log_to_wandb": true, "log_activations_store_to_wandb": false, "log_optimizer_state_to_wandb": false, "wandb_project": "sae-transfer-learning", "wandb_id": null, "run_name": "FT_L9_hook_resid_post_L1_1", "wandb_entity": null, "wandb_log_frequency": 30, "eval_every_n_wandb_logs": 100, "resume": false, "n_checkpoints": 5, "checkpoint_path": "checkpoints/drbi3oow", "verbose": true, "model_kwargs": {}, "model_from_pretrained_kwargs": {}, "sae_lens_version": "3.14.0", "sae_lens_training_version": "3.14.0", "tokens_per_buffer": 536870912} \ No newline at end of file diff --git a/backwards_TL/drbi3oow/final_500002816/sae_weights.safetensors b/backwards_TL/drbi3oow/final_500002816/sae_weights.safetensors deleted file mode 100644 index a2bc01b0ef979887b0ba5213e88b82f5df7cd724..0000000000000000000000000000000000000000 --- a/backwards_TL/drbi3oow/final_500002816/sae_weights.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:834432085e0d15a45c3c2311f52c3546a98eb522a7d52c4eeb3d9086a807ea7e -size 37801344 diff --git a/backwards_TL/drbi3oow/final_500002816/sparsity.safetensors b/backwards_TL/drbi3oow/final_500002816/sparsity.safetensors deleted file mode 100644 index 7e21268f3cba8aff303dba88822d3c3fd6149425..0000000000000000000000000000000000000000 --- a/backwards_TL/drbi3oow/final_500002816/sparsity.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8740f1fae01c9d33e78e74f8ca20b6ef4b080db496e476ec16a312801a962f0b -size 24656 diff --git a/backwards_TL/e902fx68/100003840/cfg.json b/backwards_TL/e902fx68/100003840/cfg.json deleted file mode 100644 index 30b421bfea79f4c8c16fa8d01c38528a56328a03..0000000000000000000000000000000000000000 --- a/backwards_TL/e902fx68/100003840/cfg.json +++ /dev/null @@ -1 +0,0 @@ -{"model_name": "pythia-160m-deduped", "model_class_name": "HookedTransformer", "hook_name": "blocks.1.hook_resid_post", "hook_eval": "NOT_IN_USE", "hook_layer": 1, "hook_head_index": null, "dataset_path": "NeelNanda/pile-small-tokenized-2b", "dataset_trust_remote_code": true, "streaming": true, "is_dataset_tokenized": true, "context_size": 1024, "use_cached_activations": false, "cached_activations_path": null, "architecture": "jumprelu", "d_in": 768, "d_sae": 6144, "b_dec_init_method": "zeros", "expansion_factor": 8, "activation_fn": "relu", "activation_fn_kwargs": {}, "normalize_sae_decoder": true, "noise_scale": 0.0, "from_pretrained_path": "./hub/models--mech-interp--pythia-160m-deduped-rs-post/snapshots/ad21dc2dd4070805dbeb842dcbfa14e6ad74e2b9/L2", "apply_b_dec_to_input": false, "decoder_orthogonal_init": false, "decoder_heuristic_init": false, "init_encoder_as_decoder_transpose": false, "n_batches_in_buffer": 128, "training_tokens": 500000000, "finetuning_tokens": 0, "store_batch_size_prompts": 8, "train_batch_size_tokens": 4096, "normalize_activations": "none", "device": "cuda", "act_store_device": "cuda", "seed": 42, "dtype": "float32", "prepend_bos": false, "autocast": false, "autocast_lm": false, "compile_llm": false, "llm_compilation_mode": null, "compile_sae": false, "sae_compilation_mode": null, "adam_beta1": 0, "adam_beta2": 0.999, "mse_loss_normalization": null, "l1_coefficient": 1, "lp_norm": 1, "scale_sparsity_penalty_by_decoder_norm": false, "l1_warm_up_steps": 6103, "lr": 1e-05, "lr_scheduler_name": "constant", "lr_warm_up_steps": 0, "lr_end": 1.0000000000000002e-06, "lr_decay_steps": 24414, "n_restart_cycles": 1, "finetuning_method": null, "use_ghost_grads": false, "feature_sampling_window": 2000, "dead_feature_window": 1000, "dead_feature_threshold": 1e-06, "n_eval_batches": 10, "eval_batch_size_prompts": null, "log_to_wandb": true, "log_activations_store_to_wandb": false, "log_optimizer_state_to_wandb": false, "wandb_project": "sae-transfer-learning", "wandb_id": null, "run_name": "FT_L1_hook_resid_post_L1_1", "wandb_entity": null, "wandb_log_frequency": 30, "eval_every_n_wandb_logs": 100, "resume": false, "n_checkpoints": 5, "checkpoint_path": "checkpoints/e902fx68", "verbose": true, "model_kwargs": {}, "model_from_pretrained_kwargs": {}, "sae_lens_version": "3.14.0", "sae_lens_training_version": "3.14.0", "tokens_per_buffer": 536870912} \ No newline at end of file diff --git a/backwards_TL/e902fx68/100003840/sae_weights.safetensors b/backwards_TL/e902fx68/100003840/sae_weights.safetensors deleted file mode 100644 index 91c594ab35c91cb1bdf76f41a210bc82984f4d45..0000000000000000000000000000000000000000 --- a/backwards_TL/e902fx68/100003840/sae_weights.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:40654495475ccdfa37cba476e5365566e00c0b44cd848978c3a73c679570633b -size 37801344 diff --git a/backwards_TL/e902fx68/100003840/sparsity.safetensors b/backwards_TL/e902fx68/100003840/sparsity.safetensors deleted file mode 100644 index 34c53d2068919b62572d719ba38c6d473cad23c0..0000000000000000000000000000000000000000 --- a/backwards_TL/e902fx68/100003840/sparsity.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a86592e4854a528529eab9693d810a1a5f2fea1e2a4dfebeffe23b1933caba07 -size 24656 diff --git a/backwards_TL/e902fx68/200003584/cfg.json b/backwards_TL/e902fx68/200003584/cfg.json deleted file mode 100644 index 30b421bfea79f4c8c16fa8d01c38528a56328a03..0000000000000000000000000000000000000000 --- a/backwards_TL/e902fx68/200003584/cfg.json +++ /dev/null @@ -1 +0,0 @@ -{"model_name": "pythia-160m-deduped", "model_class_name": "HookedTransformer", "hook_name": "blocks.1.hook_resid_post", "hook_eval": "NOT_IN_USE", "hook_layer": 1, "hook_head_index": null, "dataset_path": "NeelNanda/pile-small-tokenized-2b", "dataset_trust_remote_code": true, "streaming": true, "is_dataset_tokenized": true, "context_size": 1024, "use_cached_activations": false, "cached_activations_path": null, "architecture": "jumprelu", "d_in": 768, "d_sae": 6144, "b_dec_init_method": "zeros", "expansion_factor": 8, "activation_fn": "relu", "activation_fn_kwargs": {}, "normalize_sae_decoder": true, "noise_scale": 0.0, "from_pretrained_path": "./hub/models--mech-interp--pythia-160m-deduped-rs-post/snapshots/ad21dc2dd4070805dbeb842dcbfa14e6ad74e2b9/L2", "apply_b_dec_to_input": false, "decoder_orthogonal_init": false, "decoder_heuristic_init": false, "init_encoder_as_decoder_transpose": false, "n_batches_in_buffer": 128, "training_tokens": 500000000, "finetuning_tokens": 0, "store_batch_size_prompts": 8, "train_batch_size_tokens": 4096, "normalize_activations": "none", "device": "cuda", "act_store_device": "cuda", "seed": 42, "dtype": "float32", "prepend_bos": false, "autocast": false, "autocast_lm": false, "compile_llm": false, "llm_compilation_mode": null, "compile_sae": false, "sae_compilation_mode": null, "adam_beta1": 0, "adam_beta2": 0.999, "mse_loss_normalization": null, "l1_coefficient": 1, "lp_norm": 1, "scale_sparsity_penalty_by_decoder_norm": false, "l1_warm_up_steps": 6103, "lr": 1e-05, "lr_scheduler_name": "constant", "lr_warm_up_steps": 0, "lr_end": 1.0000000000000002e-06, "lr_decay_steps": 24414, "n_restart_cycles": 1, "finetuning_method": null, "use_ghost_grads": false, "feature_sampling_window": 2000, "dead_feature_window": 1000, "dead_feature_threshold": 1e-06, "n_eval_batches": 10, "eval_batch_size_prompts": null, "log_to_wandb": true, "log_activations_store_to_wandb": false, "log_optimizer_state_to_wandb": false, "wandb_project": "sae-transfer-learning", "wandb_id": null, "run_name": "FT_L1_hook_resid_post_L1_1", "wandb_entity": null, "wandb_log_frequency": 30, "eval_every_n_wandb_logs": 100, "resume": false, "n_checkpoints": 5, "checkpoint_path": "checkpoints/e902fx68", "verbose": true, "model_kwargs": {}, "model_from_pretrained_kwargs": {}, "sae_lens_version": "3.14.0", "sae_lens_training_version": "3.14.0", "tokens_per_buffer": 536870912} \ No newline at end of file diff --git a/backwards_TL/e902fx68/200003584/sae_weights.safetensors b/backwards_TL/e902fx68/200003584/sae_weights.safetensors deleted file mode 100644 index d88ef81c78f96601ba4fa61ee32bc90da4683649..0000000000000000000000000000000000000000 --- a/backwards_TL/e902fx68/200003584/sae_weights.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:faeff4d4e2cb069296723e02137a1f7c2262f85b9207a97ff0c44591a669b995 -size 37801344 diff --git a/backwards_TL/e902fx68/200003584/sparsity.safetensors b/backwards_TL/e902fx68/200003584/sparsity.safetensors deleted file mode 100644 index 1b8d9be6ae7ecade82eac9005425bccee9d12941..0000000000000000000000000000000000000000 --- a/backwards_TL/e902fx68/200003584/sparsity.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:bbe8a880c3cd75eab5e4a2cff2565620fe824fb546a769b99cfac144975a7a80 -size 24656 diff --git a/backwards_TL/e902fx68/300003328/cfg.json b/backwards_TL/e902fx68/300003328/cfg.json deleted file mode 100644 index 30b421bfea79f4c8c16fa8d01c38528a56328a03..0000000000000000000000000000000000000000 --- a/backwards_TL/e902fx68/300003328/cfg.json +++ /dev/null @@ -1 +0,0 @@ -{"model_name": "pythia-160m-deduped", "model_class_name": "HookedTransformer", "hook_name": "blocks.1.hook_resid_post", "hook_eval": "NOT_IN_USE", "hook_layer": 1, "hook_head_index": null, "dataset_path": "NeelNanda/pile-small-tokenized-2b", "dataset_trust_remote_code": true, "streaming": true, "is_dataset_tokenized": true, "context_size": 1024, "use_cached_activations": false, "cached_activations_path": null, "architecture": "jumprelu", "d_in": 768, "d_sae": 6144, "b_dec_init_method": "zeros", "expansion_factor": 8, "activation_fn": "relu", "activation_fn_kwargs": {}, "normalize_sae_decoder": true, "noise_scale": 0.0, "from_pretrained_path": "./hub/models--mech-interp--pythia-160m-deduped-rs-post/snapshots/ad21dc2dd4070805dbeb842dcbfa14e6ad74e2b9/L2", "apply_b_dec_to_input": false, "decoder_orthogonal_init": false, "decoder_heuristic_init": false, "init_encoder_as_decoder_transpose": false, "n_batches_in_buffer": 128, "training_tokens": 500000000, "finetuning_tokens": 0, "store_batch_size_prompts": 8, "train_batch_size_tokens": 4096, "normalize_activations": "none", "device": "cuda", "act_store_device": "cuda", "seed": 42, "dtype": "float32", "prepend_bos": false, "autocast": false, "autocast_lm": false, "compile_llm": false, "llm_compilation_mode": null, "compile_sae": false, "sae_compilation_mode": null, "adam_beta1": 0, "adam_beta2": 0.999, "mse_loss_normalization": null, "l1_coefficient": 1, "lp_norm": 1, "scale_sparsity_penalty_by_decoder_norm": false, "l1_warm_up_steps": 6103, "lr": 1e-05, "lr_scheduler_name": "constant", "lr_warm_up_steps": 0, "lr_end": 1.0000000000000002e-06, "lr_decay_steps": 24414, "n_restart_cycles": 1, "finetuning_method": null, "use_ghost_grads": false, "feature_sampling_window": 2000, "dead_feature_window": 1000, "dead_feature_threshold": 1e-06, "n_eval_batches": 10, "eval_batch_size_prompts": null, "log_to_wandb": true, "log_activations_store_to_wandb": false, "log_optimizer_state_to_wandb": false, "wandb_project": "sae-transfer-learning", "wandb_id": null, "run_name": "FT_L1_hook_resid_post_L1_1", "wandb_entity": null, "wandb_log_frequency": 30, "eval_every_n_wandb_logs": 100, "resume": false, "n_checkpoints": 5, "checkpoint_path": "checkpoints/e902fx68", "verbose": true, "model_kwargs": {}, "model_from_pretrained_kwargs": {}, "sae_lens_version": "3.14.0", "sae_lens_training_version": "3.14.0", "tokens_per_buffer": 536870912} \ No newline at end of file diff --git a/backwards_TL/e902fx68/300003328/sae_weights.safetensors b/backwards_TL/e902fx68/300003328/sae_weights.safetensors deleted file mode 100644 index dddbfe23d28f788d838b7cfa2a265d0fea2bcd7d..0000000000000000000000000000000000000000 --- a/backwards_TL/e902fx68/300003328/sae_weights.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2512ab962c015caf7df3065b6f9029923b84a6605a9013d77b928741c4d88c2b -size 37801344 diff --git a/backwards_TL/e902fx68/300003328/sparsity.safetensors b/backwards_TL/e902fx68/300003328/sparsity.safetensors deleted file mode 100644 index 4314cf3bc120eda3c25ab3dfd990f495e0f68092..0000000000000000000000000000000000000000 --- a/backwards_TL/e902fx68/300003328/sparsity.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e0bcbdff899ed867b0f7ad7541cc91b42157a2f4899454a7008ca22939ad7a94 -size 24656 diff --git a/backwards_TL/e902fx68/400003072/cfg.json b/backwards_TL/e902fx68/400003072/cfg.json deleted file mode 100644 index 30b421bfea79f4c8c16fa8d01c38528a56328a03..0000000000000000000000000000000000000000 --- a/backwards_TL/e902fx68/400003072/cfg.json +++ /dev/null @@ -1 +0,0 @@ -{"model_name": "pythia-160m-deduped", "model_class_name": "HookedTransformer", "hook_name": "blocks.1.hook_resid_post", "hook_eval": "NOT_IN_USE", "hook_layer": 1, "hook_head_index": null, "dataset_path": "NeelNanda/pile-small-tokenized-2b", "dataset_trust_remote_code": true, "streaming": true, "is_dataset_tokenized": true, "context_size": 1024, "use_cached_activations": false, "cached_activations_path": null, "architecture": "jumprelu", "d_in": 768, "d_sae": 6144, "b_dec_init_method": "zeros", "expansion_factor": 8, "activation_fn": "relu", "activation_fn_kwargs": {}, "normalize_sae_decoder": true, "noise_scale": 0.0, "from_pretrained_path": "./hub/models--mech-interp--pythia-160m-deduped-rs-post/snapshots/ad21dc2dd4070805dbeb842dcbfa14e6ad74e2b9/L2", "apply_b_dec_to_input": false, "decoder_orthogonal_init": false, "decoder_heuristic_init": false, "init_encoder_as_decoder_transpose": false, "n_batches_in_buffer": 128, "training_tokens": 500000000, "finetuning_tokens": 0, "store_batch_size_prompts": 8, "train_batch_size_tokens": 4096, "normalize_activations": "none", "device": "cuda", "act_store_device": "cuda", "seed": 42, "dtype": "float32", "prepend_bos": false, "autocast": false, "autocast_lm": false, "compile_llm": false, "llm_compilation_mode": null, "compile_sae": false, "sae_compilation_mode": null, "adam_beta1": 0, "adam_beta2": 0.999, "mse_loss_normalization": null, "l1_coefficient": 1, "lp_norm": 1, "scale_sparsity_penalty_by_decoder_norm": false, "l1_warm_up_steps": 6103, "lr": 1e-05, "lr_scheduler_name": "constant", "lr_warm_up_steps": 0, "lr_end": 1.0000000000000002e-06, "lr_decay_steps": 24414, "n_restart_cycles": 1, "finetuning_method": null, "use_ghost_grads": false, "feature_sampling_window": 2000, "dead_feature_window": 1000, "dead_feature_threshold": 1e-06, "n_eval_batches": 10, "eval_batch_size_prompts": null, "log_to_wandb": true, "log_activations_store_to_wandb": false, "log_optimizer_state_to_wandb": false, "wandb_project": "sae-transfer-learning", "wandb_id": null, "run_name": "FT_L1_hook_resid_post_L1_1", "wandb_entity": null, "wandb_log_frequency": 30, "eval_every_n_wandb_logs": 100, "resume": false, "n_checkpoints": 5, "checkpoint_path": "checkpoints/e902fx68", "verbose": true, "model_kwargs": {}, "model_from_pretrained_kwargs": {}, "sae_lens_version": "3.14.0", "sae_lens_training_version": "3.14.0", "tokens_per_buffer": 536870912} \ No newline at end of file diff --git a/backwards_TL/e902fx68/400003072/sae_weights.safetensors b/backwards_TL/e902fx68/400003072/sae_weights.safetensors deleted file mode 100644 index 60c2371f9afe7b5d672892f50829d9071ed70100..0000000000000000000000000000000000000000 --- a/backwards_TL/e902fx68/400003072/sae_weights.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b83684c51c5d75b650c6d0c45216a426f53f1d8f29d11b612f12ba4221ac5f5b -size 37801344 diff --git a/backwards_TL/e902fx68/400003072/sparsity.safetensors b/backwards_TL/e902fx68/400003072/sparsity.safetensors deleted file mode 100644 index 5d5242c3c38a536376fe7498da3ff1e5780516ac..0000000000000000000000000000000000000000 --- a/backwards_TL/e902fx68/400003072/sparsity.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:4f505ab8c0d2a825afb14b676b4acef5c3c99827b93de3cba9990018cd6febc3 -size 24656 diff --git a/backwards_TL/e902fx68/final_500002816/cfg.json b/backwards_TL/e902fx68/final_500002816/cfg.json deleted file mode 100644 index 30b421bfea79f4c8c16fa8d01c38528a56328a03..0000000000000000000000000000000000000000 --- a/backwards_TL/e902fx68/final_500002816/cfg.json +++ /dev/null @@ -1 +0,0 @@ -{"model_name": "pythia-160m-deduped", "model_class_name": "HookedTransformer", "hook_name": "blocks.1.hook_resid_post", "hook_eval": "NOT_IN_USE", "hook_layer": 1, "hook_head_index": null, "dataset_path": "NeelNanda/pile-small-tokenized-2b", "dataset_trust_remote_code": true, "streaming": true, "is_dataset_tokenized": true, "context_size": 1024, "use_cached_activations": false, "cached_activations_path": null, "architecture": "jumprelu", "d_in": 768, "d_sae": 6144, "b_dec_init_method": "zeros", "expansion_factor": 8, "activation_fn": "relu", "activation_fn_kwargs": {}, "normalize_sae_decoder": true, "noise_scale": 0.0, "from_pretrained_path": "./hub/models--mech-interp--pythia-160m-deduped-rs-post/snapshots/ad21dc2dd4070805dbeb842dcbfa14e6ad74e2b9/L2", "apply_b_dec_to_input": false, "decoder_orthogonal_init": false, "decoder_heuristic_init": false, "init_encoder_as_decoder_transpose": false, "n_batches_in_buffer": 128, "training_tokens": 500000000, "finetuning_tokens": 0, "store_batch_size_prompts": 8, "train_batch_size_tokens": 4096, "normalize_activations": "none", "device": "cuda", "act_store_device": "cuda", "seed": 42, "dtype": "float32", "prepend_bos": false, "autocast": false, "autocast_lm": false, "compile_llm": false, "llm_compilation_mode": null, "compile_sae": false, "sae_compilation_mode": null, "adam_beta1": 0, "adam_beta2": 0.999, "mse_loss_normalization": null, "l1_coefficient": 1, "lp_norm": 1, "scale_sparsity_penalty_by_decoder_norm": false, "l1_warm_up_steps": 6103, "lr": 1e-05, "lr_scheduler_name": "constant", "lr_warm_up_steps": 0, "lr_end": 1.0000000000000002e-06, "lr_decay_steps": 24414, "n_restart_cycles": 1, "finetuning_method": null, "use_ghost_grads": false, "feature_sampling_window": 2000, "dead_feature_window": 1000, "dead_feature_threshold": 1e-06, "n_eval_batches": 10, "eval_batch_size_prompts": null, "log_to_wandb": true, "log_activations_store_to_wandb": false, "log_optimizer_state_to_wandb": false, "wandb_project": "sae-transfer-learning", "wandb_id": null, "run_name": "FT_L1_hook_resid_post_L1_1", "wandb_entity": null, "wandb_log_frequency": 30, "eval_every_n_wandb_logs": 100, "resume": false, "n_checkpoints": 5, "checkpoint_path": "checkpoints/e902fx68", "verbose": true, "model_kwargs": {}, "model_from_pretrained_kwargs": {}, "sae_lens_version": "3.14.0", "sae_lens_training_version": "3.14.0", "tokens_per_buffer": 536870912} \ No newline at end of file diff --git a/backwards_TL/e902fx68/final_500002816/sae_weights.safetensors b/backwards_TL/e902fx68/final_500002816/sae_weights.safetensors deleted file mode 100644 index 800593f595d2539f3c2336b7d2bb07c1dfdef838..0000000000000000000000000000000000000000 --- a/backwards_TL/e902fx68/final_500002816/sae_weights.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:eede70447401ff9ab88d6e2a2d2bd52fae4035b2edbaca337bc67a9cd8ac2d03 -size 37801344 diff --git a/backwards_TL/e902fx68/final_500002816/sparsity.safetensors b/backwards_TL/e902fx68/final_500002816/sparsity.safetensors deleted file mode 100644 index ac212c8c64725e7a7c2b4cfcfa0019d2517cc3de..0000000000000000000000000000000000000000 --- a/backwards_TL/e902fx68/final_500002816/sparsity.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c4b05feb5c4cf7cbb3ce4648e8fd36be3924fc33a9d942232ad189d1c460a267 -size 24656 diff --git a/backwards_TL/efgybgxl/100003840/cfg.json b/backwards_TL/efgybgxl/100003840/cfg.json deleted file mode 100644 index cc5047640a2fb1ee5f7ecf4cfadb049c99cfadc1..0000000000000000000000000000000000000000 --- a/backwards_TL/efgybgxl/100003840/cfg.json +++ /dev/null @@ -1 +0,0 @@ -{"model_name": "pythia-160m-deduped", "model_class_name": "HookedTransformer", "hook_name": "blocks.7.hook_resid_post", "hook_eval": "NOT_IN_USE", "hook_layer": 7, "hook_head_index": null, "dataset_path": "NeelNanda/pile-small-tokenized-2b", "dataset_trust_remote_code": true, "streaming": true, "is_dataset_tokenized": true, "context_size": 1024, "use_cached_activations": false, "cached_activations_path": null, "architecture": "jumprelu", "d_in": 768, "d_sae": 6144, "b_dec_init_method": "zeros", "expansion_factor": 8, "activation_fn": "relu", "activation_fn_kwargs": {}, "normalize_sae_decoder": true, "noise_scale": 0.0, "from_pretrained_path": "./hub/models--mech-interp--pythia-160m-deduped-rs-post/snapshots/ad21dc2dd4070805dbeb842dcbfa14e6ad74e2b9/L8", "apply_b_dec_to_input": false, "decoder_orthogonal_init": false, "decoder_heuristic_init": false, "init_encoder_as_decoder_transpose": false, "n_batches_in_buffer": 128, "training_tokens": 500000000, "finetuning_tokens": 0, "store_batch_size_prompts": 8, "train_batch_size_tokens": 4096, "normalize_activations": "none", "device": "cuda", "act_store_device": "cuda", "seed": 42, "dtype": "float32", "prepend_bos": false, "autocast": false, "autocast_lm": false, "compile_llm": false, "llm_compilation_mode": null, "compile_sae": false, "sae_compilation_mode": null, "adam_beta1": 0, "adam_beta2": 0.999, "mse_loss_normalization": null, "l1_coefficient": 1, "lp_norm": 1, "scale_sparsity_penalty_by_decoder_norm": false, "l1_warm_up_steps": 6103, "lr": 1e-05, "lr_scheduler_name": "constant", "lr_warm_up_steps": 0, "lr_end": 1.0000000000000002e-06, "lr_decay_steps": 24414, "n_restart_cycles": 1, "finetuning_method": null, "use_ghost_grads": false, "feature_sampling_window": 2000, "dead_feature_window": 1000, "dead_feature_threshold": 1e-06, "n_eval_batches": 10, "eval_batch_size_prompts": null, "log_to_wandb": true, "log_activations_store_to_wandb": false, "log_optimizer_state_to_wandb": false, "wandb_project": "sae-transfer-learning", "wandb_id": null, "run_name": "FT_L7_hook_resid_post_L1_1", "wandb_entity": null, "wandb_log_frequency": 30, "eval_every_n_wandb_logs": 100, "resume": false, "n_checkpoints": 5, "checkpoint_path": "checkpoints/efgybgxl", "verbose": true, "model_kwargs": {}, "model_from_pretrained_kwargs": {}, "sae_lens_version": "3.14.0", "sae_lens_training_version": "3.14.0", "tokens_per_buffer": 536870912} \ No newline at end of file diff --git a/backwards_TL/efgybgxl/100003840/sae_weights.safetensors b/backwards_TL/efgybgxl/100003840/sae_weights.safetensors deleted file mode 100644 index fad81de676c184a0868d694e3197e04c912cf7e8..0000000000000000000000000000000000000000 --- a/backwards_TL/efgybgxl/100003840/sae_weights.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:7a6b7bb089ad0220539a66c83213f3ccb57a880e918764bf106a6ac1a780b9c5 -size 37801344 diff --git a/backwards_TL/efgybgxl/100003840/sparsity.safetensors b/backwards_TL/efgybgxl/100003840/sparsity.safetensors deleted file mode 100644 index 4b884640719cb82b5404d8e44414277f5b5512a5..0000000000000000000000000000000000000000 --- a/backwards_TL/efgybgxl/100003840/sparsity.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ef2e3b483f5d038ae621b0b550c350ee17cc00820d4c1af004265670fa84d28a -size 24656 diff --git a/backwards_TL/efgybgxl/200003584/cfg.json b/backwards_TL/efgybgxl/200003584/cfg.json deleted file mode 100644 index cc5047640a2fb1ee5f7ecf4cfadb049c99cfadc1..0000000000000000000000000000000000000000 --- a/backwards_TL/efgybgxl/200003584/cfg.json +++ /dev/null @@ -1 +0,0 @@ -{"model_name": "pythia-160m-deduped", "model_class_name": "HookedTransformer", "hook_name": "blocks.7.hook_resid_post", "hook_eval": "NOT_IN_USE", "hook_layer": 7, "hook_head_index": null, "dataset_path": "NeelNanda/pile-small-tokenized-2b", "dataset_trust_remote_code": true, "streaming": true, "is_dataset_tokenized": true, "context_size": 1024, "use_cached_activations": false, "cached_activations_path": null, "architecture": "jumprelu", "d_in": 768, "d_sae": 6144, "b_dec_init_method": "zeros", "expansion_factor": 8, "activation_fn": "relu", "activation_fn_kwargs": {}, "normalize_sae_decoder": true, "noise_scale": 0.0, "from_pretrained_path": "./hub/models--mech-interp--pythia-160m-deduped-rs-post/snapshots/ad21dc2dd4070805dbeb842dcbfa14e6ad74e2b9/L8", "apply_b_dec_to_input": false, "decoder_orthogonal_init": false, "decoder_heuristic_init": false, "init_encoder_as_decoder_transpose": false, "n_batches_in_buffer": 128, "training_tokens": 500000000, "finetuning_tokens": 0, "store_batch_size_prompts": 8, "train_batch_size_tokens": 4096, "normalize_activations": "none", "device": "cuda", "act_store_device": "cuda", "seed": 42, "dtype": "float32", "prepend_bos": false, "autocast": false, "autocast_lm": false, "compile_llm": false, "llm_compilation_mode": null, "compile_sae": false, "sae_compilation_mode": null, "adam_beta1": 0, "adam_beta2": 0.999, "mse_loss_normalization": null, "l1_coefficient": 1, "lp_norm": 1, "scale_sparsity_penalty_by_decoder_norm": false, "l1_warm_up_steps": 6103, "lr": 1e-05, "lr_scheduler_name": "constant", "lr_warm_up_steps": 0, "lr_end": 1.0000000000000002e-06, "lr_decay_steps": 24414, "n_restart_cycles": 1, "finetuning_method": null, "use_ghost_grads": false, "feature_sampling_window": 2000, "dead_feature_window": 1000, "dead_feature_threshold": 1e-06, "n_eval_batches": 10, "eval_batch_size_prompts": null, "log_to_wandb": true, "log_activations_store_to_wandb": false, "log_optimizer_state_to_wandb": false, "wandb_project": "sae-transfer-learning", "wandb_id": null, "run_name": "FT_L7_hook_resid_post_L1_1", "wandb_entity": null, "wandb_log_frequency": 30, "eval_every_n_wandb_logs": 100, "resume": false, "n_checkpoints": 5, "checkpoint_path": "checkpoints/efgybgxl", "verbose": true, "model_kwargs": {}, "model_from_pretrained_kwargs": {}, "sae_lens_version": "3.14.0", "sae_lens_training_version": "3.14.0", "tokens_per_buffer": 536870912} \ No newline at end of file diff --git a/backwards_TL/efgybgxl/200003584/sae_weights.safetensors b/backwards_TL/efgybgxl/200003584/sae_weights.safetensors deleted file mode 100644 index 6adbbc69bd7b6301cc35f3cb53152f0553b2b618..0000000000000000000000000000000000000000 --- a/backwards_TL/efgybgxl/200003584/sae_weights.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:772ba6e9b9826ebb9fadc0cc8304f53921da1b2b7dc86c49ec82f217963d75c2 -size 37801344 diff --git a/backwards_TL/efgybgxl/200003584/sparsity.safetensors b/backwards_TL/efgybgxl/200003584/sparsity.safetensors deleted file mode 100644 index 5d659b221b150f551a37368defcf1dfbca58896a..0000000000000000000000000000000000000000 --- a/backwards_TL/efgybgxl/200003584/sparsity.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a018b42b4c0ec0b2a034b0e486074c185ec34feb952a895c498cabfbcf1cfbf4 -size 24656 diff --git a/backwards_TL/efgybgxl/300003328/cfg.json b/backwards_TL/efgybgxl/300003328/cfg.json deleted file mode 100644 index cc5047640a2fb1ee5f7ecf4cfadb049c99cfadc1..0000000000000000000000000000000000000000 --- a/backwards_TL/efgybgxl/300003328/cfg.json +++ /dev/null @@ -1 +0,0 @@ -{"model_name": "pythia-160m-deduped", "model_class_name": "HookedTransformer", "hook_name": "blocks.7.hook_resid_post", "hook_eval": "NOT_IN_USE", "hook_layer": 7, "hook_head_index": null, "dataset_path": "NeelNanda/pile-small-tokenized-2b", "dataset_trust_remote_code": true, "streaming": true, "is_dataset_tokenized": true, "context_size": 1024, "use_cached_activations": false, "cached_activations_path": null, "architecture": "jumprelu", "d_in": 768, "d_sae": 6144, "b_dec_init_method": "zeros", "expansion_factor": 8, "activation_fn": "relu", "activation_fn_kwargs": {}, "normalize_sae_decoder": true, "noise_scale": 0.0, "from_pretrained_path": "./hub/models--mech-interp--pythia-160m-deduped-rs-post/snapshots/ad21dc2dd4070805dbeb842dcbfa14e6ad74e2b9/L8", "apply_b_dec_to_input": false, "decoder_orthogonal_init": false, "decoder_heuristic_init": false, "init_encoder_as_decoder_transpose": false, "n_batches_in_buffer": 128, "training_tokens": 500000000, "finetuning_tokens": 0, "store_batch_size_prompts": 8, "train_batch_size_tokens": 4096, "normalize_activations": "none", "device": "cuda", "act_store_device": "cuda", "seed": 42, "dtype": "float32", "prepend_bos": false, "autocast": false, "autocast_lm": false, "compile_llm": false, "llm_compilation_mode": null, "compile_sae": false, "sae_compilation_mode": null, "adam_beta1": 0, "adam_beta2": 0.999, "mse_loss_normalization": null, "l1_coefficient": 1, "lp_norm": 1, "scale_sparsity_penalty_by_decoder_norm": false, "l1_warm_up_steps": 6103, "lr": 1e-05, "lr_scheduler_name": "constant", "lr_warm_up_steps": 0, "lr_end": 1.0000000000000002e-06, "lr_decay_steps": 24414, "n_restart_cycles": 1, "finetuning_method": null, "use_ghost_grads": false, "feature_sampling_window": 2000, "dead_feature_window": 1000, "dead_feature_threshold": 1e-06, "n_eval_batches": 10, "eval_batch_size_prompts": null, "log_to_wandb": true, "log_activations_store_to_wandb": false, "log_optimizer_state_to_wandb": false, "wandb_project": "sae-transfer-learning", "wandb_id": null, "run_name": "FT_L7_hook_resid_post_L1_1", "wandb_entity": null, "wandb_log_frequency": 30, "eval_every_n_wandb_logs": 100, "resume": false, "n_checkpoints": 5, "checkpoint_path": "checkpoints/efgybgxl", "verbose": true, "model_kwargs": {}, "model_from_pretrained_kwargs": {}, "sae_lens_version": "3.14.0", "sae_lens_training_version": "3.14.0", "tokens_per_buffer": 536870912} \ No newline at end of file diff --git a/backwards_TL/efgybgxl/300003328/sae_weights.safetensors b/backwards_TL/efgybgxl/300003328/sae_weights.safetensors deleted file mode 100644 index 1278ab9b1aeb9c82dbbaab48f8b23e73d03553ce..0000000000000000000000000000000000000000 --- a/backwards_TL/efgybgxl/300003328/sae_weights.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:95d9fd138ecd69456ed2579e8222424e14b5bae788f03e21a77b3daa96a5ab64 -size 37801344 diff --git a/backwards_TL/efgybgxl/300003328/sparsity.safetensors b/backwards_TL/efgybgxl/300003328/sparsity.safetensors deleted file mode 100644 index 55cde25eed5c2c73045dc9fdad1b9a5e8fa4b6a1..0000000000000000000000000000000000000000 --- a/backwards_TL/efgybgxl/300003328/sparsity.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:91e07f91e4d53e477a0790b5ea56806ab746e970d4581414bf66f95c5949dd04 -size 24656 diff --git a/backwards_TL/efgybgxl/400003072/cfg.json b/backwards_TL/efgybgxl/400003072/cfg.json deleted file mode 100644 index cc5047640a2fb1ee5f7ecf4cfadb049c99cfadc1..0000000000000000000000000000000000000000 --- a/backwards_TL/efgybgxl/400003072/cfg.json +++ /dev/null @@ -1 +0,0 @@ -{"model_name": "pythia-160m-deduped", "model_class_name": "HookedTransformer", "hook_name": "blocks.7.hook_resid_post", "hook_eval": "NOT_IN_USE", "hook_layer": 7, "hook_head_index": null, "dataset_path": "NeelNanda/pile-small-tokenized-2b", "dataset_trust_remote_code": true, "streaming": true, "is_dataset_tokenized": true, "context_size": 1024, "use_cached_activations": false, "cached_activations_path": null, "architecture": "jumprelu", "d_in": 768, "d_sae": 6144, "b_dec_init_method": "zeros", "expansion_factor": 8, "activation_fn": "relu", "activation_fn_kwargs": {}, "normalize_sae_decoder": true, "noise_scale": 0.0, "from_pretrained_path": "./hub/models--mech-interp--pythia-160m-deduped-rs-post/snapshots/ad21dc2dd4070805dbeb842dcbfa14e6ad74e2b9/L8", "apply_b_dec_to_input": false, "decoder_orthogonal_init": false, "decoder_heuristic_init": false, "init_encoder_as_decoder_transpose": false, "n_batches_in_buffer": 128, "training_tokens": 500000000, "finetuning_tokens": 0, "store_batch_size_prompts": 8, "train_batch_size_tokens": 4096, "normalize_activations": "none", "device": "cuda", "act_store_device": "cuda", "seed": 42, "dtype": "float32", "prepend_bos": false, "autocast": false, "autocast_lm": false, "compile_llm": false, "llm_compilation_mode": null, "compile_sae": false, "sae_compilation_mode": null, "adam_beta1": 0, "adam_beta2": 0.999, "mse_loss_normalization": null, "l1_coefficient": 1, "lp_norm": 1, "scale_sparsity_penalty_by_decoder_norm": false, "l1_warm_up_steps": 6103, "lr": 1e-05, "lr_scheduler_name": "constant", "lr_warm_up_steps": 0, "lr_end": 1.0000000000000002e-06, "lr_decay_steps": 24414, "n_restart_cycles": 1, "finetuning_method": null, "use_ghost_grads": false, "feature_sampling_window": 2000, "dead_feature_window": 1000, "dead_feature_threshold": 1e-06, "n_eval_batches": 10, "eval_batch_size_prompts": null, "log_to_wandb": true, "log_activations_store_to_wandb": false, "log_optimizer_state_to_wandb": false, "wandb_project": "sae-transfer-learning", "wandb_id": null, "run_name": "FT_L7_hook_resid_post_L1_1", "wandb_entity": null, "wandb_log_frequency": 30, "eval_every_n_wandb_logs": 100, "resume": false, "n_checkpoints": 5, "checkpoint_path": "checkpoints/efgybgxl", "verbose": true, "model_kwargs": {}, "model_from_pretrained_kwargs": {}, "sae_lens_version": "3.14.0", "sae_lens_training_version": "3.14.0", "tokens_per_buffer": 536870912} \ No newline at end of file diff --git a/backwards_TL/efgybgxl/400003072/sae_weights.safetensors b/backwards_TL/efgybgxl/400003072/sae_weights.safetensors deleted file mode 100644 index e5e57590184fc2198c6f890757e9723ab873ad05..0000000000000000000000000000000000000000 --- a/backwards_TL/efgybgxl/400003072/sae_weights.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b7b3ad77c90c8ccad53ee1f796dad7e74f8461fbdb173adbfda7cbc752b683ca -size 37801344 diff --git a/backwards_TL/efgybgxl/400003072/sparsity.safetensors b/backwards_TL/efgybgxl/400003072/sparsity.safetensors deleted file mode 100644 index 6d212c5b03ec570485a36ddb0b3f91c3373de199..0000000000000000000000000000000000000000 --- a/backwards_TL/efgybgxl/400003072/sparsity.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9bebfa50f11e49d36f7bbbc30dbe0f7c082c49c61cc1b8bb016651a5d1cd59a3 -size 24656 diff --git a/backwards_TL/efgybgxl/final_500002816/cfg.json b/backwards_TL/efgybgxl/final_500002816/cfg.json deleted file mode 100644 index cc5047640a2fb1ee5f7ecf4cfadb049c99cfadc1..0000000000000000000000000000000000000000 --- a/backwards_TL/efgybgxl/final_500002816/cfg.json +++ /dev/null @@ -1 +0,0 @@ -{"model_name": "pythia-160m-deduped", "model_class_name": "HookedTransformer", "hook_name": "blocks.7.hook_resid_post", "hook_eval": "NOT_IN_USE", "hook_layer": 7, "hook_head_index": null, "dataset_path": "NeelNanda/pile-small-tokenized-2b", "dataset_trust_remote_code": true, "streaming": true, "is_dataset_tokenized": true, "context_size": 1024, "use_cached_activations": false, "cached_activations_path": null, "architecture": "jumprelu", "d_in": 768, "d_sae": 6144, "b_dec_init_method": "zeros", "expansion_factor": 8, "activation_fn": "relu", "activation_fn_kwargs": {}, "normalize_sae_decoder": true, "noise_scale": 0.0, "from_pretrained_path": "./hub/models--mech-interp--pythia-160m-deduped-rs-post/snapshots/ad21dc2dd4070805dbeb842dcbfa14e6ad74e2b9/L8", "apply_b_dec_to_input": false, "decoder_orthogonal_init": false, "decoder_heuristic_init": false, "init_encoder_as_decoder_transpose": false, "n_batches_in_buffer": 128, "training_tokens": 500000000, "finetuning_tokens": 0, "store_batch_size_prompts": 8, "train_batch_size_tokens": 4096, "normalize_activations": "none", "device": "cuda", "act_store_device": "cuda", "seed": 42, "dtype": "float32", "prepend_bos": false, "autocast": false, "autocast_lm": false, "compile_llm": false, "llm_compilation_mode": null, "compile_sae": false, "sae_compilation_mode": null, "adam_beta1": 0, "adam_beta2": 0.999, "mse_loss_normalization": null, "l1_coefficient": 1, "lp_norm": 1, "scale_sparsity_penalty_by_decoder_norm": false, "l1_warm_up_steps": 6103, "lr": 1e-05, "lr_scheduler_name": "constant", "lr_warm_up_steps": 0, "lr_end": 1.0000000000000002e-06, "lr_decay_steps": 24414, "n_restart_cycles": 1, "finetuning_method": null, "use_ghost_grads": false, "feature_sampling_window": 2000, "dead_feature_window": 1000, "dead_feature_threshold": 1e-06, "n_eval_batches": 10, "eval_batch_size_prompts": null, "log_to_wandb": true, "log_activations_store_to_wandb": false, "log_optimizer_state_to_wandb": false, "wandb_project": "sae-transfer-learning", "wandb_id": null, "run_name": "FT_L7_hook_resid_post_L1_1", "wandb_entity": null, "wandb_log_frequency": 30, "eval_every_n_wandb_logs": 100, "resume": false, "n_checkpoints": 5, "checkpoint_path": "checkpoints/efgybgxl", "verbose": true, "model_kwargs": {}, "model_from_pretrained_kwargs": {}, "sae_lens_version": "3.14.0", "sae_lens_training_version": "3.14.0", "tokens_per_buffer": 536870912} \ No newline at end of file diff --git a/backwards_TL/efgybgxl/final_500002816/sae_weights.safetensors b/backwards_TL/efgybgxl/final_500002816/sae_weights.safetensors deleted file mode 100644 index 0b747880eb40050468300b9ae9d344447bed0a7e..0000000000000000000000000000000000000000 --- a/backwards_TL/efgybgxl/final_500002816/sae_weights.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c76898c39430ce32559411bccaedb6992eea8f1bd6b7304a8f2a178777079088 -size 37801344 diff --git a/backwards_TL/efgybgxl/final_500002816/sparsity.safetensors b/backwards_TL/efgybgxl/final_500002816/sparsity.safetensors deleted file mode 100644 index 2cda4f0b3e8b0e0149bb08dcc8aa4accff100c75..0000000000000000000000000000000000000000 --- a/backwards_TL/efgybgxl/final_500002816/sparsity.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:63070e572392533cfe3e2323ac0dd0a52c6c50fadf771625a6a7a6f260120ee6 -size 24656 diff --git a/backwards_TL/epfkz598/100003840/cfg.json b/backwards_TL/epfkz598/100003840/cfg.json deleted file mode 100644 index fc8bc157fe3867d28d367eb338c31afa2e04b97e..0000000000000000000000000000000000000000 --- a/backwards_TL/epfkz598/100003840/cfg.json +++ /dev/null @@ -1 +0,0 @@ -{"model_name": "pythia-160m-deduped", "model_class_name": "HookedTransformer", "hook_name": "blocks.6.hook_resid_post", "hook_eval": "NOT_IN_USE", "hook_layer": 6, "hook_head_index": null, "dataset_path": "NeelNanda/pile-small-tokenized-2b", "dataset_trust_remote_code": true, "streaming": true, "is_dataset_tokenized": true, "context_size": 1024, "use_cached_activations": false, "cached_activations_path": null, "architecture": "jumprelu", "d_in": 768, "d_sae": 6144, "b_dec_init_method": "zeros", "expansion_factor": 8, "activation_fn": "relu", "activation_fn_kwargs": {}, "normalize_sae_decoder": true, "noise_scale": 0.0, "from_pretrained_path": "./hub/models--mech-interp--pythia-160m-deduped-rs-post/snapshots/ad21dc2dd4070805dbeb842dcbfa14e6ad74e2b9/L7", "apply_b_dec_to_input": false, "decoder_orthogonal_init": false, "decoder_heuristic_init": false, "init_encoder_as_decoder_transpose": false, "n_batches_in_buffer": 128, "training_tokens": 500000000, "finetuning_tokens": 0, "store_batch_size_prompts": 8, "train_batch_size_tokens": 4096, "normalize_activations": "none", "device": "cuda", "act_store_device": "cuda", "seed": 42, "dtype": "float32", "prepend_bos": false, "autocast": false, "autocast_lm": false, "compile_llm": false, "llm_compilation_mode": null, "compile_sae": false, "sae_compilation_mode": null, "adam_beta1": 0, "adam_beta2": 0.999, "mse_loss_normalization": null, "l1_coefficient": 1, "lp_norm": 1, "scale_sparsity_penalty_by_decoder_norm": false, "l1_warm_up_steps": 6103, "lr": 1e-05, "lr_scheduler_name": "constant", "lr_warm_up_steps": 0, "lr_end": 1.0000000000000002e-06, "lr_decay_steps": 24414, "n_restart_cycles": 1, "finetuning_method": null, "use_ghost_grads": false, "feature_sampling_window": 2000, "dead_feature_window": 1000, "dead_feature_threshold": 1e-06, "n_eval_batches": 10, "eval_batch_size_prompts": null, "log_to_wandb": true, "log_activations_store_to_wandb": false, "log_optimizer_state_to_wandb": false, "wandb_project": "sae-transfer-learning", "wandb_id": null, "run_name": "FT_L6_hook_resid_post_L1_1", "wandb_entity": null, "wandb_log_frequency": 30, "eval_every_n_wandb_logs": 100, "resume": false, "n_checkpoints": 5, "checkpoint_path": "checkpoints/epfkz598", "verbose": true, "model_kwargs": {}, "model_from_pretrained_kwargs": {}, "sae_lens_version": "3.14.0", "sae_lens_training_version": "3.14.0", "tokens_per_buffer": 536870912} \ No newline at end of file diff --git a/backwards_TL/epfkz598/100003840/sae_weights.safetensors b/backwards_TL/epfkz598/100003840/sae_weights.safetensors deleted file mode 100644 index 00c5fca8b6628a5185d2400f391f3ce39e885762..0000000000000000000000000000000000000000 --- a/backwards_TL/epfkz598/100003840/sae_weights.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c43c06472003fa8863482a4d31179ba66e2d11bcb4c9b494ed68132289cdd9d4 -size 37801344 diff --git a/backwards_TL/epfkz598/100003840/sparsity.safetensors b/backwards_TL/epfkz598/100003840/sparsity.safetensors deleted file mode 100644 index 84ca831ad23798defd31f740a4bd45c2d1768209..0000000000000000000000000000000000000000 --- a/backwards_TL/epfkz598/100003840/sparsity.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:22c53cd0b0303427597edf6df858f9d619225fed1f38ebec2ff064cfb23e344e -size 24656 diff --git a/backwards_TL/epfkz598/200003584/cfg.json b/backwards_TL/epfkz598/200003584/cfg.json deleted file mode 100644 index fc8bc157fe3867d28d367eb338c31afa2e04b97e..0000000000000000000000000000000000000000 --- a/backwards_TL/epfkz598/200003584/cfg.json +++ /dev/null @@ -1 +0,0 @@ -{"model_name": "pythia-160m-deduped", "model_class_name": "HookedTransformer", "hook_name": "blocks.6.hook_resid_post", "hook_eval": "NOT_IN_USE", "hook_layer": 6, "hook_head_index": null, "dataset_path": "NeelNanda/pile-small-tokenized-2b", "dataset_trust_remote_code": true, "streaming": true, "is_dataset_tokenized": true, "context_size": 1024, "use_cached_activations": false, "cached_activations_path": null, "architecture": "jumprelu", "d_in": 768, "d_sae": 6144, "b_dec_init_method": "zeros", "expansion_factor": 8, "activation_fn": "relu", "activation_fn_kwargs": {}, "normalize_sae_decoder": true, "noise_scale": 0.0, "from_pretrained_path": "./hub/models--mech-interp--pythia-160m-deduped-rs-post/snapshots/ad21dc2dd4070805dbeb842dcbfa14e6ad74e2b9/L7", "apply_b_dec_to_input": false, "decoder_orthogonal_init": false, "decoder_heuristic_init": false, "init_encoder_as_decoder_transpose": false, "n_batches_in_buffer": 128, "training_tokens": 500000000, "finetuning_tokens": 0, "store_batch_size_prompts": 8, "train_batch_size_tokens": 4096, "normalize_activations": "none", "device": "cuda", "act_store_device": "cuda", "seed": 42, "dtype": "float32", "prepend_bos": false, "autocast": false, "autocast_lm": false, "compile_llm": false, "llm_compilation_mode": null, "compile_sae": false, "sae_compilation_mode": null, "adam_beta1": 0, "adam_beta2": 0.999, "mse_loss_normalization": null, "l1_coefficient": 1, "lp_norm": 1, "scale_sparsity_penalty_by_decoder_norm": false, "l1_warm_up_steps": 6103, "lr": 1e-05, "lr_scheduler_name": "constant", "lr_warm_up_steps": 0, "lr_end": 1.0000000000000002e-06, "lr_decay_steps": 24414, "n_restart_cycles": 1, "finetuning_method": null, "use_ghost_grads": false, "feature_sampling_window": 2000, "dead_feature_window": 1000, "dead_feature_threshold": 1e-06, "n_eval_batches": 10, "eval_batch_size_prompts": null, "log_to_wandb": true, "log_activations_store_to_wandb": false, "log_optimizer_state_to_wandb": false, "wandb_project": "sae-transfer-learning", "wandb_id": null, "run_name": "FT_L6_hook_resid_post_L1_1", "wandb_entity": null, "wandb_log_frequency": 30, "eval_every_n_wandb_logs": 100, "resume": false, "n_checkpoints": 5, "checkpoint_path": "checkpoints/epfkz598", "verbose": true, "model_kwargs": {}, "model_from_pretrained_kwargs": {}, "sae_lens_version": "3.14.0", "sae_lens_training_version": "3.14.0", "tokens_per_buffer": 536870912} \ No newline at end of file diff --git a/backwards_TL/epfkz598/200003584/sae_weights.safetensors b/backwards_TL/epfkz598/200003584/sae_weights.safetensors deleted file mode 100644 index abae81f9ee6fe13b533fd386858868989ba3ad70..0000000000000000000000000000000000000000 --- a/backwards_TL/epfkz598/200003584/sae_weights.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6a88dbced960067b27e7c48b5a35d6d8e80261211f95f7aa0a1d3fa29a0e3920 -size 37801344 diff --git a/backwards_TL/epfkz598/200003584/sparsity.safetensors b/backwards_TL/epfkz598/200003584/sparsity.safetensors deleted file mode 100644 index fdae5dd2dbb7a2525023fd42d159243c9ef2f86d..0000000000000000000000000000000000000000 --- a/backwards_TL/epfkz598/200003584/sparsity.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b9170f641e430620213573f6298e02af004b5442ff6b69a747c71fddcee70ec8 -size 24656 diff --git a/backwards_TL/epfkz598/300003328/cfg.json b/backwards_TL/epfkz598/300003328/cfg.json deleted file mode 100644 index fc8bc157fe3867d28d367eb338c31afa2e04b97e..0000000000000000000000000000000000000000 --- a/backwards_TL/epfkz598/300003328/cfg.json +++ /dev/null @@ -1 +0,0 @@ -{"model_name": "pythia-160m-deduped", "model_class_name": "HookedTransformer", "hook_name": "blocks.6.hook_resid_post", "hook_eval": "NOT_IN_USE", "hook_layer": 6, "hook_head_index": null, "dataset_path": "NeelNanda/pile-small-tokenized-2b", "dataset_trust_remote_code": true, "streaming": true, "is_dataset_tokenized": true, "context_size": 1024, "use_cached_activations": false, "cached_activations_path": null, "architecture": "jumprelu", "d_in": 768, "d_sae": 6144, "b_dec_init_method": "zeros", "expansion_factor": 8, "activation_fn": "relu", "activation_fn_kwargs": {}, "normalize_sae_decoder": true, "noise_scale": 0.0, "from_pretrained_path": "./hub/models--mech-interp--pythia-160m-deduped-rs-post/snapshots/ad21dc2dd4070805dbeb842dcbfa14e6ad74e2b9/L7", "apply_b_dec_to_input": false, "decoder_orthogonal_init": false, "decoder_heuristic_init": false, "init_encoder_as_decoder_transpose": false, "n_batches_in_buffer": 128, "training_tokens": 500000000, "finetuning_tokens": 0, "store_batch_size_prompts": 8, "train_batch_size_tokens": 4096, "normalize_activations": "none", "device": "cuda", "act_store_device": "cuda", "seed": 42, "dtype": "float32", "prepend_bos": false, "autocast": false, "autocast_lm": false, "compile_llm": false, "llm_compilation_mode": null, "compile_sae": false, "sae_compilation_mode": null, "adam_beta1": 0, "adam_beta2": 0.999, "mse_loss_normalization": null, "l1_coefficient": 1, "lp_norm": 1, "scale_sparsity_penalty_by_decoder_norm": false, "l1_warm_up_steps": 6103, "lr": 1e-05, "lr_scheduler_name": "constant", "lr_warm_up_steps": 0, "lr_end": 1.0000000000000002e-06, "lr_decay_steps": 24414, "n_restart_cycles": 1, "finetuning_method": null, "use_ghost_grads": false, "feature_sampling_window": 2000, "dead_feature_window": 1000, "dead_feature_threshold": 1e-06, "n_eval_batches": 10, "eval_batch_size_prompts": null, "log_to_wandb": true, "log_activations_store_to_wandb": false, "log_optimizer_state_to_wandb": false, "wandb_project": "sae-transfer-learning", "wandb_id": null, "run_name": "FT_L6_hook_resid_post_L1_1", "wandb_entity": null, "wandb_log_frequency": 30, "eval_every_n_wandb_logs": 100, "resume": false, "n_checkpoints": 5, "checkpoint_path": "checkpoints/epfkz598", "verbose": true, "model_kwargs": {}, "model_from_pretrained_kwargs": {}, "sae_lens_version": "3.14.0", "sae_lens_training_version": "3.14.0", "tokens_per_buffer": 536870912} \ No newline at end of file diff --git a/backwards_TL/epfkz598/300003328/sae_weights.safetensors b/backwards_TL/epfkz598/300003328/sae_weights.safetensors deleted file mode 100644 index ac4eec132aa96af8f7d8986ab6631a827d17accb..0000000000000000000000000000000000000000 --- a/backwards_TL/epfkz598/300003328/sae_weights.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a200030381c7771e39d384bcb4ec2aad8c48113d21ca1ce2753eb5a03ecdc525 -size 37801344 diff --git a/backwards_TL/epfkz598/300003328/sparsity.safetensors b/backwards_TL/epfkz598/300003328/sparsity.safetensors deleted file mode 100644 index 4be424796f4a304c2ffa2a98d82930e0cad7bdc3..0000000000000000000000000000000000000000 --- a/backwards_TL/epfkz598/300003328/sparsity.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d16b8c33ff98e1077ac2507f0f3ae05039d3d2ce2c370a81b1cf6e18a5e9e25a -size 24656 diff --git a/backwards_TL/epfkz598/400003072/cfg.json b/backwards_TL/epfkz598/400003072/cfg.json deleted file mode 100644 index fc8bc157fe3867d28d367eb338c31afa2e04b97e..0000000000000000000000000000000000000000 --- a/backwards_TL/epfkz598/400003072/cfg.json +++ /dev/null @@ -1 +0,0 @@ -{"model_name": "pythia-160m-deduped", "model_class_name": "HookedTransformer", "hook_name": "blocks.6.hook_resid_post", "hook_eval": "NOT_IN_USE", "hook_layer": 6, "hook_head_index": null, "dataset_path": "NeelNanda/pile-small-tokenized-2b", "dataset_trust_remote_code": true, "streaming": true, "is_dataset_tokenized": true, "context_size": 1024, "use_cached_activations": false, "cached_activations_path": null, "architecture": "jumprelu", "d_in": 768, "d_sae": 6144, "b_dec_init_method": "zeros", "expansion_factor": 8, "activation_fn": "relu", "activation_fn_kwargs": {}, "normalize_sae_decoder": true, "noise_scale": 0.0, "from_pretrained_path": "./hub/models--mech-interp--pythia-160m-deduped-rs-post/snapshots/ad21dc2dd4070805dbeb842dcbfa14e6ad74e2b9/L7", "apply_b_dec_to_input": false, "decoder_orthogonal_init": false, "decoder_heuristic_init": false, "init_encoder_as_decoder_transpose": false, "n_batches_in_buffer": 128, "training_tokens": 500000000, "finetuning_tokens": 0, "store_batch_size_prompts": 8, "train_batch_size_tokens": 4096, "normalize_activations": "none", "device": "cuda", "act_store_device": "cuda", "seed": 42, "dtype": "float32", "prepend_bos": false, "autocast": false, "autocast_lm": false, "compile_llm": false, "llm_compilation_mode": null, "compile_sae": false, "sae_compilation_mode": null, "adam_beta1": 0, "adam_beta2": 0.999, "mse_loss_normalization": null, "l1_coefficient": 1, "lp_norm": 1, "scale_sparsity_penalty_by_decoder_norm": false, "l1_warm_up_steps": 6103, "lr": 1e-05, "lr_scheduler_name": "constant", "lr_warm_up_steps": 0, "lr_end": 1.0000000000000002e-06, "lr_decay_steps": 24414, "n_restart_cycles": 1, "finetuning_method": null, "use_ghost_grads": false, "feature_sampling_window": 2000, "dead_feature_window": 1000, "dead_feature_threshold": 1e-06, "n_eval_batches": 10, "eval_batch_size_prompts": null, "log_to_wandb": true, "log_activations_store_to_wandb": false, "log_optimizer_state_to_wandb": false, "wandb_project": "sae-transfer-learning", "wandb_id": null, "run_name": "FT_L6_hook_resid_post_L1_1", "wandb_entity": null, "wandb_log_frequency": 30, "eval_every_n_wandb_logs": 100, "resume": false, "n_checkpoints": 5, "checkpoint_path": "checkpoints/epfkz598", "verbose": true, "model_kwargs": {}, "model_from_pretrained_kwargs": {}, "sae_lens_version": "3.14.0", "sae_lens_training_version": "3.14.0", "tokens_per_buffer": 536870912} \ No newline at end of file diff --git a/backwards_TL/epfkz598/400003072/sae_weights.safetensors b/backwards_TL/epfkz598/400003072/sae_weights.safetensors deleted file mode 100644 index 9b2b2d6a13743bf75da5a946fe7c88d30c58a709..0000000000000000000000000000000000000000 --- a/backwards_TL/epfkz598/400003072/sae_weights.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9904604b5a15528f04d79289199beab899f85ac23716c0faa7744e38ceb02c62 -size 37801344 diff --git a/backwards_TL/epfkz598/400003072/sparsity.safetensors b/backwards_TL/epfkz598/400003072/sparsity.safetensors deleted file mode 100644 index 44ee79592f98084bb40aa09fb21c10c60bc55819..0000000000000000000000000000000000000000 --- a/backwards_TL/epfkz598/400003072/sparsity.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b870949450992706701a2ae6e39b74c2ba2205bcda46ef1c90f67eb1f1ac62ba -size 24656 diff --git a/backwards_TL/epfkz598/final_500002816/cfg.json b/backwards_TL/epfkz598/final_500002816/cfg.json deleted file mode 100644 index fc8bc157fe3867d28d367eb338c31afa2e04b97e..0000000000000000000000000000000000000000 --- a/backwards_TL/epfkz598/final_500002816/cfg.json +++ /dev/null @@ -1 +0,0 @@ -{"model_name": "pythia-160m-deduped", "model_class_name": "HookedTransformer", "hook_name": "blocks.6.hook_resid_post", "hook_eval": "NOT_IN_USE", "hook_layer": 6, "hook_head_index": null, "dataset_path": "NeelNanda/pile-small-tokenized-2b", "dataset_trust_remote_code": true, "streaming": true, "is_dataset_tokenized": true, "context_size": 1024, "use_cached_activations": false, "cached_activations_path": null, "architecture": "jumprelu", "d_in": 768, "d_sae": 6144, "b_dec_init_method": "zeros", "expansion_factor": 8, "activation_fn": "relu", "activation_fn_kwargs": {}, "normalize_sae_decoder": true, "noise_scale": 0.0, "from_pretrained_path": "./hub/models--mech-interp--pythia-160m-deduped-rs-post/snapshots/ad21dc2dd4070805dbeb842dcbfa14e6ad74e2b9/L7", "apply_b_dec_to_input": false, "decoder_orthogonal_init": false, "decoder_heuristic_init": false, "init_encoder_as_decoder_transpose": false, "n_batches_in_buffer": 128, "training_tokens": 500000000, "finetuning_tokens": 0, "store_batch_size_prompts": 8, "train_batch_size_tokens": 4096, "normalize_activations": "none", "device": "cuda", "act_store_device": "cuda", "seed": 42, "dtype": "float32", "prepend_bos": false, "autocast": false, "autocast_lm": false, "compile_llm": false, "llm_compilation_mode": null, "compile_sae": false, "sae_compilation_mode": null, "adam_beta1": 0, "adam_beta2": 0.999, "mse_loss_normalization": null, "l1_coefficient": 1, "lp_norm": 1, "scale_sparsity_penalty_by_decoder_norm": false, "l1_warm_up_steps": 6103, "lr": 1e-05, "lr_scheduler_name": "constant", "lr_warm_up_steps": 0, "lr_end": 1.0000000000000002e-06, "lr_decay_steps": 24414, "n_restart_cycles": 1, "finetuning_method": null, "use_ghost_grads": false, "feature_sampling_window": 2000, "dead_feature_window": 1000, "dead_feature_threshold": 1e-06, "n_eval_batches": 10, "eval_batch_size_prompts": null, "log_to_wandb": true, "log_activations_store_to_wandb": false, "log_optimizer_state_to_wandb": false, "wandb_project": "sae-transfer-learning", "wandb_id": null, "run_name": "FT_L6_hook_resid_post_L1_1", "wandb_entity": null, "wandb_log_frequency": 30, "eval_every_n_wandb_logs": 100, "resume": false, "n_checkpoints": 5, "checkpoint_path": "checkpoints/epfkz598", "verbose": true, "model_kwargs": {}, "model_from_pretrained_kwargs": {}, "sae_lens_version": "3.14.0", "sae_lens_training_version": "3.14.0", "tokens_per_buffer": 536870912} \ No newline at end of file diff --git a/backwards_TL/epfkz598/final_500002816/sae_weights.safetensors b/backwards_TL/epfkz598/final_500002816/sae_weights.safetensors deleted file mode 100644 index 446fae9e2d4a209a06b63a4bbd903fff45aa9284..0000000000000000000000000000000000000000 --- a/backwards_TL/epfkz598/final_500002816/sae_weights.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:acae715ffa216bd94c800194c257ebe51b41f55795ff6bb0f5081fca7d712a26 -size 37801344 diff --git a/backwards_TL/epfkz598/final_500002816/sparsity.safetensors b/backwards_TL/epfkz598/final_500002816/sparsity.safetensors deleted file mode 100644 index 08311ca31800fe6e02e16ea7494633b4fcd4b42a..0000000000000000000000000000000000000000 --- a/backwards_TL/epfkz598/final_500002816/sparsity.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:1be103810fc424d00209ecf4cb653a8bf06dae505e1d1d17433ca2ac40d3bdca -size 24656 diff --git a/backwards_TL/h76le4wl/100003840/cfg.json b/backwards_TL/h76le4wl/100003840/cfg.json deleted file mode 100644 index 569aa3c63bb70f121421c85d102da99aa35e5355..0000000000000000000000000000000000000000 --- a/backwards_TL/h76le4wl/100003840/cfg.json +++ /dev/null @@ -1 +0,0 @@ -{"model_name": "pythia-160m-deduped", "model_class_name": "HookedTransformer", "hook_name": "blocks.4.hook_resid_post", "hook_eval": "NOT_IN_USE", "hook_layer": 4, "hook_head_index": null, "dataset_path": "NeelNanda/pile-small-tokenized-2b", "dataset_trust_remote_code": true, "streaming": true, "is_dataset_tokenized": true, "context_size": 1024, "use_cached_activations": false, "cached_activations_path": null, "architecture": "jumprelu", "d_in": 768, "d_sae": 6144, "b_dec_init_method": "zeros", "expansion_factor": 8, "activation_fn": "relu", "activation_fn_kwargs": {}, "normalize_sae_decoder": true, "noise_scale": 0.0, "from_pretrained_path": "./hub/models--mech-interp--pythia-160m-deduped-rs-post/snapshots/ad21dc2dd4070805dbeb842dcbfa14e6ad74e2b9/L5", "apply_b_dec_to_input": false, "decoder_orthogonal_init": false, "decoder_heuristic_init": false, "init_encoder_as_decoder_transpose": false, "n_batches_in_buffer": 128, "training_tokens": 500000000, "finetuning_tokens": 0, "store_batch_size_prompts": 8, "train_batch_size_tokens": 4096, "normalize_activations": "none", "device": "cuda", "act_store_device": "cuda", "seed": 42, "dtype": "float32", "prepend_bos": false, "autocast": false, "autocast_lm": false, "compile_llm": false, "llm_compilation_mode": null, "compile_sae": false, "sae_compilation_mode": null, "adam_beta1": 0, "adam_beta2": 0.999, "mse_loss_normalization": null, "l1_coefficient": 1, "lp_norm": 1, "scale_sparsity_penalty_by_decoder_norm": false, "l1_warm_up_steps": 6103, "lr": 1e-05, "lr_scheduler_name": "constant", "lr_warm_up_steps": 0, "lr_end": 1.0000000000000002e-06, "lr_decay_steps": 24414, "n_restart_cycles": 1, "finetuning_method": null, "use_ghost_grads": false, "feature_sampling_window": 2000, "dead_feature_window": 1000, "dead_feature_threshold": 1e-06, "n_eval_batches": 10, "eval_batch_size_prompts": null, "log_to_wandb": true, "log_activations_store_to_wandb": false, "log_optimizer_state_to_wandb": false, "wandb_project": "sae-transfer-learning", "wandb_id": null, "run_name": "FT_L4_hook_resid_post_L1_1", "wandb_entity": null, "wandb_log_frequency": 30, "eval_every_n_wandb_logs": 100, "resume": false, "n_checkpoints": 5, "checkpoint_path": "checkpoints/h76le4wl", "verbose": true, "model_kwargs": {}, "model_from_pretrained_kwargs": {}, "sae_lens_version": "3.14.0", "sae_lens_training_version": "3.14.0", "tokens_per_buffer": 536870912} \ No newline at end of file diff --git a/backwards_TL/h76le4wl/100003840/sae_weights.safetensors b/backwards_TL/h76le4wl/100003840/sae_weights.safetensors deleted file mode 100644 index 68f91dbe45eea38192339d4d53e529d410361df9..0000000000000000000000000000000000000000 --- a/backwards_TL/h76le4wl/100003840/sae_weights.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:37438cb656456f218cac9afe49ffc2890d387c946d79cff4ad4284d74fa4c0dc -size 37801344 diff --git a/backwards_TL/h76le4wl/100003840/sparsity.safetensors b/backwards_TL/h76le4wl/100003840/sparsity.safetensors deleted file mode 100644 index 63bd563184dfcf85101193ea2571f57ac12f1597..0000000000000000000000000000000000000000 --- a/backwards_TL/h76le4wl/100003840/sparsity.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:231a8e5084b1deb2f21129fa81166d9a11e15316f6003167ffceeb8a1e9b7837 -size 24656 diff --git a/backwards_TL/h76le4wl/200003584/cfg.json b/backwards_TL/h76le4wl/200003584/cfg.json deleted file mode 100644 index 569aa3c63bb70f121421c85d102da99aa35e5355..0000000000000000000000000000000000000000 --- a/backwards_TL/h76le4wl/200003584/cfg.json +++ /dev/null @@ -1 +0,0 @@ -{"model_name": "pythia-160m-deduped", "model_class_name": "HookedTransformer", "hook_name": "blocks.4.hook_resid_post", "hook_eval": "NOT_IN_USE", "hook_layer": 4, "hook_head_index": null, "dataset_path": "NeelNanda/pile-small-tokenized-2b", "dataset_trust_remote_code": true, "streaming": true, "is_dataset_tokenized": true, "context_size": 1024, "use_cached_activations": false, "cached_activations_path": null, "architecture": "jumprelu", "d_in": 768, "d_sae": 6144, "b_dec_init_method": "zeros", "expansion_factor": 8, "activation_fn": "relu", "activation_fn_kwargs": {}, "normalize_sae_decoder": true, "noise_scale": 0.0, "from_pretrained_path": "./hub/models--mech-interp--pythia-160m-deduped-rs-post/snapshots/ad21dc2dd4070805dbeb842dcbfa14e6ad74e2b9/L5", "apply_b_dec_to_input": false, "decoder_orthogonal_init": false, "decoder_heuristic_init": false, "init_encoder_as_decoder_transpose": false, "n_batches_in_buffer": 128, "training_tokens": 500000000, "finetuning_tokens": 0, "store_batch_size_prompts": 8, "train_batch_size_tokens": 4096, "normalize_activations": "none", "device": "cuda", "act_store_device": "cuda", "seed": 42, "dtype": "float32", "prepend_bos": false, "autocast": false, "autocast_lm": false, "compile_llm": false, "llm_compilation_mode": null, "compile_sae": false, "sae_compilation_mode": null, "adam_beta1": 0, "adam_beta2": 0.999, "mse_loss_normalization": null, "l1_coefficient": 1, "lp_norm": 1, "scale_sparsity_penalty_by_decoder_norm": false, "l1_warm_up_steps": 6103, "lr": 1e-05, "lr_scheduler_name": "constant", "lr_warm_up_steps": 0, "lr_end": 1.0000000000000002e-06, "lr_decay_steps": 24414, "n_restart_cycles": 1, "finetuning_method": null, "use_ghost_grads": false, "feature_sampling_window": 2000, "dead_feature_window": 1000, "dead_feature_threshold": 1e-06, "n_eval_batches": 10, "eval_batch_size_prompts": null, "log_to_wandb": true, "log_activations_store_to_wandb": false, "log_optimizer_state_to_wandb": false, "wandb_project": "sae-transfer-learning", "wandb_id": null, "run_name": "FT_L4_hook_resid_post_L1_1", "wandb_entity": null, "wandb_log_frequency": 30, "eval_every_n_wandb_logs": 100, "resume": false, "n_checkpoints": 5, "checkpoint_path": "checkpoints/h76le4wl", "verbose": true, "model_kwargs": {}, "model_from_pretrained_kwargs": {}, "sae_lens_version": "3.14.0", "sae_lens_training_version": "3.14.0", "tokens_per_buffer": 536870912} \ No newline at end of file diff --git a/backwards_TL/h76le4wl/200003584/sae_weights.safetensors b/backwards_TL/h76le4wl/200003584/sae_weights.safetensors deleted file mode 100644 index a5eb0b65cbdb17f9ed80790a547990e6b44a5967..0000000000000000000000000000000000000000 --- a/backwards_TL/h76le4wl/200003584/sae_weights.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:7e4b0c2a22db2b4dc2cd66823d54a888163dcb9dc99c67a589188b7aa250ce64 -size 37801344 diff --git a/backwards_TL/h76le4wl/200003584/sparsity.safetensors b/backwards_TL/h76le4wl/200003584/sparsity.safetensors deleted file mode 100644 index 544086d020d288c60d36d1ed215c47eae09d54d2..0000000000000000000000000000000000000000 --- a/backwards_TL/h76le4wl/200003584/sparsity.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b415d6ac03136f83f302f34bfe0a35c1dd375be36bce71755a8588ba4328fc2f -size 24656 diff --git a/backwards_TL/h76le4wl/300003328/cfg.json b/backwards_TL/h76le4wl/300003328/cfg.json deleted file mode 100644 index 569aa3c63bb70f121421c85d102da99aa35e5355..0000000000000000000000000000000000000000 --- a/backwards_TL/h76le4wl/300003328/cfg.json +++ /dev/null @@ -1 +0,0 @@ -{"model_name": "pythia-160m-deduped", "model_class_name": "HookedTransformer", "hook_name": "blocks.4.hook_resid_post", "hook_eval": "NOT_IN_USE", "hook_layer": 4, "hook_head_index": null, "dataset_path": "NeelNanda/pile-small-tokenized-2b", "dataset_trust_remote_code": true, "streaming": true, "is_dataset_tokenized": true, "context_size": 1024, "use_cached_activations": false, "cached_activations_path": null, "architecture": "jumprelu", "d_in": 768, "d_sae": 6144, "b_dec_init_method": "zeros", "expansion_factor": 8, "activation_fn": "relu", "activation_fn_kwargs": {}, "normalize_sae_decoder": true, "noise_scale": 0.0, "from_pretrained_path": "./hub/models--mech-interp--pythia-160m-deduped-rs-post/snapshots/ad21dc2dd4070805dbeb842dcbfa14e6ad74e2b9/L5", "apply_b_dec_to_input": false, "decoder_orthogonal_init": false, "decoder_heuristic_init": false, "init_encoder_as_decoder_transpose": false, "n_batches_in_buffer": 128, "training_tokens": 500000000, "finetuning_tokens": 0, "store_batch_size_prompts": 8, "train_batch_size_tokens": 4096, "normalize_activations": "none", "device": "cuda", "act_store_device": "cuda", "seed": 42, "dtype": "float32", "prepend_bos": false, "autocast": false, "autocast_lm": false, "compile_llm": false, "llm_compilation_mode": null, "compile_sae": false, "sae_compilation_mode": null, "adam_beta1": 0, "adam_beta2": 0.999, "mse_loss_normalization": null, "l1_coefficient": 1, "lp_norm": 1, "scale_sparsity_penalty_by_decoder_norm": false, "l1_warm_up_steps": 6103, "lr": 1e-05, "lr_scheduler_name": "constant", "lr_warm_up_steps": 0, "lr_end": 1.0000000000000002e-06, "lr_decay_steps": 24414, "n_restart_cycles": 1, "finetuning_method": null, "use_ghost_grads": false, "feature_sampling_window": 2000, "dead_feature_window": 1000, "dead_feature_threshold": 1e-06, "n_eval_batches": 10, "eval_batch_size_prompts": null, "log_to_wandb": true, "log_activations_store_to_wandb": false, "log_optimizer_state_to_wandb": false, "wandb_project": "sae-transfer-learning", "wandb_id": null, "run_name": "FT_L4_hook_resid_post_L1_1", "wandb_entity": null, "wandb_log_frequency": 30, "eval_every_n_wandb_logs": 100, "resume": false, "n_checkpoints": 5, "checkpoint_path": "checkpoints/h76le4wl", "verbose": true, "model_kwargs": {}, "model_from_pretrained_kwargs": {}, "sae_lens_version": "3.14.0", "sae_lens_training_version": "3.14.0", "tokens_per_buffer": 536870912} \ No newline at end of file diff --git a/backwards_TL/h76le4wl/300003328/sae_weights.safetensors b/backwards_TL/h76le4wl/300003328/sae_weights.safetensors deleted file mode 100644 index 2d34a7dc9f1d707bb5cf846a89460e77acc54c02..0000000000000000000000000000000000000000 --- a/backwards_TL/h76le4wl/300003328/sae_weights.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:663f7e3f04d5263084a7efdc57d755694259c949262d3c8ed4cd088e4b0a2af6 -size 37801344 diff --git a/backwards_TL/h76le4wl/300003328/sparsity.safetensors b/backwards_TL/h76le4wl/300003328/sparsity.safetensors deleted file mode 100644 index 0ae3fff47b84b6993ec4195cb1cf71d6cedcc425..0000000000000000000000000000000000000000 --- a/backwards_TL/h76le4wl/300003328/sparsity.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a6dd871fb603b12805948c6da3b01a980c58d8c4b2539750cd3588e7f1bc9882 -size 24656 diff --git a/backwards_TL/h76le4wl/400003072/cfg.json b/backwards_TL/h76le4wl/400003072/cfg.json deleted file mode 100644 index 569aa3c63bb70f121421c85d102da99aa35e5355..0000000000000000000000000000000000000000 --- a/backwards_TL/h76le4wl/400003072/cfg.json +++ /dev/null @@ -1 +0,0 @@ -{"model_name": "pythia-160m-deduped", "model_class_name": "HookedTransformer", "hook_name": "blocks.4.hook_resid_post", "hook_eval": "NOT_IN_USE", "hook_layer": 4, "hook_head_index": null, "dataset_path": "NeelNanda/pile-small-tokenized-2b", "dataset_trust_remote_code": true, "streaming": true, "is_dataset_tokenized": true, "context_size": 1024, "use_cached_activations": false, "cached_activations_path": null, "architecture": "jumprelu", "d_in": 768, "d_sae": 6144, "b_dec_init_method": "zeros", "expansion_factor": 8, "activation_fn": "relu", "activation_fn_kwargs": {}, "normalize_sae_decoder": true, "noise_scale": 0.0, "from_pretrained_path": "./hub/models--mech-interp--pythia-160m-deduped-rs-post/snapshots/ad21dc2dd4070805dbeb842dcbfa14e6ad74e2b9/L5", "apply_b_dec_to_input": false, "decoder_orthogonal_init": false, "decoder_heuristic_init": false, "init_encoder_as_decoder_transpose": false, "n_batches_in_buffer": 128, "training_tokens": 500000000, "finetuning_tokens": 0, "store_batch_size_prompts": 8, "train_batch_size_tokens": 4096, "normalize_activations": "none", "device": "cuda", "act_store_device": "cuda", "seed": 42, "dtype": "float32", "prepend_bos": false, "autocast": false, "autocast_lm": false, "compile_llm": false, "llm_compilation_mode": null, "compile_sae": false, "sae_compilation_mode": null, "adam_beta1": 0, "adam_beta2": 0.999, "mse_loss_normalization": null, "l1_coefficient": 1, "lp_norm": 1, "scale_sparsity_penalty_by_decoder_norm": false, "l1_warm_up_steps": 6103, "lr": 1e-05, "lr_scheduler_name": "constant", "lr_warm_up_steps": 0, "lr_end": 1.0000000000000002e-06, "lr_decay_steps": 24414, "n_restart_cycles": 1, "finetuning_method": null, "use_ghost_grads": false, "feature_sampling_window": 2000, "dead_feature_window": 1000, "dead_feature_threshold": 1e-06, "n_eval_batches": 10, "eval_batch_size_prompts": null, "log_to_wandb": true, "log_activations_store_to_wandb": false, "log_optimizer_state_to_wandb": false, "wandb_project": "sae-transfer-learning", "wandb_id": null, "run_name": "FT_L4_hook_resid_post_L1_1", "wandb_entity": null, "wandb_log_frequency": 30, "eval_every_n_wandb_logs": 100, "resume": false, "n_checkpoints": 5, "checkpoint_path": "checkpoints/h76le4wl", "verbose": true, "model_kwargs": {}, "model_from_pretrained_kwargs": {}, "sae_lens_version": "3.14.0", "sae_lens_training_version": "3.14.0", "tokens_per_buffer": 536870912} \ No newline at end of file diff --git a/backwards_TL/h76le4wl/400003072/sae_weights.safetensors b/backwards_TL/h76le4wl/400003072/sae_weights.safetensors deleted file mode 100644 index 4e1688c5906a674af51ddb5f93cb9c024452b0ae..0000000000000000000000000000000000000000 --- a/backwards_TL/h76le4wl/400003072/sae_weights.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ac2fc030444c3233c6db0d45217e1cc0af263efd3f0e9bd4a0d34e3c69181d1d -size 37801344 diff --git a/backwards_TL/h76le4wl/400003072/sparsity.safetensors b/backwards_TL/h76le4wl/400003072/sparsity.safetensors deleted file mode 100644 index 288de2b179af9ee11af797a16b57c96d2f2be6f1..0000000000000000000000000000000000000000 --- a/backwards_TL/h76le4wl/400003072/sparsity.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:eb0339115daa9f799994bc0920fb7915be8161329e4e691191ad597ac655b9e0 -size 24656 diff --git a/backwards_TL/h76le4wl/final_500002816/cfg.json b/backwards_TL/h76le4wl/final_500002816/cfg.json deleted file mode 100644 index 569aa3c63bb70f121421c85d102da99aa35e5355..0000000000000000000000000000000000000000 --- a/backwards_TL/h76le4wl/final_500002816/cfg.json +++ /dev/null @@ -1 +0,0 @@ -{"model_name": "pythia-160m-deduped", "model_class_name": "HookedTransformer", "hook_name": "blocks.4.hook_resid_post", "hook_eval": "NOT_IN_USE", "hook_layer": 4, "hook_head_index": null, "dataset_path": "NeelNanda/pile-small-tokenized-2b", "dataset_trust_remote_code": true, "streaming": true, "is_dataset_tokenized": true, "context_size": 1024, "use_cached_activations": false, "cached_activations_path": null, "architecture": "jumprelu", "d_in": 768, "d_sae": 6144, "b_dec_init_method": "zeros", "expansion_factor": 8, "activation_fn": "relu", "activation_fn_kwargs": {}, "normalize_sae_decoder": true, "noise_scale": 0.0, "from_pretrained_path": "./hub/models--mech-interp--pythia-160m-deduped-rs-post/snapshots/ad21dc2dd4070805dbeb842dcbfa14e6ad74e2b9/L5", "apply_b_dec_to_input": false, "decoder_orthogonal_init": false, "decoder_heuristic_init": false, "init_encoder_as_decoder_transpose": false, "n_batches_in_buffer": 128, "training_tokens": 500000000, "finetuning_tokens": 0, "store_batch_size_prompts": 8, "train_batch_size_tokens": 4096, "normalize_activations": "none", "device": "cuda", "act_store_device": "cuda", "seed": 42, "dtype": "float32", "prepend_bos": false, "autocast": false, "autocast_lm": false, "compile_llm": false, "llm_compilation_mode": null, "compile_sae": false, "sae_compilation_mode": null, "adam_beta1": 0, "adam_beta2": 0.999, "mse_loss_normalization": null, "l1_coefficient": 1, "lp_norm": 1, "scale_sparsity_penalty_by_decoder_norm": false, "l1_warm_up_steps": 6103, "lr": 1e-05, "lr_scheduler_name": "constant", "lr_warm_up_steps": 0, "lr_end": 1.0000000000000002e-06, "lr_decay_steps": 24414, "n_restart_cycles": 1, "finetuning_method": null, "use_ghost_grads": false, "feature_sampling_window": 2000, "dead_feature_window": 1000, "dead_feature_threshold": 1e-06, "n_eval_batches": 10, "eval_batch_size_prompts": null, "log_to_wandb": true, "log_activations_store_to_wandb": false, "log_optimizer_state_to_wandb": false, "wandb_project": "sae-transfer-learning", "wandb_id": null, "run_name": "FT_L4_hook_resid_post_L1_1", "wandb_entity": null, "wandb_log_frequency": 30, "eval_every_n_wandb_logs": 100, "resume": false, "n_checkpoints": 5, "checkpoint_path": "checkpoints/h76le4wl", "verbose": true, "model_kwargs": {}, "model_from_pretrained_kwargs": {}, "sae_lens_version": "3.14.0", "sae_lens_training_version": "3.14.0", "tokens_per_buffer": 536870912} \ No newline at end of file diff --git a/backwards_TL/h76le4wl/final_500002816/sae_weights.safetensors b/backwards_TL/h76le4wl/final_500002816/sae_weights.safetensors deleted file mode 100644 index a7a700c936a445e8f212f70d4e5e9219c3f7de17..0000000000000000000000000000000000000000 --- a/backwards_TL/h76le4wl/final_500002816/sae_weights.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2f0534b7cae2b12ad21540aa8a88ca71322e0e0e94a25f4338eb2209ffd41e15 -size 37801344 diff --git a/backwards_TL/h76le4wl/final_500002816/sparsity.safetensors b/backwards_TL/h76le4wl/final_500002816/sparsity.safetensors deleted file mode 100644 index 7ce3fad90a67a180465232bd752cfdf15bbb00e3..0000000000000000000000000000000000000000 --- a/backwards_TL/h76le4wl/final_500002816/sparsity.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:15d893250c9b29543fd70c52e5d9fd50c172e4d3fc62ed856e2c6ea24f1b1740 -size 24656 diff --git a/backwards_TL/is8g1rbr/100003840/cfg.json b/backwards_TL/is8g1rbr/100003840/cfg.json deleted file mode 100644 index 80b9556319106e22d48b75195607da5316789e9a..0000000000000000000000000000000000000000 --- a/backwards_TL/is8g1rbr/100003840/cfg.json +++ /dev/null @@ -1 +0,0 @@ -{"model_name": "pythia-160m-deduped", "model_class_name": "HookedTransformer", "hook_name": "blocks.2.hook_resid_post", "hook_eval": "NOT_IN_USE", "hook_layer": 2, "hook_head_index": null, "dataset_path": "NeelNanda/pile-small-tokenized-2b", "dataset_trust_remote_code": true, "streaming": true, "is_dataset_tokenized": true, "context_size": 1024, "use_cached_activations": false, "cached_activations_path": null, "architecture": "jumprelu", "d_in": 768, "d_sae": 6144, "b_dec_init_method": "zeros", "expansion_factor": 8, "activation_fn": "relu", "activation_fn_kwargs": {}, "normalize_sae_decoder": true, "noise_scale": 0.0, "from_pretrained_path": "./hub/models--mech-interp--pythia-160m-deduped-rs-post/snapshots/ad21dc2dd4070805dbeb842dcbfa14e6ad74e2b9/L3", "apply_b_dec_to_input": false, "decoder_orthogonal_init": false, "decoder_heuristic_init": false, "init_encoder_as_decoder_transpose": false, "n_batches_in_buffer": 128, "training_tokens": 500000000, "finetuning_tokens": 0, "store_batch_size_prompts": 8, "train_batch_size_tokens": 4096, "normalize_activations": "none", "device": "cuda", "act_store_device": "cuda", "seed": 42, "dtype": "float32", "prepend_bos": false, "autocast": false, "autocast_lm": false, "compile_llm": false, "llm_compilation_mode": null, "compile_sae": false, "sae_compilation_mode": null, "adam_beta1": 0, "adam_beta2": 0.999, "mse_loss_normalization": null, "l1_coefficient": 1, "lp_norm": 1, "scale_sparsity_penalty_by_decoder_norm": false, "l1_warm_up_steps": 6103, "lr": 1e-05, "lr_scheduler_name": "constant", "lr_warm_up_steps": 0, "lr_end": 1.0000000000000002e-06, "lr_decay_steps": 24414, "n_restart_cycles": 1, "finetuning_method": null, "use_ghost_grads": false, "feature_sampling_window": 2000, "dead_feature_window": 1000, "dead_feature_threshold": 1e-06, "n_eval_batches": 10, "eval_batch_size_prompts": null, "log_to_wandb": true, "log_activations_store_to_wandb": false, "log_optimizer_state_to_wandb": false, "wandb_project": "sae-transfer-learning", "wandb_id": null, "run_name": "FT_L2_hook_resid_post_L1_1", "wandb_entity": null, "wandb_log_frequency": 30, "eval_every_n_wandb_logs": 100, "resume": false, "n_checkpoints": 5, "checkpoint_path": "checkpoints/is8g1rbr", "verbose": true, "model_kwargs": {}, "model_from_pretrained_kwargs": {}, "sae_lens_version": "3.14.0", "sae_lens_training_version": "3.14.0", "tokens_per_buffer": 536870912} \ No newline at end of file diff --git a/backwards_TL/is8g1rbr/100003840/sae_weights.safetensors b/backwards_TL/is8g1rbr/100003840/sae_weights.safetensors deleted file mode 100644 index 2e90af4c94ab825ca5725945970fec0078e1e459..0000000000000000000000000000000000000000 --- a/backwards_TL/is8g1rbr/100003840/sae_weights.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8088d59e465ebf95e62b76e2fc5ac260da1a0baf3eb145ee7426bfc13ce609bb -size 37801344 diff --git a/backwards_TL/is8g1rbr/100003840/sparsity.safetensors b/backwards_TL/is8g1rbr/100003840/sparsity.safetensors deleted file mode 100644 index 3e763c8c4740028620b3f0b4c385ab997291335d..0000000000000000000000000000000000000000 --- a/backwards_TL/is8g1rbr/100003840/sparsity.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b284c60af440f7662815ea431c073ae5c6e9c76685a4a447aed3e74b51ec1dd4 -size 24656 diff --git a/backwards_TL/is8g1rbr/200003584/cfg.json b/backwards_TL/is8g1rbr/200003584/cfg.json deleted file mode 100644 index 80b9556319106e22d48b75195607da5316789e9a..0000000000000000000000000000000000000000 --- a/backwards_TL/is8g1rbr/200003584/cfg.json +++ /dev/null @@ -1 +0,0 @@ -{"model_name": "pythia-160m-deduped", "model_class_name": "HookedTransformer", "hook_name": "blocks.2.hook_resid_post", "hook_eval": "NOT_IN_USE", "hook_layer": 2, "hook_head_index": null, "dataset_path": "NeelNanda/pile-small-tokenized-2b", "dataset_trust_remote_code": true, "streaming": true, "is_dataset_tokenized": true, "context_size": 1024, "use_cached_activations": false, "cached_activations_path": null, "architecture": "jumprelu", "d_in": 768, "d_sae": 6144, "b_dec_init_method": "zeros", "expansion_factor": 8, "activation_fn": "relu", "activation_fn_kwargs": {}, "normalize_sae_decoder": true, "noise_scale": 0.0, "from_pretrained_path": "./hub/models--mech-interp--pythia-160m-deduped-rs-post/snapshots/ad21dc2dd4070805dbeb842dcbfa14e6ad74e2b9/L3", "apply_b_dec_to_input": false, "decoder_orthogonal_init": false, "decoder_heuristic_init": false, "init_encoder_as_decoder_transpose": false, "n_batches_in_buffer": 128, "training_tokens": 500000000, "finetuning_tokens": 0, "store_batch_size_prompts": 8, "train_batch_size_tokens": 4096, "normalize_activations": "none", "device": "cuda", "act_store_device": "cuda", "seed": 42, "dtype": "float32", "prepend_bos": false, "autocast": false, "autocast_lm": false, "compile_llm": false, "llm_compilation_mode": null, "compile_sae": false, "sae_compilation_mode": null, "adam_beta1": 0, "adam_beta2": 0.999, "mse_loss_normalization": null, "l1_coefficient": 1, "lp_norm": 1, "scale_sparsity_penalty_by_decoder_norm": false, "l1_warm_up_steps": 6103, "lr": 1e-05, "lr_scheduler_name": "constant", "lr_warm_up_steps": 0, "lr_end": 1.0000000000000002e-06, "lr_decay_steps": 24414, "n_restart_cycles": 1, "finetuning_method": null, "use_ghost_grads": false, "feature_sampling_window": 2000, "dead_feature_window": 1000, "dead_feature_threshold": 1e-06, "n_eval_batches": 10, "eval_batch_size_prompts": null, "log_to_wandb": true, "log_activations_store_to_wandb": false, "log_optimizer_state_to_wandb": false, "wandb_project": "sae-transfer-learning", "wandb_id": null, "run_name": "FT_L2_hook_resid_post_L1_1", "wandb_entity": null, "wandb_log_frequency": 30, "eval_every_n_wandb_logs": 100, "resume": false, "n_checkpoints": 5, "checkpoint_path": "checkpoints/is8g1rbr", "verbose": true, "model_kwargs": {}, "model_from_pretrained_kwargs": {}, "sae_lens_version": "3.14.0", "sae_lens_training_version": "3.14.0", "tokens_per_buffer": 536870912} \ No newline at end of file diff --git a/backwards_TL/is8g1rbr/200003584/sae_weights.safetensors b/backwards_TL/is8g1rbr/200003584/sae_weights.safetensors deleted file mode 100644 index 4fcaf1db4e47439705f90f6d58ec8c9951c0abe5..0000000000000000000000000000000000000000 --- a/backwards_TL/is8g1rbr/200003584/sae_weights.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:aeb0b7a2ded0e6dca599df75587d19a19b35d527013af2349bdd4e0c2e157116 -size 37801344 diff --git a/backwards_TL/is8g1rbr/200003584/sparsity.safetensors b/backwards_TL/is8g1rbr/200003584/sparsity.safetensors deleted file mode 100644 index 141b5495da89e0dfea5781b34188d4df58248028..0000000000000000000000000000000000000000 --- a/backwards_TL/is8g1rbr/200003584/sparsity.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:552111f6a4eeff0c5d2c6c4bf27aa252d48517852a3a7639bf84a6223076508c -size 24656 diff --git a/backwards_TL/is8g1rbr/300003328/cfg.json b/backwards_TL/is8g1rbr/300003328/cfg.json deleted file mode 100644 index 80b9556319106e22d48b75195607da5316789e9a..0000000000000000000000000000000000000000 --- a/backwards_TL/is8g1rbr/300003328/cfg.json +++ /dev/null @@ -1 +0,0 @@ -{"model_name": "pythia-160m-deduped", "model_class_name": "HookedTransformer", "hook_name": "blocks.2.hook_resid_post", "hook_eval": "NOT_IN_USE", "hook_layer": 2, "hook_head_index": null, "dataset_path": "NeelNanda/pile-small-tokenized-2b", "dataset_trust_remote_code": true, "streaming": true, "is_dataset_tokenized": true, "context_size": 1024, "use_cached_activations": false, "cached_activations_path": null, "architecture": "jumprelu", "d_in": 768, "d_sae": 6144, "b_dec_init_method": "zeros", "expansion_factor": 8, "activation_fn": "relu", "activation_fn_kwargs": {}, "normalize_sae_decoder": true, "noise_scale": 0.0, "from_pretrained_path": "./hub/models--mech-interp--pythia-160m-deduped-rs-post/snapshots/ad21dc2dd4070805dbeb842dcbfa14e6ad74e2b9/L3", "apply_b_dec_to_input": false, "decoder_orthogonal_init": false, "decoder_heuristic_init": false, "init_encoder_as_decoder_transpose": false, "n_batches_in_buffer": 128, "training_tokens": 500000000, "finetuning_tokens": 0, "store_batch_size_prompts": 8, "train_batch_size_tokens": 4096, "normalize_activations": "none", "device": "cuda", "act_store_device": "cuda", "seed": 42, "dtype": "float32", "prepend_bos": false, "autocast": false, "autocast_lm": false, "compile_llm": false, "llm_compilation_mode": null, "compile_sae": false, "sae_compilation_mode": null, "adam_beta1": 0, "adam_beta2": 0.999, "mse_loss_normalization": null, "l1_coefficient": 1, "lp_norm": 1, "scale_sparsity_penalty_by_decoder_norm": false, "l1_warm_up_steps": 6103, "lr": 1e-05, "lr_scheduler_name": "constant", "lr_warm_up_steps": 0, "lr_end": 1.0000000000000002e-06, "lr_decay_steps": 24414, "n_restart_cycles": 1, "finetuning_method": null, "use_ghost_grads": false, "feature_sampling_window": 2000, "dead_feature_window": 1000, "dead_feature_threshold": 1e-06, "n_eval_batches": 10, "eval_batch_size_prompts": null, "log_to_wandb": true, "log_activations_store_to_wandb": false, "log_optimizer_state_to_wandb": false, "wandb_project": "sae-transfer-learning", "wandb_id": null, "run_name": "FT_L2_hook_resid_post_L1_1", "wandb_entity": null, "wandb_log_frequency": 30, "eval_every_n_wandb_logs": 100, "resume": false, "n_checkpoints": 5, "checkpoint_path": "checkpoints/is8g1rbr", "verbose": true, "model_kwargs": {}, "model_from_pretrained_kwargs": {}, "sae_lens_version": "3.14.0", "sae_lens_training_version": "3.14.0", "tokens_per_buffer": 536870912} \ No newline at end of file diff --git a/backwards_TL/is8g1rbr/300003328/sae_weights.safetensors b/backwards_TL/is8g1rbr/300003328/sae_weights.safetensors deleted file mode 100644 index 0a0ea4e8acf05ced64522f613f0ebec561b0dd85..0000000000000000000000000000000000000000 --- a/backwards_TL/is8g1rbr/300003328/sae_weights.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a7f0156b7b0f70bd6b1fb43b075aa445b266465a289454cc86a0497b4dc7b60f -size 37801344 diff --git a/backwards_TL/is8g1rbr/300003328/sparsity.safetensors b/backwards_TL/is8g1rbr/300003328/sparsity.safetensors deleted file mode 100644 index 20b1b2d404b0c0484f7fabe282a0b6eced6de407..0000000000000000000000000000000000000000 --- a/backwards_TL/is8g1rbr/300003328/sparsity.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0fb30f6789840fec89ee8a1567ae89845b8f145d418eda390360a70d6ca7adbd -size 24656 diff --git a/backwards_TL/is8g1rbr/400003072/cfg.json b/backwards_TL/is8g1rbr/400003072/cfg.json deleted file mode 100644 index 80b9556319106e22d48b75195607da5316789e9a..0000000000000000000000000000000000000000 --- a/backwards_TL/is8g1rbr/400003072/cfg.json +++ /dev/null @@ -1 +0,0 @@ -{"model_name": "pythia-160m-deduped", "model_class_name": "HookedTransformer", "hook_name": "blocks.2.hook_resid_post", "hook_eval": "NOT_IN_USE", "hook_layer": 2, "hook_head_index": null, "dataset_path": "NeelNanda/pile-small-tokenized-2b", "dataset_trust_remote_code": true, "streaming": true, "is_dataset_tokenized": true, "context_size": 1024, "use_cached_activations": false, "cached_activations_path": null, "architecture": "jumprelu", "d_in": 768, "d_sae": 6144, "b_dec_init_method": "zeros", "expansion_factor": 8, "activation_fn": "relu", "activation_fn_kwargs": {}, "normalize_sae_decoder": true, "noise_scale": 0.0, "from_pretrained_path": "./hub/models--mech-interp--pythia-160m-deduped-rs-post/snapshots/ad21dc2dd4070805dbeb842dcbfa14e6ad74e2b9/L3", "apply_b_dec_to_input": false, "decoder_orthogonal_init": false, "decoder_heuristic_init": false, "init_encoder_as_decoder_transpose": false, "n_batches_in_buffer": 128, "training_tokens": 500000000, "finetuning_tokens": 0, "store_batch_size_prompts": 8, "train_batch_size_tokens": 4096, "normalize_activations": "none", "device": "cuda", "act_store_device": "cuda", "seed": 42, "dtype": "float32", "prepend_bos": false, "autocast": false, "autocast_lm": false, "compile_llm": false, "llm_compilation_mode": null, "compile_sae": false, "sae_compilation_mode": null, "adam_beta1": 0, "adam_beta2": 0.999, "mse_loss_normalization": null, "l1_coefficient": 1, "lp_norm": 1, "scale_sparsity_penalty_by_decoder_norm": false, "l1_warm_up_steps": 6103, "lr": 1e-05, "lr_scheduler_name": "constant", "lr_warm_up_steps": 0, "lr_end": 1.0000000000000002e-06, "lr_decay_steps": 24414, "n_restart_cycles": 1, "finetuning_method": null, "use_ghost_grads": false, "feature_sampling_window": 2000, "dead_feature_window": 1000, "dead_feature_threshold": 1e-06, "n_eval_batches": 10, "eval_batch_size_prompts": null, "log_to_wandb": true, "log_activations_store_to_wandb": false, "log_optimizer_state_to_wandb": false, "wandb_project": "sae-transfer-learning", "wandb_id": null, "run_name": "FT_L2_hook_resid_post_L1_1", "wandb_entity": null, "wandb_log_frequency": 30, "eval_every_n_wandb_logs": 100, "resume": false, "n_checkpoints": 5, "checkpoint_path": "checkpoints/is8g1rbr", "verbose": true, "model_kwargs": {}, "model_from_pretrained_kwargs": {}, "sae_lens_version": "3.14.0", "sae_lens_training_version": "3.14.0", "tokens_per_buffer": 536870912} \ No newline at end of file diff --git a/backwards_TL/is8g1rbr/400003072/sae_weights.safetensors b/backwards_TL/is8g1rbr/400003072/sae_weights.safetensors deleted file mode 100644 index 4f63d946caa7a82c2c5ce26f2934eec83a643e5c..0000000000000000000000000000000000000000 --- a/backwards_TL/is8g1rbr/400003072/sae_weights.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:39ef3c0e2dcfe190f44c6cac7cac08f67afc47c24640648d41912b3db81f8190 -size 37801344 diff --git a/backwards_TL/is8g1rbr/400003072/sparsity.safetensors b/backwards_TL/is8g1rbr/400003072/sparsity.safetensors deleted file mode 100644 index ee7a830d32c5dee3e6dfbb3c2181c1faebb5bdda..0000000000000000000000000000000000000000 --- a/backwards_TL/is8g1rbr/400003072/sparsity.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a2608e6ffcebb7d2f2e949ffe603780f9cabd5d2a659c2af08397483e2fe047b -size 24656 diff --git a/backwards_TL/is8g1rbr/final_500002816/cfg.json b/backwards_TL/is8g1rbr/final_500002816/cfg.json deleted file mode 100644 index 80b9556319106e22d48b75195607da5316789e9a..0000000000000000000000000000000000000000 --- a/backwards_TL/is8g1rbr/final_500002816/cfg.json +++ /dev/null @@ -1 +0,0 @@ -{"model_name": "pythia-160m-deduped", "model_class_name": "HookedTransformer", "hook_name": "blocks.2.hook_resid_post", "hook_eval": "NOT_IN_USE", "hook_layer": 2, "hook_head_index": null, "dataset_path": "NeelNanda/pile-small-tokenized-2b", "dataset_trust_remote_code": true, "streaming": true, "is_dataset_tokenized": true, "context_size": 1024, "use_cached_activations": false, "cached_activations_path": null, "architecture": "jumprelu", "d_in": 768, "d_sae": 6144, "b_dec_init_method": "zeros", "expansion_factor": 8, "activation_fn": "relu", "activation_fn_kwargs": {}, "normalize_sae_decoder": true, "noise_scale": 0.0, "from_pretrained_path": "./hub/models--mech-interp--pythia-160m-deduped-rs-post/snapshots/ad21dc2dd4070805dbeb842dcbfa14e6ad74e2b9/L3", "apply_b_dec_to_input": false, "decoder_orthogonal_init": false, "decoder_heuristic_init": false, "init_encoder_as_decoder_transpose": false, "n_batches_in_buffer": 128, "training_tokens": 500000000, "finetuning_tokens": 0, "store_batch_size_prompts": 8, "train_batch_size_tokens": 4096, "normalize_activations": "none", "device": "cuda", "act_store_device": "cuda", "seed": 42, "dtype": "float32", "prepend_bos": false, "autocast": false, "autocast_lm": false, "compile_llm": false, "llm_compilation_mode": null, "compile_sae": false, "sae_compilation_mode": null, "adam_beta1": 0, "adam_beta2": 0.999, "mse_loss_normalization": null, "l1_coefficient": 1, "lp_norm": 1, "scale_sparsity_penalty_by_decoder_norm": false, "l1_warm_up_steps": 6103, "lr": 1e-05, "lr_scheduler_name": "constant", "lr_warm_up_steps": 0, "lr_end": 1.0000000000000002e-06, "lr_decay_steps": 24414, "n_restart_cycles": 1, "finetuning_method": null, "use_ghost_grads": false, "feature_sampling_window": 2000, "dead_feature_window": 1000, "dead_feature_threshold": 1e-06, "n_eval_batches": 10, "eval_batch_size_prompts": null, "log_to_wandb": true, "log_activations_store_to_wandb": false, "log_optimizer_state_to_wandb": false, "wandb_project": "sae-transfer-learning", "wandb_id": null, "run_name": "FT_L2_hook_resid_post_L1_1", "wandb_entity": null, "wandb_log_frequency": 30, "eval_every_n_wandb_logs": 100, "resume": false, "n_checkpoints": 5, "checkpoint_path": "checkpoints/is8g1rbr", "verbose": true, "model_kwargs": {}, "model_from_pretrained_kwargs": {}, "sae_lens_version": "3.14.0", "sae_lens_training_version": "3.14.0", "tokens_per_buffer": 536870912} \ No newline at end of file diff --git a/backwards_TL/is8g1rbr/final_500002816/sae_weights.safetensors b/backwards_TL/is8g1rbr/final_500002816/sae_weights.safetensors deleted file mode 100644 index 7815a87832f467509f39c2b24950430870576a91..0000000000000000000000000000000000000000 --- a/backwards_TL/is8g1rbr/final_500002816/sae_weights.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:40b02004d239d83dd6dc70b0561fff4323ea85938483f4a6521f3eb7cb492ca1 -size 37801344 diff --git a/backwards_TL/is8g1rbr/final_500002816/sparsity.safetensors b/backwards_TL/is8g1rbr/final_500002816/sparsity.safetensors deleted file mode 100644 index e86bfe903505bea0a09bd4b3864e5c45d7bc330e..0000000000000000000000000000000000000000 --- a/backwards_TL/is8g1rbr/final_500002816/sparsity.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:1ba516ead1a1203d3dbda591a406fe1fbb989dc1b7f62f2ebd2c6eaf480b9a59 -size 24656 diff --git a/backwards_TL/jahbro6e/100003840/cfg.json b/backwards_TL/jahbro6e/100003840/cfg.json deleted file mode 100644 index 1e941b03012790f7ceb0322fdea8f31f3ab95162..0000000000000000000000000000000000000000 --- a/backwards_TL/jahbro6e/100003840/cfg.json +++ /dev/null @@ -1 +0,0 @@ -{"model_name": "pythia-160m-deduped", "model_class_name": "HookedTransformer", "hook_name": "blocks.5.hook_resid_post", "hook_eval": "NOT_IN_USE", "hook_layer": 5, "hook_head_index": null, "dataset_path": "NeelNanda/pile-small-tokenized-2b", "dataset_trust_remote_code": true, "streaming": true, "is_dataset_tokenized": true, "context_size": 1024, "use_cached_activations": false, "cached_activations_path": null, "architecture": "jumprelu", "d_in": 768, "d_sae": 6144, "b_dec_init_method": "zeros", "expansion_factor": 8, "activation_fn": "relu", "activation_fn_kwargs": {}, "normalize_sae_decoder": true, "noise_scale": 0.0, "from_pretrained_path": "./hub/models--mech-interp--pythia-160m-deduped-rs-post/snapshots/ad21dc2dd4070805dbeb842dcbfa14e6ad74e2b9/L6", "apply_b_dec_to_input": false, "decoder_orthogonal_init": false, "decoder_heuristic_init": false, "init_encoder_as_decoder_transpose": false, "n_batches_in_buffer": 128, "training_tokens": 500000000, "finetuning_tokens": 0, "store_batch_size_prompts": 8, "train_batch_size_tokens": 4096, "normalize_activations": "none", "device": "cuda", "act_store_device": "cuda", "seed": 42, "dtype": "float32", "prepend_bos": false, "autocast": false, "autocast_lm": false, "compile_llm": false, "llm_compilation_mode": null, "compile_sae": false, "sae_compilation_mode": null, "adam_beta1": 0, "adam_beta2": 0.999, "mse_loss_normalization": null, "l1_coefficient": 1, "lp_norm": 1, "scale_sparsity_penalty_by_decoder_norm": false, "l1_warm_up_steps": 6103, "lr": 1e-05, "lr_scheduler_name": "constant", "lr_warm_up_steps": 0, "lr_end": 1.0000000000000002e-06, "lr_decay_steps": 24414, "n_restart_cycles": 1, "finetuning_method": null, "use_ghost_grads": false, "feature_sampling_window": 2000, "dead_feature_window": 1000, "dead_feature_threshold": 1e-06, "n_eval_batches": 10, "eval_batch_size_prompts": null, "log_to_wandb": true, "log_activations_store_to_wandb": false, "log_optimizer_state_to_wandb": false, "wandb_project": "sae-transfer-learning", "wandb_id": null, "run_name": "FT_L5_hook_resid_post_L1_1", "wandb_entity": null, "wandb_log_frequency": 30, "eval_every_n_wandb_logs": 100, "resume": false, "n_checkpoints": 5, "checkpoint_path": "checkpoints/jahbro6e", "verbose": true, "model_kwargs": {}, "model_from_pretrained_kwargs": {}, "sae_lens_version": "3.14.0", "sae_lens_training_version": "3.14.0", "tokens_per_buffer": 536870912} \ No newline at end of file diff --git a/backwards_TL/jahbro6e/100003840/sae_weights.safetensors b/backwards_TL/jahbro6e/100003840/sae_weights.safetensors deleted file mode 100644 index 9a9ca1d7c05d1e3920f6d3169ec2aedfaf091501..0000000000000000000000000000000000000000 --- a/backwards_TL/jahbro6e/100003840/sae_weights.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d0768b87e9e0169820ed3aa5d36bf528df27a69eeeb147a0405f0b88f03e9da8 -size 37801344 diff --git a/backwards_TL/jahbro6e/100003840/sparsity.safetensors b/backwards_TL/jahbro6e/100003840/sparsity.safetensors deleted file mode 100644 index 39bc22f358475e7d6d2df2d4028cb0783f4877b4..0000000000000000000000000000000000000000 --- a/backwards_TL/jahbro6e/100003840/sparsity.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:71e3e4765510ec60f7142c773351bf1b02e993f60287c5ea5af2229484d0f334 -size 24656 diff --git a/backwards_TL/jahbro6e/200003584/cfg.json b/backwards_TL/jahbro6e/200003584/cfg.json deleted file mode 100644 index 1e941b03012790f7ceb0322fdea8f31f3ab95162..0000000000000000000000000000000000000000 --- a/backwards_TL/jahbro6e/200003584/cfg.json +++ /dev/null @@ -1 +0,0 @@ -{"model_name": "pythia-160m-deduped", "model_class_name": "HookedTransformer", "hook_name": "blocks.5.hook_resid_post", "hook_eval": "NOT_IN_USE", "hook_layer": 5, "hook_head_index": null, "dataset_path": "NeelNanda/pile-small-tokenized-2b", "dataset_trust_remote_code": true, "streaming": true, "is_dataset_tokenized": true, "context_size": 1024, "use_cached_activations": false, "cached_activations_path": null, "architecture": "jumprelu", "d_in": 768, "d_sae": 6144, "b_dec_init_method": "zeros", "expansion_factor": 8, "activation_fn": "relu", "activation_fn_kwargs": {}, "normalize_sae_decoder": true, "noise_scale": 0.0, "from_pretrained_path": "./hub/models--mech-interp--pythia-160m-deduped-rs-post/snapshots/ad21dc2dd4070805dbeb842dcbfa14e6ad74e2b9/L6", "apply_b_dec_to_input": false, "decoder_orthogonal_init": false, "decoder_heuristic_init": false, "init_encoder_as_decoder_transpose": false, "n_batches_in_buffer": 128, "training_tokens": 500000000, "finetuning_tokens": 0, "store_batch_size_prompts": 8, "train_batch_size_tokens": 4096, "normalize_activations": "none", "device": "cuda", "act_store_device": "cuda", "seed": 42, "dtype": "float32", "prepend_bos": false, "autocast": false, "autocast_lm": false, "compile_llm": false, "llm_compilation_mode": null, "compile_sae": false, "sae_compilation_mode": null, "adam_beta1": 0, "adam_beta2": 0.999, "mse_loss_normalization": null, "l1_coefficient": 1, "lp_norm": 1, "scale_sparsity_penalty_by_decoder_norm": false, "l1_warm_up_steps": 6103, "lr": 1e-05, "lr_scheduler_name": "constant", "lr_warm_up_steps": 0, "lr_end": 1.0000000000000002e-06, "lr_decay_steps": 24414, "n_restart_cycles": 1, "finetuning_method": null, "use_ghost_grads": false, "feature_sampling_window": 2000, "dead_feature_window": 1000, "dead_feature_threshold": 1e-06, "n_eval_batches": 10, "eval_batch_size_prompts": null, "log_to_wandb": true, "log_activations_store_to_wandb": false, "log_optimizer_state_to_wandb": false, "wandb_project": "sae-transfer-learning", "wandb_id": null, "run_name": "FT_L5_hook_resid_post_L1_1", "wandb_entity": null, "wandb_log_frequency": 30, "eval_every_n_wandb_logs": 100, "resume": false, "n_checkpoints": 5, "checkpoint_path": "checkpoints/jahbro6e", "verbose": true, "model_kwargs": {}, "model_from_pretrained_kwargs": {}, "sae_lens_version": "3.14.0", "sae_lens_training_version": "3.14.0", "tokens_per_buffer": 536870912} \ No newline at end of file diff --git a/backwards_TL/jahbro6e/200003584/sae_weights.safetensors b/backwards_TL/jahbro6e/200003584/sae_weights.safetensors deleted file mode 100644 index 3e70c4d8da967eb3f78d2c8802fb41b4fef2f5f5..0000000000000000000000000000000000000000 --- a/backwards_TL/jahbro6e/200003584/sae_weights.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a106c95d733a25e4c1420424d9cea445e675805437731b8820dfb946a62d96cc -size 37801344 diff --git a/backwards_TL/jahbro6e/200003584/sparsity.safetensors b/backwards_TL/jahbro6e/200003584/sparsity.safetensors deleted file mode 100644 index a21ebdb299f996afe553be1a23e75ddb4f034a3c..0000000000000000000000000000000000000000 --- a/backwards_TL/jahbro6e/200003584/sparsity.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3df10714787ae6411b0c50156091acfb8b966cbe6e7d75ba7a3664e9f4d26a0f -size 24656 diff --git a/backwards_TL/jahbro6e/300003328/cfg.json b/backwards_TL/jahbro6e/300003328/cfg.json deleted file mode 100644 index 1e941b03012790f7ceb0322fdea8f31f3ab95162..0000000000000000000000000000000000000000 --- a/backwards_TL/jahbro6e/300003328/cfg.json +++ /dev/null @@ -1 +0,0 @@ -{"model_name": "pythia-160m-deduped", "model_class_name": "HookedTransformer", "hook_name": "blocks.5.hook_resid_post", "hook_eval": "NOT_IN_USE", "hook_layer": 5, "hook_head_index": null, "dataset_path": "NeelNanda/pile-small-tokenized-2b", "dataset_trust_remote_code": true, "streaming": true, "is_dataset_tokenized": true, "context_size": 1024, "use_cached_activations": false, "cached_activations_path": null, "architecture": "jumprelu", "d_in": 768, "d_sae": 6144, "b_dec_init_method": "zeros", "expansion_factor": 8, "activation_fn": "relu", "activation_fn_kwargs": {}, "normalize_sae_decoder": true, "noise_scale": 0.0, "from_pretrained_path": "./hub/models--mech-interp--pythia-160m-deduped-rs-post/snapshots/ad21dc2dd4070805dbeb842dcbfa14e6ad74e2b9/L6", "apply_b_dec_to_input": false, "decoder_orthogonal_init": false, "decoder_heuristic_init": false, "init_encoder_as_decoder_transpose": false, "n_batches_in_buffer": 128, "training_tokens": 500000000, "finetuning_tokens": 0, "store_batch_size_prompts": 8, "train_batch_size_tokens": 4096, "normalize_activations": "none", "device": "cuda", "act_store_device": "cuda", "seed": 42, "dtype": "float32", "prepend_bos": false, "autocast": false, "autocast_lm": false, "compile_llm": false, "llm_compilation_mode": null, "compile_sae": false, "sae_compilation_mode": null, "adam_beta1": 0, "adam_beta2": 0.999, "mse_loss_normalization": null, "l1_coefficient": 1, "lp_norm": 1, "scale_sparsity_penalty_by_decoder_norm": false, "l1_warm_up_steps": 6103, "lr": 1e-05, "lr_scheduler_name": "constant", "lr_warm_up_steps": 0, "lr_end": 1.0000000000000002e-06, "lr_decay_steps": 24414, "n_restart_cycles": 1, "finetuning_method": null, "use_ghost_grads": false, "feature_sampling_window": 2000, "dead_feature_window": 1000, "dead_feature_threshold": 1e-06, "n_eval_batches": 10, "eval_batch_size_prompts": null, "log_to_wandb": true, "log_activations_store_to_wandb": false, "log_optimizer_state_to_wandb": false, "wandb_project": "sae-transfer-learning", "wandb_id": null, "run_name": "FT_L5_hook_resid_post_L1_1", "wandb_entity": null, "wandb_log_frequency": 30, "eval_every_n_wandb_logs": 100, "resume": false, "n_checkpoints": 5, "checkpoint_path": "checkpoints/jahbro6e", "verbose": true, "model_kwargs": {}, "model_from_pretrained_kwargs": {}, "sae_lens_version": "3.14.0", "sae_lens_training_version": "3.14.0", "tokens_per_buffer": 536870912} \ No newline at end of file diff --git a/backwards_TL/jahbro6e/300003328/sae_weights.safetensors b/backwards_TL/jahbro6e/300003328/sae_weights.safetensors deleted file mode 100644 index da2ed959c87f0eeee127ec7380e195dc170f7096..0000000000000000000000000000000000000000 --- a/backwards_TL/jahbro6e/300003328/sae_weights.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c2c7d23f15c6c8269d58ee14906ef5cc62a0d201cbd723158d7dd53edc62afbf -size 37801344 diff --git a/backwards_TL/jahbro6e/300003328/sparsity.safetensors b/backwards_TL/jahbro6e/300003328/sparsity.safetensors deleted file mode 100644 index 18eda007c5c7ee8cc6245875893b4e1221657e7f..0000000000000000000000000000000000000000 --- a/backwards_TL/jahbro6e/300003328/sparsity.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:dd38c5f9784ca7fc3452adc3ae6919d0556ced0320c35e62f2fc9b5868624c11 -size 24656 diff --git a/backwards_TL/jahbro6e/400003072/cfg.json b/backwards_TL/jahbro6e/400003072/cfg.json deleted file mode 100644 index 1e941b03012790f7ceb0322fdea8f31f3ab95162..0000000000000000000000000000000000000000 --- a/backwards_TL/jahbro6e/400003072/cfg.json +++ /dev/null @@ -1 +0,0 @@ -{"model_name": "pythia-160m-deduped", "model_class_name": "HookedTransformer", "hook_name": "blocks.5.hook_resid_post", "hook_eval": "NOT_IN_USE", "hook_layer": 5, "hook_head_index": null, "dataset_path": "NeelNanda/pile-small-tokenized-2b", "dataset_trust_remote_code": true, "streaming": true, "is_dataset_tokenized": true, "context_size": 1024, "use_cached_activations": false, "cached_activations_path": null, "architecture": "jumprelu", "d_in": 768, "d_sae": 6144, "b_dec_init_method": "zeros", "expansion_factor": 8, "activation_fn": "relu", "activation_fn_kwargs": {}, "normalize_sae_decoder": true, "noise_scale": 0.0, "from_pretrained_path": "./hub/models--mech-interp--pythia-160m-deduped-rs-post/snapshots/ad21dc2dd4070805dbeb842dcbfa14e6ad74e2b9/L6", "apply_b_dec_to_input": false, "decoder_orthogonal_init": false, "decoder_heuristic_init": false, "init_encoder_as_decoder_transpose": false, "n_batches_in_buffer": 128, "training_tokens": 500000000, "finetuning_tokens": 0, "store_batch_size_prompts": 8, "train_batch_size_tokens": 4096, "normalize_activations": "none", "device": "cuda", "act_store_device": "cuda", "seed": 42, "dtype": "float32", "prepend_bos": false, "autocast": false, "autocast_lm": false, "compile_llm": false, "llm_compilation_mode": null, "compile_sae": false, "sae_compilation_mode": null, "adam_beta1": 0, "adam_beta2": 0.999, "mse_loss_normalization": null, "l1_coefficient": 1, "lp_norm": 1, "scale_sparsity_penalty_by_decoder_norm": false, "l1_warm_up_steps": 6103, "lr": 1e-05, "lr_scheduler_name": "constant", "lr_warm_up_steps": 0, "lr_end": 1.0000000000000002e-06, "lr_decay_steps": 24414, "n_restart_cycles": 1, "finetuning_method": null, "use_ghost_grads": false, "feature_sampling_window": 2000, "dead_feature_window": 1000, "dead_feature_threshold": 1e-06, "n_eval_batches": 10, "eval_batch_size_prompts": null, "log_to_wandb": true, "log_activations_store_to_wandb": false, "log_optimizer_state_to_wandb": false, "wandb_project": "sae-transfer-learning", "wandb_id": null, "run_name": "FT_L5_hook_resid_post_L1_1", "wandb_entity": null, "wandb_log_frequency": 30, "eval_every_n_wandb_logs": 100, "resume": false, "n_checkpoints": 5, "checkpoint_path": "checkpoints/jahbro6e", "verbose": true, "model_kwargs": {}, "model_from_pretrained_kwargs": {}, "sae_lens_version": "3.14.0", "sae_lens_training_version": "3.14.0", "tokens_per_buffer": 536870912} \ No newline at end of file diff --git a/backwards_TL/jahbro6e/400003072/sae_weights.safetensors b/backwards_TL/jahbro6e/400003072/sae_weights.safetensors deleted file mode 100644 index 0be54b5c3e2c634bdb90d6e9ed11761406233192..0000000000000000000000000000000000000000 --- a/backwards_TL/jahbro6e/400003072/sae_weights.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:517fa74aa700fcf6ed709a29838e8c4de75fc866692a1619d97135939c42e71b -size 37801344 diff --git a/backwards_TL/jahbro6e/400003072/sparsity.safetensors b/backwards_TL/jahbro6e/400003072/sparsity.safetensors deleted file mode 100644 index 07e0f797749c2d14e81c6424c4a1df1bd6faccd6..0000000000000000000000000000000000000000 --- a/backwards_TL/jahbro6e/400003072/sparsity.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6f0c2be80da4d64cb4db00aac9d986b18ea3ad1e98b6de09f66bc7f7d561cc83 -size 24656 diff --git a/backwards_TL/jahbro6e/final_500002816/cfg.json b/backwards_TL/jahbro6e/final_500002816/cfg.json deleted file mode 100644 index 1e941b03012790f7ceb0322fdea8f31f3ab95162..0000000000000000000000000000000000000000 --- a/backwards_TL/jahbro6e/final_500002816/cfg.json +++ /dev/null @@ -1 +0,0 @@ -{"model_name": "pythia-160m-deduped", "model_class_name": "HookedTransformer", "hook_name": "blocks.5.hook_resid_post", "hook_eval": "NOT_IN_USE", "hook_layer": 5, "hook_head_index": null, "dataset_path": "NeelNanda/pile-small-tokenized-2b", "dataset_trust_remote_code": true, "streaming": true, "is_dataset_tokenized": true, "context_size": 1024, "use_cached_activations": false, "cached_activations_path": null, "architecture": "jumprelu", "d_in": 768, "d_sae": 6144, "b_dec_init_method": "zeros", "expansion_factor": 8, "activation_fn": "relu", "activation_fn_kwargs": {}, "normalize_sae_decoder": true, "noise_scale": 0.0, "from_pretrained_path": "./hub/models--mech-interp--pythia-160m-deduped-rs-post/snapshots/ad21dc2dd4070805dbeb842dcbfa14e6ad74e2b9/L6", "apply_b_dec_to_input": false, "decoder_orthogonal_init": false, "decoder_heuristic_init": false, "init_encoder_as_decoder_transpose": false, "n_batches_in_buffer": 128, "training_tokens": 500000000, "finetuning_tokens": 0, "store_batch_size_prompts": 8, "train_batch_size_tokens": 4096, "normalize_activations": "none", "device": "cuda", "act_store_device": "cuda", "seed": 42, "dtype": "float32", "prepend_bos": false, "autocast": false, "autocast_lm": false, "compile_llm": false, "llm_compilation_mode": null, "compile_sae": false, "sae_compilation_mode": null, "adam_beta1": 0, "adam_beta2": 0.999, "mse_loss_normalization": null, "l1_coefficient": 1, "lp_norm": 1, "scale_sparsity_penalty_by_decoder_norm": false, "l1_warm_up_steps": 6103, "lr": 1e-05, "lr_scheduler_name": "constant", "lr_warm_up_steps": 0, "lr_end": 1.0000000000000002e-06, "lr_decay_steps": 24414, "n_restart_cycles": 1, "finetuning_method": null, "use_ghost_grads": false, "feature_sampling_window": 2000, "dead_feature_window": 1000, "dead_feature_threshold": 1e-06, "n_eval_batches": 10, "eval_batch_size_prompts": null, "log_to_wandb": true, "log_activations_store_to_wandb": false, "log_optimizer_state_to_wandb": false, "wandb_project": "sae-transfer-learning", "wandb_id": null, "run_name": "FT_L5_hook_resid_post_L1_1", "wandb_entity": null, "wandb_log_frequency": 30, "eval_every_n_wandb_logs": 100, "resume": false, "n_checkpoints": 5, "checkpoint_path": "checkpoints/jahbro6e", "verbose": true, "model_kwargs": {}, "model_from_pretrained_kwargs": {}, "sae_lens_version": "3.14.0", "sae_lens_training_version": "3.14.0", "tokens_per_buffer": 536870912} \ No newline at end of file diff --git a/backwards_TL/jahbro6e/final_500002816/sae_weights.safetensors b/backwards_TL/jahbro6e/final_500002816/sae_weights.safetensors deleted file mode 100644 index a4d24a6796e258b55e77524824b5f316b414c309..0000000000000000000000000000000000000000 --- a/backwards_TL/jahbro6e/final_500002816/sae_weights.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2d6b03a4472d7cfb95f2790af10c44ce3f6d6ccc45f9815323ac249ce2711898 -size 37801344 diff --git a/backwards_TL/jahbro6e/final_500002816/sparsity.safetensors b/backwards_TL/jahbro6e/final_500002816/sparsity.safetensors deleted file mode 100644 index a03ec52261f90ddb9806a376e0f339960612e139..0000000000000000000000000000000000000000 --- a/backwards_TL/jahbro6e/final_500002816/sparsity.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:93031963a7e0d355ded5944313e4d36dcea9c5749d8b8ff605e032101a18a9ad -size 24656 diff --git a/backwards_TL/xxt7ihjo/100003840/cfg.json b/backwards_TL/xxt7ihjo/100003840/cfg.json deleted file mode 100644 index 7479448673316736a22f2dde7cd0d4969b091f03..0000000000000000000000000000000000000000 --- a/backwards_TL/xxt7ihjo/100003840/cfg.json +++ /dev/null @@ -1 +0,0 @@ -{"model_name": "pythia-160m-deduped", "model_class_name": "HookedTransformer", "hook_name": "blocks.3.hook_resid_post", "hook_eval": "NOT_IN_USE", "hook_layer": 3, "hook_head_index": null, "dataset_path": "NeelNanda/pile-small-tokenized-2b", "dataset_trust_remote_code": true, "streaming": true, "is_dataset_tokenized": true, "context_size": 1024, "use_cached_activations": false, "cached_activations_path": null, "architecture": "jumprelu", "d_in": 768, "d_sae": 6144, "b_dec_init_method": "zeros", "expansion_factor": 8, "activation_fn": "relu", "activation_fn_kwargs": {}, "normalize_sae_decoder": true, "noise_scale": 0.0, "from_pretrained_path": "./hub/models--mech-interp--pythia-160m-deduped-rs-post/snapshots/ad21dc2dd4070805dbeb842dcbfa14e6ad74e2b9/L4", "apply_b_dec_to_input": false, "decoder_orthogonal_init": false, "decoder_heuristic_init": false, "init_encoder_as_decoder_transpose": false, "n_batches_in_buffer": 128, "training_tokens": 500000000, "finetuning_tokens": 0, "store_batch_size_prompts": 8, "train_batch_size_tokens": 4096, "normalize_activations": "none", "device": "cuda", "act_store_device": "cuda", "seed": 42, "dtype": "float32", "prepend_bos": false, "autocast": false, "autocast_lm": false, "compile_llm": false, "llm_compilation_mode": null, "compile_sae": false, "sae_compilation_mode": null, "adam_beta1": 0, "adam_beta2": 0.999, "mse_loss_normalization": null, "l1_coefficient": 1, "lp_norm": 1, "scale_sparsity_penalty_by_decoder_norm": false, "l1_warm_up_steps": 6103, "lr": 1e-05, "lr_scheduler_name": "constant", "lr_warm_up_steps": 0, "lr_end": 1.0000000000000002e-06, "lr_decay_steps": 24414, "n_restart_cycles": 1, "finetuning_method": null, "use_ghost_grads": false, "feature_sampling_window": 2000, "dead_feature_window": 1000, "dead_feature_threshold": 1e-06, "n_eval_batches": 10, "eval_batch_size_prompts": null, "log_to_wandb": true, "log_activations_store_to_wandb": false, "log_optimizer_state_to_wandb": false, "wandb_project": "sae-transfer-learning", "wandb_id": null, "run_name": "FT_L3_hook_resid_post_L1_1", "wandb_entity": null, "wandb_log_frequency": 30, "eval_every_n_wandb_logs": 100, "resume": false, "n_checkpoints": 5, "checkpoint_path": "checkpoints/xxt7ihjo", "verbose": true, "model_kwargs": {}, "model_from_pretrained_kwargs": {}, "sae_lens_version": "3.14.0", "sae_lens_training_version": "3.14.0", "tokens_per_buffer": 536870912} \ No newline at end of file diff --git a/backwards_TL/xxt7ihjo/100003840/sae_weights.safetensors b/backwards_TL/xxt7ihjo/100003840/sae_weights.safetensors deleted file mode 100644 index 18bef9669a09b85081e7e18d837953b6c8a8e9b4..0000000000000000000000000000000000000000 --- a/backwards_TL/xxt7ihjo/100003840/sae_weights.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2825975c65965617026bf80ab4200705645a079cf76d0adfa65167e25f7d0b6a -size 37801344 diff --git a/backwards_TL/xxt7ihjo/100003840/sparsity.safetensors b/backwards_TL/xxt7ihjo/100003840/sparsity.safetensors deleted file mode 100644 index 41c4ec8eea4bd149ccab23b21d4327e5daf8938f..0000000000000000000000000000000000000000 --- a/backwards_TL/xxt7ihjo/100003840/sparsity.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:392022ac7bcf467f9cff8f49c048d4909423bacb04738c794d3d69a0d8656321 -size 24656 diff --git a/backwards_TL/xxt7ihjo/200003584/cfg.json b/backwards_TL/xxt7ihjo/200003584/cfg.json deleted file mode 100644 index 7479448673316736a22f2dde7cd0d4969b091f03..0000000000000000000000000000000000000000 --- a/backwards_TL/xxt7ihjo/200003584/cfg.json +++ /dev/null @@ -1 +0,0 @@ -{"model_name": "pythia-160m-deduped", "model_class_name": "HookedTransformer", "hook_name": "blocks.3.hook_resid_post", "hook_eval": "NOT_IN_USE", "hook_layer": 3, "hook_head_index": null, "dataset_path": "NeelNanda/pile-small-tokenized-2b", "dataset_trust_remote_code": true, "streaming": true, "is_dataset_tokenized": true, "context_size": 1024, "use_cached_activations": false, "cached_activations_path": null, "architecture": "jumprelu", "d_in": 768, "d_sae": 6144, "b_dec_init_method": "zeros", "expansion_factor": 8, "activation_fn": "relu", "activation_fn_kwargs": {}, "normalize_sae_decoder": true, "noise_scale": 0.0, "from_pretrained_path": "./hub/models--mech-interp--pythia-160m-deduped-rs-post/snapshots/ad21dc2dd4070805dbeb842dcbfa14e6ad74e2b9/L4", "apply_b_dec_to_input": false, "decoder_orthogonal_init": false, "decoder_heuristic_init": false, "init_encoder_as_decoder_transpose": false, "n_batches_in_buffer": 128, "training_tokens": 500000000, "finetuning_tokens": 0, "store_batch_size_prompts": 8, "train_batch_size_tokens": 4096, "normalize_activations": "none", "device": "cuda", "act_store_device": "cuda", "seed": 42, "dtype": "float32", "prepend_bos": false, "autocast": false, "autocast_lm": false, "compile_llm": false, "llm_compilation_mode": null, "compile_sae": false, "sae_compilation_mode": null, "adam_beta1": 0, "adam_beta2": 0.999, "mse_loss_normalization": null, "l1_coefficient": 1, "lp_norm": 1, "scale_sparsity_penalty_by_decoder_norm": false, "l1_warm_up_steps": 6103, "lr": 1e-05, "lr_scheduler_name": "constant", "lr_warm_up_steps": 0, "lr_end": 1.0000000000000002e-06, "lr_decay_steps": 24414, "n_restart_cycles": 1, "finetuning_method": null, "use_ghost_grads": false, "feature_sampling_window": 2000, "dead_feature_window": 1000, "dead_feature_threshold": 1e-06, "n_eval_batches": 10, "eval_batch_size_prompts": null, "log_to_wandb": true, "log_activations_store_to_wandb": false, "log_optimizer_state_to_wandb": false, "wandb_project": "sae-transfer-learning", "wandb_id": null, "run_name": "FT_L3_hook_resid_post_L1_1", "wandb_entity": null, "wandb_log_frequency": 30, "eval_every_n_wandb_logs": 100, "resume": false, "n_checkpoints": 5, "checkpoint_path": "checkpoints/xxt7ihjo", "verbose": true, "model_kwargs": {}, "model_from_pretrained_kwargs": {}, "sae_lens_version": "3.14.0", "sae_lens_training_version": "3.14.0", "tokens_per_buffer": 536870912} \ No newline at end of file diff --git a/backwards_TL/xxt7ihjo/200003584/sae_weights.safetensors b/backwards_TL/xxt7ihjo/200003584/sae_weights.safetensors deleted file mode 100644 index bd5c2560453aa317edda416c3a29645a9213aed7..0000000000000000000000000000000000000000 --- a/backwards_TL/xxt7ihjo/200003584/sae_weights.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e01edf4586fd5ff6394357717e2482a82e5d4811a71278fe8c079e00706f438c -size 37801344 diff --git a/backwards_TL/xxt7ihjo/200003584/sparsity.safetensors b/backwards_TL/xxt7ihjo/200003584/sparsity.safetensors deleted file mode 100644 index 54492b98ec7ce16e38873efff660757443fe806d..0000000000000000000000000000000000000000 --- a/backwards_TL/xxt7ihjo/200003584/sparsity.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0e4681efd0426d3559e3bcafb3bf4694a6e8844185af2afd11dc97a9800bde2d -size 24656 diff --git a/backwards_TL/xxt7ihjo/300003328/cfg.json b/backwards_TL/xxt7ihjo/300003328/cfg.json deleted file mode 100644 index 7479448673316736a22f2dde7cd0d4969b091f03..0000000000000000000000000000000000000000 --- a/backwards_TL/xxt7ihjo/300003328/cfg.json +++ /dev/null @@ -1 +0,0 @@ -{"model_name": "pythia-160m-deduped", "model_class_name": "HookedTransformer", "hook_name": "blocks.3.hook_resid_post", "hook_eval": "NOT_IN_USE", "hook_layer": 3, "hook_head_index": null, "dataset_path": "NeelNanda/pile-small-tokenized-2b", "dataset_trust_remote_code": true, "streaming": true, "is_dataset_tokenized": true, "context_size": 1024, "use_cached_activations": false, "cached_activations_path": null, "architecture": "jumprelu", "d_in": 768, "d_sae": 6144, "b_dec_init_method": "zeros", "expansion_factor": 8, "activation_fn": "relu", "activation_fn_kwargs": {}, "normalize_sae_decoder": true, "noise_scale": 0.0, "from_pretrained_path": "./hub/models--mech-interp--pythia-160m-deduped-rs-post/snapshots/ad21dc2dd4070805dbeb842dcbfa14e6ad74e2b9/L4", "apply_b_dec_to_input": false, "decoder_orthogonal_init": false, "decoder_heuristic_init": false, "init_encoder_as_decoder_transpose": false, "n_batches_in_buffer": 128, "training_tokens": 500000000, "finetuning_tokens": 0, "store_batch_size_prompts": 8, "train_batch_size_tokens": 4096, "normalize_activations": "none", "device": "cuda", "act_store_device": "cuda", "seed": 42, "dtype": "float32", "prepend_bos": false, "autocast": false, "autocast_lm": false, "compile_llm": false, "llm_compilation_mode": null, "compile_sae": false, "sae_compilation_mode": null, "adam_beta1": 0, "adam_beta2": 0.999, "mse_loss_normalization": null, "l1_coefficient": 1, "lp_norm": 1, "scale_sparsity_penalty_by_decoder_norm": false, "l1_warm_up_steps": 6103, "lr": 1e-05, "lr_scheduler_name": "constant", "lr_warm_up_steps": 0, "lr_end": 1.0000000000000002e-06, "lr_decay_steps": 24414, "n_restart_cycles": 1, "finetuning_method": null, "use_ghost_grads": false, "feature_sampling_window": 2000, "dead_feature_window": 1000, "dead_feature_threshold": 1e-06, "n_eval_batches": 10, "eval_batch_size_prompts": null, "log_to_wandb": true, "log_activations_store_to_wandb": false, "log_optimizer_state_to_wandb": false, "wandb_project": "sae-transfer-learning", "wandb_id": null, "run_name": "FT_L3_hook_resid_post_L1_1", "wandb_entity": null, "wandb_log_frequency": 30, "eval_every_n_wandb_logs": 100, "resume": false, "n_checkpoints": 5, "checkpoint_path": "checkpoints/xxt7ihjo", "verbose": true, "model_kwargs": {}, "model_from_pretrained_kwargs": {}, "sae_lens_version": "3.14.0", "sae_lens_training_version": "3.14.0", "tokens_per_buffer": 536870912} \ No newline at end of file diff --git a/backwards_TL/xxt7ihjo/300003328/sae_weights.safetensors b/backwards_TL/xxt7ihjo/300003328/sae_weights.safetensors deleted file mode 100644 index f7cf8e36f6d2f2064dab7fb654559cbf753c997b..0000000000000000000000000000000000000000 --- a/backwards_TL/xxt7ihjo/300003328/sae_weights.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3d14ef1a0159aa1a99a8a91626918d6e025b33f79f5ee468ea811cd01dd4978c -size 37801344 diff --git a/backwards_TL/xxt7ihjo/300003328/sparsity.safetensors b/backwards_TL/xxt7ihjo/300003328/sparsity.safetensors deleted file mode 100644 index c7c4a5d22809fe4f9921f34c2ffd38577d4ea138..0000000000000000000000000000000000000000 --- a/backwards_TL/xxt7ihjo/300003328/sparsity.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8cafd2fe492d791fe5a7576b267224fc941fead3049b087191e786ce42913716 -size 24656 diff --git a/backwards_TL/xxt7ihjo/400003072/cfg.json b/backwards_TL/xxt7ihjo/400003072/cfg.json deleted file mode 100644 index 7479448673316736a22f2dde7cd0d4969b091f03..0000000000000000000000000000000000000000 --- a/backwards_TL/xxt7ihjo/400003072/cfg.json +++ /dev/null @@ -1 +0,0 @@ -{"model_name": "pythia-160m-deduped", "model_class_name": "HookedTransformer", "hook_name": "blocks.3.hook_resid_post", "hook_eval": "NOT_IN_USE", "hook_layer": 3, "hook_head_index": null, "dataset_path": "NeelNanda/pile-small-tokenized-2b", "dataset_trust_remote_code": true, "streaming": true, "is_dataset_tokenized": true, "context_size": 1024, "use_cached_activations": false, "cached_activations_path": null, "architecture": "jumprelu", "d_in": 768, "d_sae": 6144, "b_dec_init_method": "zeros", "expansion_factor": 8, "activation_fn": "relu", "activation_fn_kwargs": {}, "normalize_sae_decoder": true, "noise_scale": 0.0, "from_pretrained_path": "./hub/models--mech-interp--pythia-160m-deduped-rs-post/snapshots/ad21dc2dd4070805dbeb842dcbfa14e6ad74e2b9/L4", "apply_b_dec_to_input": false, "decoder_orthogonal_init": false, "decoder_heuristic_init": false, "init_encoder_as_decoder_transpose": false, "n_batches_in_buffer": 128, "training_tokens": 500000000, "finetuning_tokens": 0, "store_batch_size_prompts": 8, "train_batch_size_tokens": 4096, "normalize_activations": "none", "device": "cuda", "act_store_device": "cuda", "seed": 42, "dtype": "float32", "prepend_bos": false, "autocast": false, "autocast_lm": false, "compile_llm": false, "llm_compilation_mode": null, "compile_sae": false, "sae_compilation_mode": null, "adam_beta1": 0, "adam_beta2": 0.999, "mse_loss_normalization": null, "l1_coefficient": 1, "lp_norm": 1, "scale_sparsity_penalty_by_decoder_norm": false, "l1_warm_up_steps": 6103, "lr": 1e-05, "lr_scheduler_name": "constant", "lr_warm_up_steps": 0, "lr_end": 1.0000000000000002e-06, "lr_decay_steps": 24414, "n_restart_cycles": 1, "finetuning_method": null, "use_ghost_grads": false, "feature_sampling_window": 2000, "dead_feature_window": 1000, "dead_feature_threshold": 1e-06, "n_eval_batches": 10, "eval_batch_size_prompts": null, "log_to_wandb": true, "log_activations_store_to_wandb": false, "log_optimizer_state_to_wandb": false, "wandb_project": "sae-transfer-learning", "wandb_id": null, "run_name": "FT_L3_hook_resid_post_L1_1", "wandb_entity": null, "wandb_log_frequency": 30, "eval_every_n_wandb_logs": 100, "resume": false, "n_checkpoints": 5, "checkpoint_path": "checkpoints/xxt7ihjo", "verbose": true, "model_kwargs": {}, "model_from_pretrained_kwargs": {}, "sae_lens_version": "3.14.0", "sae_lens_training_version": "3.14.0", "tokens_per_buffer": 536870912} \ No newline at end of file diff --git a/backwards_TL/xxt7ihjo/400003072/sae_weights.safetensors b/backwards_TL/xxt7ihjo/400003072/sae_weights.safetensors deleted file mode 100644 index 8d7c975f72df38cd154969598206a1ba5c812837..0000000000000000000000000000000000000000 --- a/backwards_TL/xxt7ihjo/400003072/sae_weights.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:aac87ed542a6f4925cb0135ea8eb2f21811b110e4fabbff5feb5f656260142eb -size 37801344 diff --git a/backwards_TL/xxt7ihjo/400003072/sparsity.safetensors b/backwards_TL/xxt7ihjo/400003072/sparsity.safetensors deleted file mode 100644 index 5b1d5bb9ae6171be5931ba55192db09d3cc24fb2..0000000000000000000000000000000000000000 --- a/backwards_TL/xxt7ihjo/400003072/sparsity.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0bd88845ec317d32e113286382ccab09a8e5ff437f98d100f39937331d110b9b -size 24656 diff --git a/backwards_TL/xxt7ihjo/final_500002816/cfg.json b/backwards_TL/xxt7ihjo/final_500002816/cfg.json deleted file mode 100644 index 7479448673316736a22f2dde7cd0d4969b091f03..0000000000000000000000000000000000000000 --- a/backwards_TL/xxt7ihjo/final_500002816/cfg.json +++ /dev/null @@ -1 +0,0 @@ -{"model_name": "pythia-160m-deduped", "model_class_name": "HookedTransformer", "hook_name": "blocks.3.hook_resid_post", "hook_eval": "NOT_IN_USE", "hook_layer": 3, "hook_head_index": null, "dataset_path": "NeelNanda/pile-small-tokenized-2b", "dataset_trust_remote_code": true, "streaming": true, "is_dataset_tokenized": true, "context_size": 1024, "use_cached_activations": false, "cached_activations_path": null, "architecture": "jumprelu", "d_in": 768, "d_sae": 6144, "b_dec_init_method": "zeros", "expansion_factor": 8, "activation_fn": "relu", "activation_fn_kwargs": {}, "normalize_sae_decoder": true, "noise_scale": 0.0, "from_pretrained_path": "./hub/models--mech-interp--pythia-160m-deduped-rs-post/snapshots/ad21dc2dd4070805dbeb842dcbfa14e6ad74e2b9/L4", "apply_b_dec_to_input": false, "decoder_orthogonal_init": false, "decoder_heuristic_init": false, "init_encoder_as_decoder_transpose": false, "n_batches_in_buffer": 128, "training_tokens": 500000000, "finetuning_tokens": 0, "store_batch_size_prompts": 8, "train_batch_size_tokens": 4096, "normalize_activations": "none", "device": "cuda", "act_store_device": "cuda", "seed": 42, "dtype": "float32", "prepend_bos": false, "autocast": false, "autocast_lm": false, "compile_llm": false, "llm_compilation_mode": null, "compile_sae": false, "sae_compilation_mode": null, "adam_beta1": 0, "adam_beta2": 0.999, "mse_loss_normalization": null, "l1_coefficient": 1, "lp_norm": 1, "scale_sparsity_penalty_by_decoder_norm": false, "l1_warm_up_steps": 6103, "lr": 1e-05, "lr_scheduler_name": "constant", "lr_warm_up_steps": 0, "lr_end": 1.0000000000000002e-06, "lr_decay_steps": 24414, "n_restart_cycles": 1, "finetuning_method": null, "use_ghost_grads": false, "feature_sampling_window": 2000, "dead_feature_window": 1000, "dead_feature_threshold": 1e-06, "n_eval_batches": 10, "eval_batch_size_prompts": null, "log_to_wandb": true, "log_activations_store_to_wandb": false, "log_optimizer_state_to_wandb": false, "wandb_project": "sae-transfer-learning", "wandb_id": null, "run_name": "FT_L3_hook_resid_post_L1_1", "wandb_entity": null, "wandb_log_frequency": 30, "eval_every_n_wandb_logs": 100, "resume": false, "n_checkpoints": 5, "checkpoint_path": "checkpoints/xxt7ihjo", "verbose": true, "model_kwargs": {}, "model_from_pretrained_kwargs": {}, "sae_lens_version": "3.14.0", "sae_lens_training_version": "3.14.0", "tokens_per_buffer": 536870912} \ No newline at end of file diff --git a/backwards_TL/xxt7ihjo/final_500002816/sae_weights.safetensors b/backwards_TL/xxt7ihjo/final_500002816/sae_weights.safetensors deleted file mode 100644 index d66e1f05991d86ca4572cb5c182f2b266ee15f7f..0000000000000000000000000000000000000000 --- a/backwards_TL/xxt7ihjo/final_500002816/sae_weights.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f1b831228b16ead1632c4ddcb9db0713d555bde31c1cf07387d89995f3293f9f -size 37801344 diff --git a/backwards_TL/xxt7ihjo/final_500002816/sparsity.safetensors b/backwards_TL/xxt7ihjo/final_500002816/sparsity.safetensors deleted file mode 100644 index a706168d2cc87834c52549db7a1ae80d6a561226..0000000000000000000000000000000000000000 --- a/backwards_TL/xxt7ihjo/final_500002816/sparsity.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:1a81e22d9080510bf76fa6ff0572e87b8f59144f7fa6537cedc7ea00e31d70c7 -size 24656