Upload folder using huggingface_hub
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +1 -0
- benchmark_stats.csv +13 -0
- benchmark_stats.html +618 -0
- benchmark_stats.png +3 -0
- v5_32k_layer_0/cfg.json +1 -0
- v5_32k_layer_0/metrics.json +1 -0
- v5_32k_layer_0/sae_weights.safetensors +3 -0
- v5_32k_layer_0/sparsity.safetensors +3 -0
- v5_32k_layer_1/cfg.json +1 -0
- v5_32k_layer_1/metrics.json +1 -0
- v5_32k_layer_1/sae_weights.safetensors +3 -0
- v5_32k_layer_1/sparsity.safetensors +3 -0
- v5_32k_layer_10/cfg.json +1 -0
- v5_32k_layer_10/metrics.json +1 -0
- v5_32k_layer_10/sae_weights.safetensors +3 -0
- v5_32k_layer_10/sparsity.safetensors +3 -0
- v5_32k_layer_11/cfg.json +1 -0
- v5_32k_layer_11/metrics.json +1 -0
- v5_32k_layer_11/sae_weights.safetensors +3 -0
- v5_32k_layer_11/sparsity.safetensors +3 -0
- v5_32k_layer_2/cfg.json +1 -0
- v5_32k_layer_2/metrics.json +1 -0
- v5_32k_layer_2/sae_weights.safetensors +3 -0
- v5_32k_layer_2/sparsity.safetensors +3 -0
- v5_32k_layer_3/cfg.json +1 -0
- v5_32k_layer_3/metrics.json +1 -0
- v5_32k_layer_3/sae_weights.safetensors +3 -0
- v5_32k_layer_3/sparsity.safetensors +3 -0
- v5_32k_layer_4/cfg.json +1 -0
- v5_32k_layer_4/metrics.json +1 -0
- v5_32k_layer_4/sae_weights.safetensors +3 -0
- v5_32k_layer_4/sparsity.safetensors +3 -0
- v5_32k_layer_5/cfg.json +1 -0
- v5_32k_layer_5/metrics.json +1 -0
- v5_32k_layer_5/sae_weights.safetensors +3 -0
- v5_32k_layer_5/sparsity.safetensors +3 -0
- v5_32k_layer_6/cfg.json +1 -0
- v5_32k_layer_6/metrics.json +1 -0
- v5_32k_layer_6/sae_weights.safetensors +3 -0
- v5_32k_layer_6/sparsity.safetensors +3 -0
- v5_32k_layer_7/cfg.json +1 -0
- v5_32k_layer_7/metrics.json +1 -0
- v5_32k_layer_7/sae_weights.safetensors +3 -0
- v5_32k_layer_7/sparsity.safetensors +3 -0
- v5_32k_layer_8/cfg.json +1 -0
- v5_32k_layer_8/metrics.json +1 -0
- v5_32k_layer_8/sae_weights.safetensors +3 -0
- v5_32k_layer_8/sparsity.safetensors +3 -0
- v5_32k_layer_9/cfg.json +1 -0
- v5_32k_layer_9/metrics.json +1 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
benchmark_stats.png filter=lfs diff=lfs merge=lfs -text
|
benchmark_stats.csv
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
,version,d_sae,layer,kl_div_with_sae,kl_div_with_ablation,ce_loss_with_sae,ce_loss_without_sae,ce_loss_with_ablation,kl_div_score,ce_loss_score,l2_norm_in,l2_norm_out,l2_ratio,l0,l1,explained_variance,mse,total_tokens_evaluated,filepath
|
2 |
+
OAI_GPT2Small_v5_32k_resid_delta_attn/v5_32k_layer_0/metrics.json,5,32,0,0.004214919172227383,2.121527671813965,3.6037631034851074,3.599064588546753,5.74860143661499,0.9980132622222063,0.9978141733450269,32.01313781738281,31.89154624938965,0.9962403178215027,31.99397850036621,42.146968841552734,0.9667115211486816,8.074386596679688,6144.0,OAI_GPT2Small_v5_32k_resid_delta_attn/v5_32k_layer_0/metrics.json
|
3 |
+
OAI_GPT2Small_v5_32k_resid_delta_attn/v5_32k_layer_1/metrics.json,5,32,1,0.001881452277302742,0.024065840989351273,3.6013145446777344,3.599064588546753,3.6206326484680176,0.9218206304057585,0.895681107192996,9.714648246765137,9.157854080200195,0.9379116892814636,32.0,82.86055755615234,0.8680867552757263,9.54197883605957,6144.0,OAI_GPT2Small_v5_32k_resid_delta_attn/v5_32k_layer_1/metrics.json
|
4 |
+
OAI_GPT2Small_v5_32k_resid_delta_attn/v5_32k_layer_2/metrics.json,5,32,2,0.002341545419767499,0.031004613265395164,3.6009159088134766,3.599064588546753,3.626660108566284,0.9244775156611632,0.9329122891899364,8.641822814941406,8.045538902282715,0.9296190738677979,32.0,82.86361694335938,0.8532325029373169,9.74060344696045,6144.0,OAI_GPT2Small_v5_32k_resid_delta_attn/v5_32k_layer_2/metrics.json
|
5 |
+
OAI_GPT2Small_v5_32k_resid_delta_attn/v5_32k_layer_3/metrics.json,5,32,3,0.0028422873001545668,0.025133918970823288,3.602360486984253,3.599064588546753,3.6286609172821045,0.8869142809183862,0.8886382677063865,8.571012496948242,7.753783226013184,0.9047597646713257,32.0,81.56338500976562,0.8156890869140625,13.545696258544922,6144.0,OAI_GPT2Small_v5_32k_resid_delta_attn/v5_32k_layer_3/metrics.json
|
6 |
+
OAI_GPT2Small_v5_32k_resid_delta_attn/v5_32k_layer_4/metrics.json,5,32,4,0.003790093120187521,0.026722650974988937,3.603180408477783,3.599064588546753,3.6321334838867188,0.8581692690693428,0.8755380278440674,9.123016357421875,7.9935712814331055,0.8777990341186523,32.0,79.12753295898438,0.7723549604415894,19.59968376159668,6144.0,OAI_GPT2Small_v5_32k_resid_delta_attn/v5_32k_layer_4/metrics.json
|
7 |
+
OAI_GPT2Small_v5_32k_resid_delta_attn/v5_32k_layer_5/metrics.json,5,32,5,0.004055157769471407,0.031378373503685,3.602062940597534,3.599064588546753,3.6277596950531006,0.8707658391218022,0.8955099870384526,10.034396171569824,8.880256652832031,0.8862426280975342,32.0,77.73406219482422,0.7825504541397095,24.739639282226562,6144.0,OAI_GPT2Small_v5_32k_resid_delta_attn/v5_32k_layer_5/metrics.json
|
8 |
+
OAI_GPT2Small_v5_32k_resid_delta_attn/v5_32k_layer_6/metrics.json,5,32,6,0.005056389141827822,0.03278880566358566,3.604351758956909,3.599064588546753,3.634286403656006,0.8457891637253715,0.8498893258693165,11.67806625366211,10.210134506225586,0.877007007598877,32.0,74.85708618164062,0.7534131407737732,35.03990936279297,6144.0,OAI_GPT2Small_v5_32k_resid_delta_attn/v5_32k_layer_6/metrics.json
|
9 |
+
OAI_GPT2Small_v5_32k_resid_delta_attn/v5_32k_layer_7/metrics.json,5,32,7,0.004875506274402142,0.03466065973043442,3.6051602363586426,3.599064588546753,3.634833812713623,0.8593360220976661,0.8295840082118552,13.65020751953125,12.291288375854492,0.9022888541221619,32.0,71.10636901855469,0.7838281393051147,41.60253143310547,6144.0,OAI_GPT2Small_v5_32k_resid_delta_attn/v5_32k_layer_7/metrics.json
|
10 |
+
OAI_GPT2Small_v5_32k_resid_delta_attn/v5_32k_layer_8/metrics.json,5,32,8,0.00555825512856245,0.02938206121325493,3.6046571731567383,3.599064588546753,3.6258018016815186,0.8108282775595406,0.7908314310172635,16.137948989868164,14.443827629089355,0.8963184356689453,32.0,71.65403747558594,0.759009063243866,57.211456298828125,6144.0,OAI_GPT2Small_v5_32k_resid_delta_attn/v5_32k_layer_8/metrics.json
|
11 |
+
OAI_GPT2Small_v5_32k_resid_delta_attn/v5_32k_layer_9/metrics.json,5,32,9,0.004498911090195179,0.028918448835611343,3.6015830039978027,3.599064588546753,3.636500835418701,0.8444276483925708,0.9327278864341259,20.912498474121094,19.139347076416016,0.9177886843681335,32.0,65.82906341552734,0.7807904481887817,77.84382629394531,6144.0,OAI_GPT2Small_v5_32k_resid_delta_attn/v5_32k_layer_9/metrics.json
|
12 |
+
OAI_GPT2Small_v5_32k_resid_delta_attn/v5_32k_layer_10/metrics.json,5,32,10,0.003998876549303532,0.02475181221961975,3.602677822113037,3.599064588546753,3.6404881477355957,0.8384410598374779,0.912773464254675,31.82137680053711,30.121129989624023,0.9457573890686035,32.0,55.563880920410156,0.819293737411499,125.34260559082031,6144.0,OAI_GPT2Small_v5_32k_resid_delta_attn/v5_32k_layer_10/metrics.json
|
13 |
+
OAI_GPT2Small_v5_32k_resid_delta_attn/v5_32k_layer_11/metrics.json,5,32,11,0.0037718701642006636,0.10687470436096191,3.6011340618133545,3.599064588546753,3.730869770050049,0.9647075499599846,0.9842989991516394,280.86444091796875,280.543212890625,0.9986675977706909,31.6875,17.145309448242188,0.9678490161895752,180.27102661132812,6144.0,OAI_GPT2Small_v5_32k_resid_delta_attn/v5_32k_layer_11/metrics.json
|
benchmark_stats.html
ADDED
@@ -0,0 +1,618 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<style type="text/css">
|
2 |
+
#T_b08a5_row0_col2, #T_b08a5_row0_col6, #T_b08a5_row0_col16, #T_b08a5_row0_col17, #T_b08a5_row1_col3, #T_b08a5_row1_col4, #T_b08a5_row1_col6, #T_b08a5_row1_col7, #T_b08a5_row1_col17, #T_b08a5_row2_col4, #T_b08a5_row2_col5, #T_b08a5_row2_col6, #T_b08a5_row2_col7, #T_b08a5_row2_col10, #T_b08a5_row2_col11, #T_b08a5_row2_col17, #T_b08a5_row3_col4, #T_b08a5_row3_col6, #T_b08a5_row3_col7, #T_b08a5_row3_col10, #T_b08a5_row3_col11, #T_b08a5_row3_col17, #T_b08a5_row4_col4, #T_b08a5_row4_col6, #T_b08a5_row4_col10, #T_b08a5_row4_col11, #T_b08a5_row4_col17, #T_b08a5_row5_col4, #T_b08a5_row5_col6, #T_b08a5_row5_col7, #T_b08a5_row5_col17, #T_b08a5_row6_col6, #T_b08a5_row6_col12, #T_b08a5_row6_col15, #T_b08a5_row6_col17, #T_b08a5_row7_col6, #T_b08a5_row7_col17, #T_b08a5_row8_col4, #T_b08a5_row8_col6, #T_b08a5_row8_col7, #T_b08a5_row8_col8, #T_b08a5_row8_col9, #T_b08a5_row8_col17, #T_b08a5_row9_col4, #T_b08a5_row9_col6, #T_b08a5_row9_col17, #T_b08a5_row10_col4, #T_b08a5_row10_col6, #T_b08a5_row10_col17, #T_b08a5_row11_col6, #T_b08a5_row11_col13, #T_b08a5_row11_col14, #T_b08a5_row11_col17 {
|
3 |
+
background-color: #440154;
|
4 |
+
color: #f1f1f1;
|
5 |
+
}
|
6 |
+
#T_b08a5_row0_col3, #T_b08a5_row7_col2 {
|
7 |
+
background-color: #2ab07f;
|
8 |
+
color: #f1f1f1;
|
9 |
+
}
|
10 |
+
#T_b08a5_row0_col4, #T_b08a5_row0_col7, #T_b08a5_row0_col8, #T_b08a5_row0_col9, #T_b08a5_row1_col13, #T_b08a5_row1_col14, #T_b08a5_row2_col13, #T_b08a5_row2_col14, #T_b08a5_row3_col13, #T_b08a5_row4_col13, #T_b08a5_row5_col13, #T_b08a5_row6_col13, #T_b08a5_row7_col5, #T_b08a5_row7_col13, #T_b08a5_row8_col3, #T_b08a5_row8_col13, #T_b08a5_row9_col13, #T_b08a5_row10_col13, #T_b08a5_row11_col2, #T_b08a5_row11_col10, #T_b08a5_row11_col11, #T_b08a5_row11_col12, #T_b08a5_row11_col15, #T_b08a5_row11_col16 {
|
11 |
+
background-color: #fde725;
|
12 |
+
color: #000000;
|
13 |
+
}
|
14 |
+
#T_b08a5_row0_col5 {
|
15 |
+
background-color: #37b878;
|
16 |
+
color: #f1f1f1;
|
17 |
+
}
|
18 |
+
#T_b08a5_row0_col10, #T_b08a5_row0_col11, #T_b08a5_row4_col15 {
|
19 |
+
background-color: #482071;
|
20 |
+
color: #f1f1f1;
|
21 |
+
}
|
22 |
+
#T_b08a5_row0_col12, #T_b08a5_row3_col14 {
|
23 |
+
background-color: #f1e51d;
|
24 |
+
color: #000000;
|
25 |
+
}
|
26 |
+
#T_b08a5_row0_col13 {
|
27 |
+
background-color: #f4e61e;
|
28 |
+
color: #000000;
|
29 |
+
}
|
30 |
+
#T_b08a5_row0_col14 {
|
31 |
+
background-color: #2c738e;
|
32 |
+
color: #f1f1f1;
|
33 |
+
}
|
34 |
+
#T_b08a5_row0_col15 {
|
35 |
+
background-color: #fbe723;
|
36 |
+
color: #000000;
|
37 |
+
}
|
38 |
+
#T_b08a5_row1_col2 {
|
39 |
+
background-color: #482173;
|
40 |
+
color: #f1f1f1;
|
41 |
+
}
|
42 |
+
#T_b08a5_row1_col5, #T_b08a5_row5_col16 {
|
43 |
+
background-color: #482374;
|
44 |
+
color: #f1f1f1;
|
45 |
+
}
|
46 |
+
#T_b08a5_row1_col8, #T_b08a5_row5_col3 {
|
47 |
+
background-color: #21a685;
|
48 |
+
color: #f1f1f1;
|
49 |
+
}
|
50 |
+
#T_b08a5_row1_col9, #T_b08a5_row5_col9 {
|
51 |
+
background-color: #20928c;
|
52 |
+
color: #f1f1f1;
|
53 |
+
}
|
54 |
+
#T_b08a5_row1_col10, #T_b08a5_row1_col11, #T_b08a5_row4_col7, #T_b08a5_row4_col12, #T_b08a5_row5_col10, #T_b08a5_row5_col11, #T_b08a5_row6_col4, #T_b08a5_row6_col7, #T_b08a5_row7_col4, #T_b08a5_row7_col7, #T_b08a5_row9_col7 {
|
55 |
+
background-color: #440256;
|
56 |
+
color: #f1f1f1;
|
57 |
+
}
|
58 |
+
#T_b08a5_row1_col12 {
|
59 |
+
background-color: #21918c;
|
60 |
+
color: #f1f1f1;
|
61 |
+
}
|
62 |
+
#T_b08a5_row1_col15, #T_b08a5_row4_col5 {
|
63 |
+
background-color: #1f988b;
|
64 |
+
color: #f1f1f1;
|
65 |
+
}
|
66 |
+
#T_b08a5_row1_col16, #T_b08a5_row2_col16, #T_b08a5_row6_col10, #T_b08a5_row6_col11, #T_b08a5_row10_col7 {
|
67 |
+
background-color: #450457;
|
68 |
+
color: #f1f1f1;
|
69 |
+
}
|
70 |
+
#T_b08a5_row2_col2 {
|
71 |
+
background-color: #433e85;
|
72 |
+
color: #f1f1f1;
|
73 |
+
}
|
74 |
+
#T_b08a5_row2_col3, #T_b08a5_row9_col15 {
|
75 |
+
background-color: #472d7b;
|
76 |
+
color: #f1f1f1;
|
77 |
+
}
|
78 |
+
#T_b08a5_row2_col8 {
|
79 |
+
background-color: #24aa83;
|
80 |
+
color: #f1f1f1;
|
81 |
+
}
|
82 |
+
#T_b08a5_row2_col9, #T_b08a5_row9_col9 {
|
83 |
+
background-color: #3dbc74;
|
84 |
+
color: #f1f1f1;
|
85 |
+
}
|
86 |
+
#T_b08a5_row2_col12 {
|
87 |
+
background-color: #27808e;
|
88 |
+
color: #f1f1f1;
|
89 |
+
}
|
90 |
+
#T_b08a5_row2_col15 {
|
91 |
+
background-color: #23888e;
|
92 |
+
color: #f1f1f1;
|
93 |
+
}
|
94 |
+
#T_b08a5_row3_col2, #T_b08a5_row5_col5 {
|
95 |
+
background-color: #38588c;
|
96 |
+
color: #f1f1f1;
|
97 |
+
}
|
98 |
+
#T_b08a5_row3_col3, #T_b08a5_row7_col8 {
|
99 |
+
background-color: #3a548c;
|
100 |
+
color: #f1f1f1;
|
101 |
+
}
|
102 |
+
#T_b08a5_row3_col5 {
|
103 |
+
background-color: #306a8e;
|
104 |
+
color: #f1f1f1;
|
105 |
+
}
|
106 |
+
#T_b08a5_row3_col8, #T_b08a5_row4_col9 {
|
107 |
+
background-color: #297a8e;
|
108 |
+
color: #f1f1f1;
|
109 |
+
}
|
110 |
+
#T_b08a5_row3_col9 {
|
111 |
+
background-color: #23898e;
|
112 |
+
color: #f1f1f1;
|
113 |
+
}
|
114 |
+
#T_b08a5_row3_col12 {
|
115 |
+
background-color: #3e4c8a;
|
116 |
+
color: #f1f1f1;
|
117 |
+
}
|
118 |
+
#T_b08a5_row3_col15 {
|
119 |
+
background-color: #365d8d;
|
120 |
+
color: #f1f1f1;
|
121 |
+
}
|
122 |
+
#T_b08a5_row3_col16 {
|
123 |
+
background-color: #470d60;
|
124 |
+
color: #f1f1f1;
|
125 |
+
}
|
126 |
+
#T_b08a5_row4_col2 {
|
127 |
+
background-color: #2d708e;
|
128 |
+
color: #f1f1f1;
|
129 |
+
}
|
130 |
+
#T_b08a5_row4_col3 {
|
131 |
+
background-color: #1f948c;
|
132 |
+
color: #f1f1f1;
|
133 |
+
}
|
134 |
+
#T_b08a5_row4_col8 {
|
135 |
+
background-color: #3b528b;
|
136 |
+
color: #f1f1f1;
|
137 |
+
}
|
138 |
+
#T_b08a5_row4_col14 {
|
139 |
+
background-color: #dae319;
|
140 |
+
color: #000000;
|
141 |
+
}
|
142 |
+
#T_b08a5_row4_col16 {
|
143 |
+
background-color: #481a6c;
|
144 |
+
color: #f1f1f1;
|
145 |
+
}
|
146 |
+
#T_b08a5_row5_col2 {
|
147 |
+
background-color: #25858e;
|
148 |
+
color: #f1f1f1;
|
149 |
+
}
|
150 |
+
#T_b08a5_row5_col8 {
|
151 |
+
background-color: #32648e;
|
152 |
+
color: #f1f1f1;
|
153 |
+
}
|
154 |
+
#T_b08a5_row5_col12 {
|
155 |
+
background-color: #481c6e;
|
156 |
+
color: #f1f1f1;
|
157 |
+
}
|
158 |
+
#T_b08a5_row5_col14 {
|
159 |
+
background-color: #cde11d;
|
160 |
+
color: #000000;
|
161 |
+
}
|
162 |
+
#T_b08a5_row5_col15 {
|
163 |
+
background-color: #472f7d;
|
164 |
+
color: #f1f1f1;
|
165 |
+
}
|
166 |
+
#T_b08a5_row6_col2 {
|
167 |
+
background-color: #1e9b8a;
|
168 |
+
color: #f1f1f1;
|
169 |
+
}
|
170 |
+
#T_b08a5_row6_col3 {
|
171 |
+
background-color: #a5db36;
|
172 |
+
color: #000000;
|
173 |
+
}
|
174 |
+
#T_b08a5_row6_col5 {
|
175 |
+
background-color: #81d34d;
|
176 |
+
color: #000000;
|
177 |
+
}
|
178 |
+
#T_b08a5_row6_col8, #T_b08a5_row7_col9 {
|
179 |
+
background-color: #423f85;
|
180 |
+
color: #f1f1f1;
|
181 |
+
}
|
182 |
+
#T_b08a5_row6_col9, #T_b08a5_row8_col16 {
|
183 |
+
background-color: #365c8d;
|
184 |
+
color: #f1f1f1;
|
185 |
+
}
|
186 |
+
#T_b08a5_row6_col14 {
|
187 |
+
background-color: #addc30;
|
188 |
+
color: #000000;
|
189 |
+
}
|
190 |
+
#T_b08a5_row6_col16, #T_b08a5_row8_col12, #T_b08a5_row9_col5 {
|
191 |
+
background-color: #453781;
|
192 |
+
color: #f1f1f1;
|
193 |
+
}
|
194 |
+
#T_b08a5_row7_col3 {
|
195 |
+
background-color: #84d44b;
|
196 |
+
color: #000000;
|
197 |
+
}
|
198 |
+
#T_b08a5_row7_col10, #T_b08a5_row7_col11 {
|
199 |
+
background-color: #46075a;
|
200 |
+
color: #f1f1f1;
|
201 |
+
}
|
202 |
+
#T_b08a5_row7_col12 {
|
203 |
+
background-color: #404688;
|
204 |
+
color: #f1f1f1;
|
205 |
+
}
|
206 |
+
#T_b08a5_row7_col14, #T_b08a5_row11_col8 {
|
207 |
+
background-color: #89d548;
|
208 |
+
color: #000000;
|
209 |
+
}
|
210 |
+
#T_b08a5_row7_col15 {
|
211 |
+
background-color: #46327e;
|
212 |
+
color: #f1f1f1;
|
213 |
+
}
|
214 |
+
#T_b08a5_row7_col16 {
|
215 |
+
background-color: #424186;
|
216 |
+
color: #f1f1f1;
|
217 |
+
}
|
218 |
+
#T_b08a5_row8_col2 {
|
219 |
+
background-color: #52c569;
|
220 |
+
color: #000000;
|
221 |
+
}
|
222 |
+
#T_b08a5_row8_col5 {
|
223 |
+
background-color: #b0dd2f;
|
224 |
+
color: #000000;
|
225 |
+
}
|
226 |
+
#T_b08a5_row8_col10 {
|
227 |
+
background-color: #460b5e;
|
228 |
+
color: #f1f1f1;
|
229 |
+
}
|
230 |
+
#T_b08a5_row8_col11, #T_b08a5_row8_col15 {
|
231 |
+
background-color: #460a5d;
|
232 |
+
color: #f1f1f1;
|
233 |
+
}
|
234 |
+
#T_b08a5_row8_col14 {
|
235 |
+
background-color: #8ed645;
|
236 |
+
color: #000000;
|
237 |
+
}
|
238 |
+
#T_b08a5_row9_col2 {
|
239 |
+
background-color: #86d549;
|
240 |
+
color: #000000;
|
241 |
+
}
|
242 |
+
#T_b08a5_row9_col3 {
|
243 |
+
background-color: #4ac16d;
|
244 |
+
color: #000000;
|
245 |
+
}
|
246 |
+
#T_b08a5_row9_col8 {
|
247 |
+
background-color: #433d84;
|
248 |
+
color: #f1f1f1;
|
249 |
+
}
|
250 |
+
#T_b08a5_row9_col10 {
|
251 |
+
background-color: #471164;
|
252 |
+
color: #f1f1f1;
|
253 |
+
}
|
254 |
+
#T_b08a5_row9_col11, #T_b08a5_row11_col4 {
|
255 |
+
background-color: #471063;
|
256 |
+
color: #f1f1f1;
|
257 |
+
}
|
258 |
+
#T_b08a5_row9_col12 {
|
259 |
+
background-color: #31688e;
|
260 |
+
color: #f1f1f1;
|
261 |
+
}
|
262 |
+
#T_b08a5_row9_col14 {
|
263 |
+
background-color: #58c765;
|
264 |
+
color: #000000;
|
265 |
+
}
|
266 |
+
#T_b08a5_row9_col16 {
|
267 |
+
background-color: #29798e;
|
268 |
+
color: #f1f1f1;
|
269 |
+
}
|
270 |
+
#T_b08a5_row10_col2 {
|
271 |
+
background-color: #c2df23;
|
272 |
+
color: #000000;
|
273 |
+
}
|
274 |
+
#T_b08a5_row10_col3 {
|
275 |
+
background-color: #1fa287;
|
276 |
+
color: #f1f1f1;
|
277 |
+
}
|
278 |
+
#T_b08a5_row10_col5 {
|
279 |
+
background-color: #287c8e;
|
280 |
+
color: #f1f1f1;
|
281 |
+
}
|
282 |
+
#T_b08a5_row10_col8 {
|
283 |
+
background-color: #46337f;
|
284 |
+
color: #f1f1f1;
|
285 |
+
}
|
286 |
+
#T_b08a5_row10_col9 {
|
287 |
+
background-color: #21a585;
|
288 |
+
color: #f1f1f1;
|
289 |
+
}
|
290 |
+
#T_b08a5_row10_col10 {
|
291 |
+
background-color: #481f70;
|
292 |
+
color: #f1f1f1;
|
293 |
+
}
|
294 |
+
#T_b08a5_row10_col11 {
|
295 |
+
background-color: #481d6f;
|
296 |
+
color: #f1f1f1;
|
297 |
+
}
|
298 |
+
#T_b08a5_row10_col12 {
|
299 |
+
background-color: #1fa088;
|
300 |
+
color: #f1f1f1;
|
301 |
+
}
|
302 |
+
#T_b08a5_row10_col14 {
|
303 |
+
background-color: #20a486;
|
304 |
+
color: #f1f1f1;
|
305 |
+
}
|
306 |
+
#T_b08a5_row10_col15 {
|
307 |
+
background-color: #34618d;
|
308 |
+
color: #f1f1f1;
|
309 |
+
}
|
310 |
+
#T_b08a5_row10_col16 {
|
311 |
+
background-color: #3bbb75;
|
312 |
+
color: #f1f1f1;
|
313 |
+
}
|
314 |
+
#T_b08a5_row11_col3 {
|
315 |
+
background-color: #20938c;
|
316 |
+
color: #f1f1f1;
|
317 |
+
}
|
318 |
+
#T_b08a5_row11_col5, #T_b08a5_row11_col7 {
|
319 |
+
background-color: #481467;
|
320 |
+
color: #f1f1f1;
|
321 |
+
}
|
322 |
+
#T_b08a5_row11_col9 {
|
323 |
+
background-color: #d5e21a;
|
324 |
+
color: #000000;
|
325 |
+
}
|
326 |
+
</style>
|
327 |
+
<table id="T_b08a5">
|
328 |
+
<thead>
|
329 |
+
<tr>
|
330 |
+
<th class="blank level0" > </th>
|
331 |
+
<th id="T_b08a5_level0_col0" class="col_heading level0 col0" >version</th>
|
332 |
+
<th id="T_b08a5_level0_col1" class="col_heading level0 col1" >d_sae</th>
|
333 |
+
<th id="T_b08a5_level0_col2" class="col_heading level0 col2" >layer</th>
|
334 |
+
<th id="T_b08a5_level0_col3" class="col_heading level0 col3" >kl_div_with_sae</th>
|
335 |
+
<th id="T_b08a5_level0_col4" class="col_heading level0 col4" >kl_div_with_ablation</th>
|
336 |
+
<th id="T_b08a5_level0_col5" class="col_heading level0 col5" >ce_loss_with_sae</th>
|
337 |
+
<th id="T_b08a5_level0_col6" class="col_heading level0 col6" >ce_loss_without_sae</th>
|
338 |
+
<th id="T_b08a5_level0_col7" class="col_heading level0 col7" >ce_loss_with_ablation</th>
|
339 |
+
<th id="T_b08a5_level0_col8" class="col_heading level0 col8" >kl_div_score</th>
|
340 |
+
<th id="T_b08a5_level0_col9" class="col_heading level0 col9" >ce_loss_score</th>
|
341 |
+
<th id="T_b08a5_level0_col10" class="col_heading level0 col10" >l2_norm_in</th>
|
342 |
+
<th id="T_b08a5_level0_col11" class="col_heading level0 col11" >l2_norm_out</th>
|
343 |
+
<th id="T_b08a5_level0_col12" class="col_heading level0 col12" >l2_ratio</th>
|
344 |
+
<th id="T_b08a5_level0_col13" class="col_heading level0 col13" >l0</th>
|
345 |
+
<th id="T_b08a5_level0_col14" class="col_heading level0 col14" >l1</th>
|
346 |
+
<th id="T_b08a5_level0_col15" class="col_heading level0 col15" >explained_variance</th>
|
347 |
+
<th id="T_b08a5_level0_col16" class="col_heading level0 col16" >mse</th>
|
348 |
+
<th id="T_b08a5_level0_col17" class="col_heading level0 col17" >total_tokens_evaluated</th>
|
349 |
+
<th id="T_b08a5_level0_col18" class="col_heading level0 col18" >filepath</th>
|
350 |
+
</tr>
|
351 |
+
</thead>
|
352 |
+
<tbody>
|
353 |
+
<tr>
|
354 |
+
<th id="T_b08a5_level0_row0" class="row_heading level0 row0" >OAI_GPT2Small_v5_32k_resid_delta_attn/v5_32k_layer_0/metrics.json</th>
|
355 |
+
<td id="T_b08a5_row0_col0" class="data row0 col0" >5</td>
|
356 |
+
<td id="T_b08a5_row0_col1" class="data row0 col1" >32</td>
|
357 |
+
<td id="T_b08a5_row0_col2" class="data row0 col2" >0</td>
|
358 |
+
<td id="T_b08a5_row0_col3" class="data row0 col3" >0.004215</td>
|
359 |
+
<td id="T_b08a5_row0_col4" class="data row0 col4" >2.121528</td>
|
360 |
+
<td id="T_b08a5_row0_col5" class="data row0 col5" >3.603763</td>
|
361 |
+
<td id="T_b08a5_row0_col6" class="data row0 col6" >3.599065</td>
|
362 |
+
<td id="T_b08a5_row0_col7" class="data row0 col7" >5.748601</td>
|
363 |
+
<td id="T_b08a5_row0_col8" class="data row0 col8" >0.998013</td>
|
364 |
+
<td id="T_b08a5_row0_col9" class="data row0 col9" >0.997814</td>
|
365 |
+
<td id="T_b08a5_row0_col10" class="data row0 col10" >32.013138</td>
|
366 |
+
<td id="T_b08a5_row0_col11" class="data row0 col11" >31.891546</td>
|
367 |
+
<td id="T_b08a5_row0_col12" class="data row0 col12" >0.996240</td>
|
368 |
+
<td id="T_b08a5_row0_col13" class="data row0 col13" >31.993979</td>
|
369 |
+
<td id="T_b08a5_row0_col14" class="data row0 col14" >42.146969</td>
|
370 |
+
<td id="T_b08a5_row0_col15" class="data row0 col15" >0.966712</td>
|
371 |
+
<td id="T_b08a5_row0_col16" class="data row0 col16" >8.074387</td>
|
372 |
+
<td id="T_b08a5_row0_col17" class="data row0 col17" >6144.000000</td>
|
373 |
+
<td id="T_b08a5_row0_col18" class="data row0 col18" >OAI_GPT2Small_v5_32k_resid_delta_attn/v5_32k_layer_0/metrics.json</td>
|
374 |
+
</tr>
|
375 |
+
<tr>
|
376 |
+
<th id="T_b08a5_level0_row1" class="row_heading level0 row1" >OAI_GPT2Small_v5_32k_resid_delta_attn/v5_32k_layer_1/metrics.json</th>
|
377 |
+
<td id="T_b08a5_row1_col0" class="data row1 col0" >5</td>
|
378 |
+
<td id="T_b08a5_row1_col1" class="data row1 col1" >32</td>
|
379 |
+
<td id="T_b08a5_row1_col2" class="data row1 col2" >1</td>
|
380 |
+
<td id="T_b08a5_row1_col3" class="data row1 col3" >0.001881</td>
|
381 |
+
<td id="T_b08a5_row1_col4" class="data row1 col4" >0.024066</td>
|
382 |
+
<td id="T_b08a5_row1_col5" class="data row1 col5" >3.601315</td>
|
383 |
+
<td id="T_b08a5_row1_col6" class="data row1 col6" >3.599065</td>
|
384 |
+
<td id="T_b08a5_row1_col7" class="data row1 col7" >3.620633</td>
|
385 |
+
<td id="T_b08a5_row1_col8" class="data row1 col8" >0.921821</td>
|
386 |
+
<td id="T_b08a5_row1_col9" class="data row1 col9" >0.895681</td>
|
387 |
+
<td id="T_b08a5_row1_col10" class="data row1 col10" >9.714648</td>
|
388 |
+
<td id="T_b08a5_row1_col11" class="data row1 col11" >9.157854</td>
|
389 |
+
<td id="T_b08a5_row1_col12" class="data row1 col12" >0.937912</td>
|
390 |
+
<td id="T_b08a5_row1_col13" class="data row1 col13" >32.000000</td>
|
391 |
+
<td id="T_b08a5_row1_col14" class="data row1 col14" >82.860558</td>
|
392 |
+
<td id="T_b08a5_row1_col15" class="data row1 col15" >0.868087</td>
|
393 |
+
<td id="T_b08a5_row1_col16" class="data row1 col16" >9.541979</td>
|
394 |
+
<td id="T_b08a5_row1_col17" class="data row1 col17" >6144.000000</td>
|
395 |
+
<td id="T_b08a5_row1_col18" class="data row1 col18" >OAI_GPT2Small_v5_32k_resid_delta_attn/v5_32k_layer_1/metrics.json</td>
|
396 |
+
</tr>
|
397 |
+
<tr>
|
398 |
+
<th id="T_b08a5_level0_row2" class="row_heading level0 row2" >OAI_GPT2Small_v5_32k_resid_delta_attn/v5_32k_layer_2/metrics.json</th>
|
399 |
+
<td id="T_b08a5_row2_col0" class="data row2 col0" >5</td>
|
400 |
+
<td id="T_b08a5_row2_col1" class="data row2 col1" >32</td>
|
401 |
+
<td id="T_b08a5_row2_col2" class="data row2 col2" >2</td>
|
402 |
+
<td id="T_b08a5_row2_col3" class="data row2 col3" >0.002342</td>
|
403 |
+
<td id="T_b08a5_row2_col4" class="data row2 col4" >0.031005</td>
|
404 |
+
<td id="T_b08a5_row2_col5" class="data row2 col5" >3.600916</td>
|
405 |
+
<td id="T_b08a5_row2_col6" class="data row2 col6" >3.599065</td>
|
406 |
+
<td id="T_b08a5_row2_col7" class="data row2 col7" >3.626660</td>
|
407 |
+
<td id="T_b08a5_row2_col8" class="data row2 col8" >0.924478</td>
|
408 |
+
<td id="T_b08a5_row2_col9" class="data row2 col9" >0.932912</td>
|
409 |
+
<td id="T_b08a5_row2_col10" class="data row2 col10" >8.641823</td>
|
410 |
+
<td id="T_b08a5_row2_col11" class="data row2 col11" >8.045539</td>
|
411 |
+
<td id="T_b08a5_row2_col12" class="data row2 col12" >0.929619</td>
|
412 |
+
<td id="T_b08a5_row2_col13" class="data row2 col13" >32.000000</td>
|
413 |
+
<td id="T_b08a5_row2_col14" class="data row2 col14" >82.863617</td>
|
414 |
+
<td id="T_b08a5_row2_col15" class="data row2 col15" >0.853233</td>
|
415 |
+
<td id="T_b08a5_row2_col16" class="data row2 col16" >9.740603</td>
|
416 |
+
<td id="T_b08a5_row2_col17" class="data row2 col17" >6144.000000</td>
|
417 |
+
<td id="T_b08a5_row2_col18" class="data row2 col18" >OAI_GPT2Small_v5_32k_resid_delta_attn/v5_32k_layer_2/metrics.json</td>
|
418 |
+
</tr>
|
419 |
+
<tr>
|
420 |
+
<th id="T_b08a5_level0_row3" class="row_heading level0 row3" >OAI_GPT2Small_v5_32k_resid_delta_attn/v5_32k_layer_3/metrics.json</th>
|
421 |
+
<td id="T_b08a5_row3_col0" class="data row3 col0" >5</td>
|
422 |
+
<td id="T_b08a5_row3_col1" class="data row3 col1" >32</td>
|
423 |
+
<td id="T_b08a5_row3_col2" class="data row3 col2" >3</td>
|
424 |
+
<td id="T_b08a5_row3_col3" class="data row3 col3" >0.002842</td>
|
425 |
+
<td id="T_b08a5_row3_col4" class="data row3 col4" >0.025134</td>
|
426 |
+
<td id="T_b08a5_row3_col5" class="data row3 col5" >3.602360</td>
|
427 |
+
<td id="T_b08a5_row3_col6" class="data row3 col6" >3.599065</td>
|
428 |
+
<td id="T_b08a5_row3_col7" class="data row3 col7" >3.628661</td>
|
429 |
+
<td id="T_b08a5_row3_col8" class="data row3 col8" >0.886914</td>
|
430 |
+
<td id="T_b08a5_row3_col9" class="data row3 col9" >0.888638</td>
|
431 |
+
<td id="T_b08a5_row3_col10" class="data row3 col10" >8.571012</td>
|
432 |
+
<td id="T_b08a5_row3_col11" class="data row3 col11" >7.753783</td>
|
433 |
+
<td id="T_b08a5_row3_col12" class="data row3 col12" >0.904760</td>
|
434 |
+
<td id="T_b08a5_row3_col13" class="data row3 col13" >32.000000</td>
|
435 |
+
<td id="T_b08a5_row3_col14" class="data row3 col14" >81.563385</td>
|
436 |
+
<td id="T_b08a5_row3_col15" class="data row3 col15" >0.815689</td>
|
437 |
+
<td id="T_b08a5_row3_col16" class="data row3 col16" >13.545696</td>
|
438 |
+
<td id="T_b08a5_row3_col17" class="data row3 col17" >6144.000000</td>
|
439 |
+
<td id="T_b08a5_row3_col18" class="data row3 col18" >OAI_GPT2Small_v5_32k_resid_delta_attn/v5_32k_layer_3/metrics.json</td>
|
440 |
+
</tr>
|
441 |
+
<tr>
|
442 |
+
<th id="T_b08a5_level0_row4" class="row_heading level0 row4" >OAI_GPT2Small_v5_32k_resid_delta_attn/v5_32k_layer_4/metrics.json</th>
|
443 |
+
<td id="T_b08a5_row4_col0" class="data row4 col0" >5</td>
|
444 |
+
<td id="T_b08a5_row4_col1" class="data row4 col1" >32</td>
|
445 |
+
<td id="T_b08a5_row4_col2" class="data row4 col2" >4</td>
|
446 |
+
<td id="T_b08a5_row4_col3" class="data row4 col3" >0.003790</td>
|
447 |
+
<td id="T_b08a5_row4_col4" class="data row4 col4" >0.026723</td>
|
448 |
+
<td id="T_b08a5_row4_col5" class="data row4 col5" >3.603180</td>
|
449 |
+
<td id="T_b08a5_row4_col6" class="data row4 col6" >3.599065</td>
|
450 |
+
<td id="T_b08a5_row4_col7" class="data row4 col7" >3.632133</td>
|
451 |
+
<td id="T_b08a5_row4_col8" class="data row4 col8" >0.858169</td>
|
452 |
+
<td id="T_b08a5_row4_col9" class="data row4 col9" >0.875538</td>
|
453 |
+
<td id="T_b08a5_row4_col10" class="data row4 col10" >9.123016</td>
|
454 |
+
<td id="T_b08a5_row4_col11" class="data row4 col11" >7.993571</td>
|
455 |
+
<td id="T_b08a5_row4_col12" class="data row4 col12" >0.877799</td>
|
456 |
+
<td id="T_b08a5_row4_col13" class="data row4 col13" >32.000000</td>
|
457 |
+
<td id="T_b08a5_row4_col14" class="data row4 col14" >79.127533</td>
|
458 |
+
<td id="T_b08a5_row4_col15" class="data row4 col15" >0.772355</td>
|
459 |
+
<td id="T_b08a5_row4_col16" class="data row4 col16" >19.599684</td>
|
460 |
+
<td id="T_b08a5_row4_col17" class="data row4 col17" >6144.000000</td>
|
461 |
+
<td id="T_b08a5_row4_col18" class="data row4 col18" >OAI_GPT2Small_v5_32k_resid_delta_attn/v5_32k_layer_4/metrics.json</td>
|
462 |
+
</tr>
|
463 |
+
<tr>
|
464 |
+
<th id="T_b08a5_level0_row5" class="row_heading level0 row5" >OAI_GPT2Small_v5_32k_resid_delta_attn/v5_32k_layer_5/metrics.json</th>
|
465 |
+
<td id="T_b08a5_row5_col0" class="data row5 col0" >5</td>
|
466 |
+
<td id="T_b08a5_row5_col1" class="data row5 col1" >32</td>
|
467 |
+
<td id="T_b08a5_row5_col2" class="data row5 col2" >5</td>
|
468 |
+
<td id="T_b08a5_row5_col3" class="data row5 col3" >0.004055</td>
|
469 |
+
<td id="T_b08a5_row5_col4" class="data row5 col4" >0.031378</td>
|
470 |
+
<td id="T_b08a5_row5_col5" class="data row5 col5" >3.602063</td>
|
471 |
+
<td id="T_b08a5_row5_col6" class="data row5 col6" >3.599065</td>
|
472 |
+
<td id="T_b08a5_row5_col7" class="data row5 col7" >3.627760</td>
|
473 |
+
<td id="T_b08a5_row5_col8" class="data row5 col8" >0.870766</td>
|
474 |
+
<td id="T_b08a5_row5_col9" class="data row5 col9" >0.895510</td>
|
475 |
+
<td id="T_b08a5_row5_col10" class="data row5 col10" >10.034396</td>
|
476 |
+
<td id="T_b08a5_row5_col11" class="data row5 col11" >8.880257</td>
|
477 |
+
<td id="T_b08a5_row5_col12" class="data row5 col12" >0.886243</td>
|
478 |
+
<td id="T_b08a5_row5_col13" class="data row5 col13" >32.000000</td>
|
479 |
+
<td id="T_b08a5_row5_col14" class="data row5 col14" >77.734062</td>
|
480 |
+
<td id="T_b08a5_row5_col15" class="data row5 col15" >0.782550</td>
|
481 |
+
<td id="T_b08a5_row5_col16" class="data row5 col16" >24.739639</td>
|
482 |
+
<td id="T_b08a5_row5_col17" class="data row5 col17" >6144.000000</td>
|
483 |
+
<td id="T_b08a5_row5_col18" class="data row5 col18" >OAI_GPT2Small_v5_32k_resid_delta_attn/v5_32k_layer_5/metrics.json</td>
|
484 |
+
</tr>
|
485 |
+
<tr>
|
486 |
+
<th id="T_b08a5_level0_row6" class="row_heading level0 row6" >OAI_GPT2Small_v5_32k_resid_delta_attn/v5_32k_layer_6/metrics.json</th>
|
487 |
+
<td id="T_b08a5_row6_col0" class="data row6 col0" >5</td>
|
488 |
+
<td id="T_b08a5_row6_col1" class="data row6 col1" >32</td>
|
489 |
+
<td id="T_b08a5_row6_col2" class="data row6 col2" >6</td>
|
490 |
+
<td id="T_b08a5_row6_col3" class="data row6 col3" >0.005056</td>
|
491 |
+
<td id="T_b08a5_row6_col4" class="data row6 col4" >0.032789</td>
|
492 |
+
<td id="T_b08a5_row6_col5" class="data row6 col5" >3.604352</td>
|
493 |
+
<td id="T_b08a5_row6_col6" class="data row6 col6" >3.599065</td>
|
494 |
+
<td id="T_b08a5_row6_col7" class="data row6 col7" >3.634286</td>
|
495 |
+
<td id="T_b08a5_row6_col8" class="data row6 col8" >0.845789</td>
|
496 |
+
<td id="T_b08a5_row6_col9" class="data row6 col9" >0.849889</td>
|
497 |
+
<td id="T_b08a5_row6_col10" class="data row6 col10" >11.678066</td>
|
498 |
+
<td id="T_b08a5_row6_col11" class="data row6 col11" >10.210135</td>
|
499 |
+
<td id="T_b08a5_row6_col12" class="data row6 col12" >0.877007</td>
|
500 |
+
<td id="T_b08a5_row6_col13" class="data row6 col13" >32.000000</td>
|
501 |
+
<td id="T_b08a5_row6_col14" class="data row6 col14" >74.857086</td>
|
502 |
+
<td id="T_b08a5_row6_col15" class="data row6 col15" >0.753413</td>
|
503 |
+
<td id="T_b08a5_row6_col16" class="data row6 col16" >35.039909</td>
|
504 |
+
<td id="T_b08a5_row6_col17" class="data row6 col17" >6144.000000</td>
|
505 |
+
<td id="T_b08a5_row6_col18" class="data row6 col18" >OAI_GPT2Small_v5_32k_resid_delta_attn/v5_32k_layer_6/metrics.json</td>
|
506 |
+
</tr>
|
507 |
+
<tr>
|
508 |
+
<th id="T_b08a5_level0_row7" class="row_heading level0 row7" >OAI_GPT2Small_v5_32k_resid_delta_attn/v5_32k_layer_7/metrics.json</th>
|
509 |
+
<td id="T_b08a5_row7_col0" class="data row7 col0" >5</td>
|
510 |
+
<td id="T_b08a5_row7_col1" class="data row7 col1" >32</td>
|
511 |
+
<td id="T_b08a5_row7_col2" class="data row7 col2" >7</td>
|
512 |
+
<td id="T_b08a5_row7_col3" class="data row7 col3" >0.004876</td>
|
513 |
+
<td id="T_b08a5_row7_col4" class="data row7 col4" >0.034661</td>
|
514 |
+
<td id="T_b08a5_row7_col5" class="data row7 col5" >3.605160</td>
|
515 |
+
<td id="T_b08a5_row7_col6" class="data row7 col6" >3.599065</td>
|
516 |
+
<td id="T_b08a5_row7_col7" class="data row7 col7" >3.634834</td>
|
517 |
+
<td id="T_b08a5_row7_col8" class="data row7 col8" >0.859336</td>
|
518 |
+
<td id="T_b08a5_row7_col9" class="data row7 col9" >0.829584</td>
|
519 |
+
<td id="T_b08a5_row7_col10" class="data row7 col10" >13.650208</td>
|
520 |
+
<td id="T_b08a5_row7_col11" class="data row7 col11" >12.291288</td>
|
521 |
+
<td id="T_b08a5_row7_col12" class="data row7 col12" >0.902289</td>
|
522 |
+
<td id="T_b08a5_row7_col13" class="data row7 col13" >32.000000</td>
|
523 |
+
<td id="T_b08a5_row7_col14" class="data row7 col14" >71.106369</td>
|
524 |
+
<td id="T_b08a5_row7_col15" class="data row7 col15" >0.783828</td>
|
525 |
+
<td id="T_b08a5_row7_col16" class="data row7 col16" >41.602531</td>
|
526 |
+
<td id="T_b08a5_row7_col17" class="data row7 col17" >6144.000000</td>
|
527 |
+
<td id="T_b08a5_row7_col18" class="data row7 col18" >OAI_GPT2Small_v5_32k_resid_delta_attn/v5_32k_layer_7/metrics.json</td>
|
528 |
+
</tr>
|
529 |
+
<tr>
|
530 |
+
<th id="T_b08a5_level0_row8" class="row_heading level0 row8" >OAI_GPT2Small_v5_32k_resid_delta_attn/v5_32k_layer_8/metrics.json</th>
|
531 |
+
<td id="T_b08a5_row8_col0" class="data row8 col0" >5</td>
|
532 |
+
<td id="T_b08a5_row8_col1" class="data row8 col1" >32</td>
|
533 |
+
<td id="T_b08a5_row8_col2" class="data row8 col2" >8</td>
|
534 |
+
<td id="T_b08a5_row8_col3" class="data row8 col3" >0.005558</td>
|
535 |
+
<td id="T_b08a5_row8_col4" class="data row8 col4" >0.029382</td>
|
536 |
+
<td id="T_b08a5_row8_col5" class="data row8 col5" >3.604657</td>
|
537 |
+
<td id="T_b08a5_row8_col6" class="data row8 col6" >3.599065</td>
|
538 |
+
<td id="T_b08a5_row8_col7" class="data row8 col7" >3.625802</td>
|
539 |
+
<td id="T_b08a5_row8_col8" class="data row8 col8" >0.810828</td>
|
540 |
+
<td id="T_b08a5_row8_col9" class="data row8 col9" >0.790831</td>
|
541 |
+
<td id="T_b08a5_row8_col10" class="data row8 col10" >16.137949</td>
|
542 |
+
<td id="T_b08a5_row8_col11" class="data row8 col11" >14.443828</td>
|
543 |
+
<td id="T_b08a5_row8_col12" class="data row8 col12" >0.896318</td>
|
544 |
+
<td id="T_b08a5_row8_col13" class="data row8 col13" >32.000000</td>
|
545 |
+
<td id="T_b08a5_row8_col14" class="data row8 col14" >71.654037</td>
|
546 |
+
<td id="T_b08a5_row8_col15" class="data row8 col15" >0.759009</td>
|
547 |
+
<td id="T_b08a5_row8_col16" class="data row8 col16" >57.211456</td>
|
548 |
+
<td id="T_b08a5_row8_col17" class="data row8 col17" >6144.000000</td>
|
549 |
+
<td id="T_b08a5_row8_col18" class="data row8 col18" >OAI_GPT2Small_v5_32k_resid_delta_attn/v5_32k_layer_8/metrics.json</td>
|
550 |
+
</tr>
|
551 |
+
<tr>
|
552 |
+
<th id="T_b08a5_level0_row9" class="row_heading level0 row9" >OAI_GPT2Small_v5_32k_resid_delta_attn/v5_32k_layer_9/metrics.json</th>
|
553 |
+
<td id="T_b08a5_row9_col0" class="data row9 col0" >5</td>
|
554 |
+
<td id="T_b08a5_row9_col1" class="data row9 col1" >32</td>
|
555 |
+
<td id="T_b08a5_row9_col2" class="data row9 col2" >9</td>
|
556 |
+
<td id="T_b08a5_row9_col3" class="data row9 col3" >0.004499</td>
|
557 |
+
<td id="T_b08a5_row9_col4" class="data row9 col4" >0.028918</td>
|
558 |
+
<td id="T_b08a5_row9_col5" class="data row9 col5" >3.601583</td>
|
559 |
+
<td id="T_b08a5_row9_col6" class="data row9 col6" >3.599065</td>
|
560 |
+
<td id="T_b08a5_row9_col7" class="data row9 col7" >3.636501</td>
|
561 |
+
<td id="T_b08a5_row9_col8" class="data row9 col8" >0.844428</td>
|
562 |
+
<td id="T_b08a5_row9_col9" class="data row9 col9" >0.932728</td>
|
563 |
+
<td id="T_b08a5_row9_col10" class="data row9 col10" >20.912498</td>
|
564 |
+
<td id="T_b08a5_row9_col11" class="data row9 col11" >19.139347</td>
|
565 |
+
<td id="T_b08a5_row9_col12" class="data row9 col12" >0.917789</td>
|
566 |
+
<td id="T_b08a5_row9_col13" class="data row9 col13" >32.000000</td>
|
567 |
+
<td id="T_b08a5_row9_col14" class="data row9 col14" >65.829063</td>
|
568 |
+
<td id="T_b08a5_row9_col15" class="data row9 col15" >0.780790</td>
|
569 |
+
<td id="T_b08a5_row9_col16" class="data row9 col16" >77.843826</td>
|
570 |
+
<td id="T_b08a5_row9_col17" class="data row9 col17" >6144.000000</td>
|
571 |
+
<td id="T_b08a5_row9_col18" class="data row9 col18" >OAI_GPT2Small_v5_32k_resid_delta_attn/v5_32k_layer_9/metrics.json</td>
|
572 |
+
</tr>
|
573 |
+
<tr>
|
574 |
+
<th id="T_b08a5_level0_row10" class="row_heading level0 row10" >OAI_GPT2Small_v5_32k_resid_delta_attn/v5_32k_layer_10/metrics.json</th>
|
575 |
+
<td id="T_b08a5_row10_col0" class="data row10 col0" >5</td>
|
576 |
+
<td id="T_b08a5_row10_col1" class="data row10 col1" >32</td>
|
577 |
+
<td id="T_b08a5_row10_col2" class="data row10 col2" >10</td>
|
578 |
+
<td id="T_b08a5_row10_col3" class="data row10 col3" >0.003999</td>
|
579 |
+
<td id="T_b08a5_row10_col4" class="data row10 col4" >0.024752</td>
|
580 |
+
<td id="T_b08a5_row10_col5" class="data row10 col5" >3.602678</td>
|
581 |
+
<td id="T_b08a5_row10_col6" class="data row10 col6" >3.599065</td>
|
582 |
+
<td id="T_b08a5_row10_col7" class="data row10 col7" >3.640488</td>
|
583 |
+
<td id="T_b08a5_row10_col8" class="data row10 col8" >0.838441</td>
|
584 |
+
<td id="T_b08a5_row10_col9" class="data row10 col9" >0.912773</td>
|
585 |
+
<td id="T_b08a5_row10_col10" class="data row10 col10" >31.821377</td>
|
586 |
+
<td id="T_b08a5_row10_col11" class="data row10 col11" >30.121130</td>
|
587 |
+
<td id="T_b08a5_row10_col12" class="data row10 col12" >0.945757</td>
|
588 |
+
<td id="T_b08a5_row10_col13" class="data row10 col13" >32.000000</td>
|
589 |
+
<td id="T_b08a5_row10_col14" class="data row10 col14" >55.563881</td>
|
590 |
+
<td id="T_b08a5_row10_col15" class="data row10 col15" >0.819294</td>
|
591 |
+
<td id="T_b08a5_row10_col16" class="data row10 col16" >125.342606</td>
|
592 |
+
<td id="T_b08a5_row10_col17" class="data row10 col17" >6144.000000</td>
|
593 |
+
<td id="T_b08a5_row10_col18" class="data row10 col18" >OAI_GPT2Small_v5_32k_resid_delta_attn/v5_32k_layer_10/metrics.json</td>
|
594 |
+
</tr>
|
595 |
+
<tr>
|
596 |
+
<th id="T_b08a5_level0_row11" class="row_heading level0 row11" >OAI_GPT2Small_v5_32k_resid_delta_attn/v5_32k_layer_11/metrics.json</th>
|
597 |
+
<td id="T_b08a5_row11_col0" class="data row11 col0" >5</td>
|
598 |
+
<td id="T_b08a5_row11_col1" class="data row11 col1" >32</td>
|
599 |
+
<td id="T_b08a5_row11_col2" class="data row11 col2" >11</td>
|
600 |
+
<td id="T_b08a5_row11_col3" class="data row11 col3" >0.003772</td>
|
601 |
+
<td id="T_b08a5_row11_col4" class="data row11 col4" >0.106875</td>
|
602 |
+
<td id="T_b08a5_row11_col5" class="data row11 col5" >3.601134</td>
|
603 |
+
<td id="T_b08a5_row11_col6" class="data row11 col6" >3.599065</td>
|
604 |
+
<td id="T_b08a5_row11_col7" class="data row11 col7" >3.730870</td>
|
605 |
+
<td id="T_b08a5_row11_col8" class="data row11 col8" >0.964708</td>
|
606 |
+
<td id="T_b08a5_row11_col9" class="data row11 col9" >0.984299</td>
|
607 |
+
<td id="T_b08a5_row11_col10" class="data row11 col10" >280.864441</td>
|
608 |
+
<td id="T_b08a5_row11_col11" class="data row11 col11" >280.543213</td>
|
609 |
+
<td id="T_b08a5_row11_col12" class="data row11 col12" >0.998668</td>
|
610 |
+
<td id="T_b08a5_row11_col13" class="data row11 col13" >31.687500</td>
|
611 |
+
<td id="T_b08a5_row11_col14" class="data row11 col14" >17.145309</td>
|
612 |
+
<td id="T_b08a5_row11_col15" class="data row11 col15" >0.967849</td>
|
613 |
+
<td id="T_b08a5_row11_col16" class="data row11 col16" >180.271027</td>
|
614 |
+
<td id="T_b08a5_row11_col17" class="data row11 col17" >6144.000000</td>
|
615 |
+
<td id="T_b08a5_row11_col18" class="data row11 col18" >OAI_GPT2Small_v5_32k_resid_delta_attn/v5_32k_layer_11/metrics.json</td>
|
616 |
+
</tr>
|
617 |
+
</tbody>
|
618 |
+
</table>
|
benchmark_stats.png
ADDED
Git LFS Details
|
v5_32k_layer_0/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"architecture": "standard", "d_in": 768, "d_sae": 32768, "dtype": "torch.float32", "device": "cuda", "model_name": "gpt2-small", "hook_name": "blocks.0.hook_attn_out", "hook_layer": 0, "hook_head_index": null, "activation_fn_str": "topk", "activation_fn_kwargs": {"k": 32}, "apply_b_dec_to_input": true, "finetuning_scaling_factor": false, "sae_lens_training_version": null, "prepend_bos": false, "dataset_path": "Skylion007/openwebtext", "dataset_trust_remote_code": true, "context_size": 64, "normalize_activations": "layer_norm"}
|
v5_32k_layer_0/metrics.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"metrics/kl_div_with_sae": 0.004214919172227383, "metrics/kl_div_with_ablation": 2.121527671813965, "metrics/ce_loss_with_sae": 3.6037631034851074, "metrics/ce_loss_without_sae": 3.599064588546753, "metrics/ce_loss_with_ablation": 5.74860143661499, "metrics/kl_div_score": 0.9980132622222063, "metrics/ce_loss_score": 0.9978141733450269, "metrics/l2_norm_in": 32.01313781738281, "metrics/l2_norm_out": 31.89154624938965, "metrics/l2_ratio": 0.9962403178215027, "metrics/l0": 31.99397850036621, "metrics/l1": 42.146968841552734, "metrics/explained_variance": 0.9667115211486816, "metrics/mse": 8.074386596679688, "metrics/total_tokens_evaluated": 6144}
|
v5_32k_layer_0/sae_weights.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8209cd0dacf3c9638052b5dcb6eed059f1f9cb8da440faab92758b2befdd21eb
|
3 |
+
size 201461056
|
v5_32k_layer_0/sparsity.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8265db43d0be97981e9be43b9c6bd468c27a14e3fd1b60d4290779d5e3929528
|
3 |
+
size 131152
|
v5_32k_layer_1/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"architecture": "standard", "d_in": 768, "d_sae": 32768, "dtype": "torch.float32", "device": "cuda", "model_name": "gpt2-small", "hook_name": "blocks.1.hook_attn_out", "hook_layer": 1, "hook_head_index": null, "activation_fn_str": "topk", "activation_fn_kwargs": {"k": 32}, "apply_b_dec_to_input": true, "finetuning_scaling_factor": false, "sae_lens_training_version": null, "prepend_bos": false, "dataset_path": "Skylion007/openwebtext", "dataset_trust_remote_code": true, "context_size": 64, "normalize_activations": "layer_norm"}
|
v5_32k_layer_1/metrics.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"metrics/kl_div_with_sae": 0.001881452277302742, "metrics/kl_div_with_ablation": 0.024065840989351273, "metrics/ce_loss_with_sae": 3.6013145446777344, "metrics/ce_loss_without_sae": 3.599064588546753, "metrics/ce_loss_with_ablation": 3.6206326484680176, "metrics/kl_div_score": 0.9218206304057585, "metrics/ce_loss_score": 0.895681107192996, "metrics/l2_norm_in": 9.714648246765137, "metrics/l2_norm_out": 9.157854080200195, "metrics/l2_ratio": 0.9379116892814636, "metrics/l0": 32.0, "metrics/l1": 82.86055755615234, "metrics/explained_variance": 0.8680867552757263, "metrics/mse": 9.54197883605957, "metrics/total_tokens_evaluated": 6144}
|
v5_32k_layer_1/sae_weights.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:227271d3fb262bedb7f2fa188f28e3a05c4a623817f00985681b3b42d66bbb76
|
3 |
+
size 201461056
|
v5_32k_layer_1/sparsity.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f43a8be433ddcb0d02d93d37ec7edf47f4fa8263151f8a342d8a153e658e9fb1
|
3 |
+
size 131152
|
v5_32k_layer_10/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"architecture": "standard", "d_in": 768, "d_sae": 32768, "dtype": "torch.float32", "device": "cuda", "model_name": "gpt2-small", "hook_name": "blocks.10.hook_attn_out", "hook_layer": 10, "hook_head_index": null, "activation_fn_str": "topk", "activation_fn_kwargs": {"k": 32}, "apply_b_dec_to_input": true, "finetuning_scaling_factor": false, "sae_lens_training_version": null, "prepend_bos": false, "dataset_path": "Skylion007/openwebtext", "dataset_trust_remote_code": true, "context_size": 64, "normalize_activations": "layer_norm"}
|
v5_32k_layer_10/metrics.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"metrics/kl_div_with_sae": 0.003998876549303532, "metrics/kl_div_with_ablation": 0.02475181221961975, "metrics/ce_loss_with_sae": 3.602677822113037, "metrics/ce_loss_without_sae": 3.599064588546753, "metrics/ce_loss_with_ablation": 3.6404881477355957, "metrics/kl_div_score": 0.8384410598374779, "metrics/ce_loss_score": 0.912773464254675, "metrics/l2_norm_in": 31.82137680053711, "metrics/l2_norm_out": 30.121129989624023, "metrics/l2_ratio": 0.9457573890686035, "metrics/l0": 32.0, "metrics/l1": 55.563880920410156, "metrics/explained_variance": 0.819293737411499, "metrics/mse": 125.34260559082031, "metrics/total_tokens_evaluated": 6144}
|
v5_32k_layer_10/sae_weights.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f2d384b39c05ee6a1951e730e3f7c8a6f4017bbb59b20b0d78f72edbe08984d1
|
3 |
+
size 201461056
|
v5_32k_layer_10/sparsity.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:534fd6f949997b868a8f558984957f79f1fd0902a3dd66da38e7148fb74529cf
|
3 |
+
size 131152
|
v5_32k_layer_11/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"architecture": "standard", "d_in": 768, "d_sae": 32768, "dtype": "torch.float32", "device": "cuda", "model_name": "gpt2-small", "hook_name": "blocks.11.hook_attn_out", "hook_layer": 11, "hook_head_index": null, "activation_fn_str": "topk", "activation_fn_kwargs": {"k": 32}, "apply_b_dec_to_input": true, "finetuning_scaling_factor": false, "sae_lens_training_version": null, "prepend_bos": false, "dataset_path": "Skylion007/openwebtext", "dataset_trust_remote_code": true, "context_size": 64, "normalize_activations": "layer_norm"}
|
v5_32k_layer_11/metrics.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"metrics/kl_div_with_sae": 0.0037718701642006636, "metrics/kl_div_with_ablation": 0.10687470436096191, "metrics/ce_loss_with_sae": 3.6011340618133545, "metrics/ce_loss_without_sae": 3.599064588546753, "metrics/ce_loss_with_ablation": 3.730869770050049, "metrics/kl_div_score": 0.9647075499599846, "metrics/ce_loss_score": 0.9842989991516394, "metrics/l2_norm_in": 280.86444091796875, "metrics/l2_norm_out": 280.543212890625, "metrics/l2_ratio": 0.9986675977706909, "metrics/l0": 31.6875, "metrics/l1": 17.145309448242188, "metrics/explained_variance": 0.9678490161895752, "metrics/mse": 180.27102661132812, "metrics/total_tokens_evaluated": 6144}
|
v5_32k_layer_11/sae_weights.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b9c4edd6ed3c82033d3be87829b9f7384a8c2a35cc1501c9a401dde0c60de470
|
3 |
+
size 201461056
|
v5_32k_layer_11/sparsity.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6d0acf780230a6be893d1c7f32327b63d5176b59ceab2072a17a0e39ac8fcdc3
|
3 |
+
size 131152
|
v5_32k_layer_2/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"architecture": "standard", "d_in": 768, "d_sae": 32768, "dtype": "torch.float32", "device": "cuda", "model_name": "gpt2-small", "hook_name": "blocks.2.hook_attn_out", "hook_layer": 2, "hook_head_index": null, "activation_fn_str": "topk", "activation_fn_kwargs": {"k": 32}, "apply_b_dec_to_input": true, "finetuning_scaling_factor": false, "sae_lens_training_version": null, "prepend_bos": false, "dataset_path": "Skylion007/openwebtext", "dataset_trust_remote_code": true, "context_size": 64, "normalize_activations": "layer_norm"}
|
v5_32k_layer_2/metrics.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"metrics/kl_div_with_sae": 0.002341545419767499, "metrics/kl_div_with_ablation": 0.031004613265395164, "metrics/ce_loss_with_sae": 3.6009159088134766, "metrics/ce_loss_without_sae": 3.599064588546753, "metrics/ce_loss_with_ablation": 3.626660108566284, "metrics/kl_div_score": 0.9244775156611632, "metrics/ce_loss_score": 0.9329122891899364, "metrics/l2_norm_in": 8.641822814941406, "metrics/l2_norm_out": 8.045538902282715, "metrics/l2_ratio": 0.9296190738677979, "metrics/l0": 32.0, "metrics/l1": 82.86361694335938, "metrics/explained_variance": 0.8532325029373169, "metrics/mse": 9.74060344696045, "metrics/total_tokens_evaluated": 6144}
|
v5_32k_layer_2/sae_weights.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:db275872776ac34530b691887ad719316f0b00f92ad630f4fc7ec238cd50377d
|
3 |
+
size 201461056
|
v5_32k_layer_2/sparsity.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5eb7d67b660dc6027b4543482780f14982d98f5bb0a4cdf3aa9738c86a7f5393
|
3 |
+
size 131152
|
v5_32k_layer_3/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"architecture": "standard", "d_in": 768, "d_sae": 32768, "dtype": "torch.float32", "device": "cuda", "model_name": "gpt2-small", "hook_name": "blocks.3.hook_attn_out", "hook_layer": 3, "hook_head_index": null, "activation_fn_str": "topk", "activation_fn_kwargs": {"k": 32}, "apply_b_dec_to_input": true, "finetuning_scaling_factor": false, "sae_lens_training_version": null, "prepend_bos": false, "dataset_path": "Skylion007/openwebtext", "dataset_trust_remote_code": true, "context_size": 64, "normalize_activations": "layer_norm"}
|
v5_32k_layer_3/metrics.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"metrics/kl_div_with_sae": 0.0028422873001545668, "metrics/kl_div_with_ablation": 0.025133918970823288, "metrics/ce_loss_with_sae": 3.602360486984253, "metrics/ce_loss_without_sae": 3.599064588546753, "metrics/ce_loss_with_ablation": 3.6286609172821045, "metrics/kl_div_score": 0.8869142809183862, "metrics/ce_loss_score": 0.8886382677063865, "metrics/l2_norm_in": 8.571012496948242, "metrics/l2_norm_out": 7.753783226013184, "metrics/l2_ratio": 0.9047597646713257, "metrics/l0": 32.0, "metrics/l1": 81.56338500976562, "metrics/explained_variance": 0.8156890869140625, "metrics/mse": 13.545696258544922, "metrics/total_tokens_evaluated": 6144}
|
v5_32k_layer_3/sae_weights.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dd02ed8e83ae92fd441d6eb62647c28f173ecd4a2c488140474489b6cc2a7b77
|
3 |
+
size 201461056
|
v5_32k_layer_3/sparsity.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0b4478a72ad24dc5af709816db7e211db0b5526c75d0b028944c2d18f1098380
|
3 |
+
size 131152
|
v5_32k_layer_4/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"architecture": "standard", "d_in": 768, "d_sae": 32768, "dtype": "torch.float32", "device": "cuda", "model_name": "gpt2-small", "hook_name": "blocks.4.hook_attn_out", "hook_layer": 4, "hook_head_index": null, "activation_fn_str": "topk", "activation_fn_kwargs": {"k": 32}, "apply_b_dec_to_input": true, "finetuning_scaling_factor": false, "sae_lens_training_version": null, "prepend_bos": false, "dataset_path": "Skylion007/openwebtext", "dataset_trust_remote_code": true, "context_size": 64, "normalize_activations": "layer_norm"}
|
v5_32k_layer_4/metrics.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"metrics/kl_div_with_sae": 0.003790093120187521, "metrics/kl_div_with_ablation": 0.026722650974988937, "metrics/ce_loss_with_sae": 3.603180408477783, "metrics/ce_loss_without_sae": 3.599064588546753, "metrics/ce_loss_with_ablation": 3.6321334838867188, "metrics/kl_div_score": 0.8581692690693428, "metrics/ce_loss_score": 0.8755380278440674, "metrics/l2_norm_in": 9.123016357421875, "metrics/l2_norm_out": 7.9935712814331055, "metrics/l2_ratio": 0.8777990341186523, "metrics/l0": 32.0, "metrics/l1": 79.12753295898438, "metrics/explained_variance": 0.7723549604415894, "metrics/mse": 19.59968376159668, "metrics/total_tokens_evaluated": 6144}
|
v5_32k_layer_4/sae_weights.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2ff34559202308047f7e25ef2002a023dfdbdaf12002618b1f3f4f74f2bf4307
|
3 |
+
size 201461056
|
v5_32k_layer_4/sparsity.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:76e2f66395864ba52cb8da7a5d6f13e14d8e3cc44f6b21439a83ff9eb6c0ac6f
|
3 |
+
size 131152
|
v5_32k_layer_5/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"architecture": "standard", "d_in": 768, "d_sae": 32768, "dtype": "torch.float32", "device": "cuda", "model_name": "gpt2-small", "hook_name": "blocks.5.hook_attn_out", "hook_layer": 5, "hook_head_index": null, "activation_fn_str": "topk", "activation_fn_kwargs": {"k": 32}, "apply_b_dec_to_input": true, "finetuning_scaling_factor": false, "sae_lens_training_version": null, "prepend_bos": false, "dataset_path": "Skylion007/openwebtext", "dataset_trust_remote_code": true, "context_size": 64, "normalize_activations": "layer_norm"}
|
v5_32k_layer_5/metrics.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"metrics/kl_div_with_sae": 0.004055157769471407, "metrics/kl_div_with_ablation": 0.031378373503685, "metrics/ce_loss_with_sae": 3.602062940597534, "metrics/ce_loss_without_sae": 3.599064588546753, "metrics/ce_loss_with_ablation": 3.6277596950531006, "metrics/kl_div_score": 0.8707658391218022, "metrics/ce_loss_score": 0.8955099870384526, "metrics/l2_norm_in": 10.034396171569824, "metrics/l2_norm_out": 8.880256652832031, "metrics/l2_ratio": 0.8862426280975342, "metrics/l0": 32.0, "metrics/l1": 77.73406219482422, "metrics/explained_variance": 0.7825504541397095, "metrics/mse": 24.739639282226562, "metrics/total_tokens_evaluated": 6144}
|
v5_32k_layer_5/sae_weights.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:47166d73eccc163c3738446b07623ea7e4ed85af51c59c19ebbc71385b2e5e76
|
3 |
+
size 201461056
|
v5_32k_layer_5/sparsity.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:87a88d9c69a1332d9e037690e0fdcd20c11854d02406243b39afca769f29b661
|
3 |
+
size 131152
|
v5_32k_layer_6/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"architecture": "standard", "d_in": 768, "d_sae": 32768, "dtype": "torch.float32", "device": "cuda", "model_name": "gpt2-small", "hook_name": "blocks.6.hook_attn_out", "hook_layer": 6, "hook_head_index": null, "activation_fn_str": "topk", "activation_fn_kwargs": {"k": 32}, "apply_b_dec_to_input": true, "finetuning_scaling_factor": false, "sae_lens_training_version": null, "prepend_bos": false, "dataset_path": "Skylion007/openwebtext", "dataset_trust_remote_code": true, "context_size": 64, "normalize_activations": "layer_norm"}
|
v5_32k_layer_6/metrics.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"metrics/kl_div_with_sae": 0.005056389141827822, "metrics/kl_div_with_ablation": 0.03278880566358566, "metrics/ce_loss_with_sae": 3.604351758956909, "metrics/ce_loss_without_sae": 3.599064588546753, "metrics/ce_loss_with_ablation": 3.634286403656006, "metrics/kl_div_score": 0.8457891637253715, "metrics/ce_loss_score": 0.8498893258693165, "metrics/l2_norm_in": 11.67806625366211, "metrics/l2_norm_out": 10.210134506225586, "metrics/l2_ratio": 0.877007007598877, "metrics/l0": 32.0, "metrics/l1": 74.85708618164062, "metrics/explained_variance": 0.7534131407737732, "metrics/mse": 35.03990936279297, "metrics/total_tokens_evaluated": 6144}
|
v5_32k_layer_6/sae_weights.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5f68fbfac96d5b88f59bdc8dc57049b95f048d88bb534b8f7b0f9fb4d640ecef
|
3 |
+
size 201461056
|
v5_32k_layer_6/sparsity.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9f51bb77a2d34541c0fffe40c4311b9be1be3a15d8b229a2959bfc488eca0c27
|
3 |
+
size 131152
|
v5_32k_layer_7/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"architecture": "standard", "d_in": 768, "d_sae": 32768, "dtype": "torch.float32", "device": "cuda", "model_name": "gpt2-small", "hook_name": "blocks.7.hook_attn_out", "hook_layer": 7, "hook_head_index": null, "activation_fn_str": "topk", "activation_fn_kwargs": {"k": 32}, "apply_b_dec_to_input": true, "finetuning_scaling_factor": false, "sae_lens_training_version": null, "prepend_bos": false, "dataset_path": "Skylion007/openwebtext", "dataset_trust_remote_code": true, "context_size": 64, "normalize_activations": "layer_norm"}
|
v5_32k_layer_7/metrics.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"metrics/kl_div_with_sae": 0.004875506274402142, "metrics/kl_div_with_ablation": 0.03466065973043442, "metrics/ce_loss_with_sae": 3.6051602363586426, "metrics/ce_loss_without_sae": 3.599064588546753, "metrics/ce_loss_with_ablation": 3.634833812713623, "metrics/kl_div_score": 0.8593360220976661, "metrics/ce_loss_score": 0.8295840082118552, "metrics/l2_norm_in": 13.65020751953125, "metrics/l2_norm_out": 12.291288375854492, "metrics/l2_ratio": 0.9022888541221619, "metrics/l0": 32.0, "metrics/l1": 71.10636901855469, "metrics/explained_variance": 0.7838281393051147, "metrics/mse": 41.60253143310547, "metrics/total_tokens_evaluated": 6144}
|
v5_32k_layer_7/sae_weights.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7de864c425755808d51cd49f65164f0fba524923ae7d0588b1c4c808ffc325fc
|
3 |
+
size 201461056
|
v5_32k_layer_7/sparsity.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:900dca3640c1fe08bb1e87c3b8d2c51d7bd507aa752543f56926ebd157cb65c1
|
3 |
+
size 131152
|
v5_32k_layer_8/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"architecture": "standard", "d_in": 768, "d_sae": 32768, "dtype": "torch.float32", "device": "cuda", "model_name": "gpt2-small", "hook_name": "blocks.8.hook_attn_out", "hook_layer": 8, "hook_head_index": null, "activation_fn_str": "topk", "activation_fn_kwargs": {"k": 32}, "apply_b_dec_to_input": true, "finetuning_scaling_factor": false, "sae_lens_training_version": null, "prepend_bos": false, "dataset_path": "Skylion007/openwebtext", "dataset_trust_remote_code": true, "context_size": 64, "normalize_activations": "layer_norm"}
|
v5_32k_layer_8/metrics.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"metrics/kl_div_with_sae": 0.00555825512856245, "metrics/kl_div_with_ablation": 0.02938206121325493, "metrics/ce_loss_with_sae": 3.6046571731567383, "metrics/ce_loss_without_sae": 3.599064588546753, "metrics/ce_loss_with_ablation": 3.6258018016815186, "metrics/kl_div_score": 0.8108282775595406, "metrics/ce_loss_score": 0.7908314310172635, "metrics/l2_norm_in": 16.137948989868164, "metrics/l2_norm_out": 14.443827629089355, "metrics/l2_ratio": 0.8963184356689453, "metrics/l0": 32.0, "metrics/l1": 71.65403747558594, "metrics/explained_variance": 0.759009063243866, "metrics/mse": 57.211456298828125, "metrics/total_tokens_evaluated": 6144}
|
v5_32k_layer_8/sae_weights.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d45d5761057c9af25fa6b664409ecdd837d172fde59746647d50ae93f15b481d
|
3 |
+
size 201461056
|
v5_32k_layer_8/sparsity.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1fcff9d1cc927cecfe615a28634cd3a61bca7af5092a0fe05194f8e05cf843f6
|
3 |
+
size 131152
|
v5_32k_layer_9/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"architecture": "standard", "d_in": 768, "d_sae": 32768, "dtype": "torch.float32", "device": "cuda", "model_name": "gpt2-small", "hook_name": "blocks.9.hook_attn_out", "hook_layer": 9, "hook_head_index": null, "activation_fn_str": "topk", "activation_fn_kwargs": {"k": 32}, "apply_b_dec_to_input": true, "finetuning_scaling_factor": false, "sae_lens_training_version": null, "prepend_bos": false, "dataset_path": "Skylion007/openwebtext", "dataset_trust_remote_code": true, "context_size": 64, "normalize_activations": "layer_norm"}
|
v5_32k_layer_9/metrics.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"metrics/kl_div_with_sae": 0.004498911090195179, "metrics/kl_div_with_ablation": 0.028918448835611343, "metrics/ce_loss_with_sae": 3.6015830039978027, "metrics/ce_loss_without_sae": 3.599064588546753, "metrics/ce_loss_with_ablation": 3.636500835418701, "metrics/kl_div_score": 0.8444276483925708, "metrics/ce_loss_score": 0.9327278864341259, "metrics/l2_norm_in": 20.912498474121094, "metrics/l2_norm_out": 19.139347076416016, "metrics/l2_ratio": 0.9177886843681335, "metrics/l0": 32.0, "metrics/l1": 65.82906341552734, "metrics/explained_variance": 0.7807904481887817, "metrics/mse": 77.84382629394531, "metrics/total_tokens_evaluated": 6144}
|