{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "138889b92720ce2e", "metadata": { "ExecuteTime": { "end_time": "2024-05-14T09:02:09.162993Z", "start_time": "2024-05-14T09:02:09.134625Z" }, "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", " | runname | \n", "seed | \n", "steps | \n", "agg_score | \n", "commonsense_qa/acc | \n", "commonsense_qa/acc_norm | \n", "hellaswag/acc | \n", "hellaswag/acc_norm | \n", "openbookqa/acc | \n", "openbookqa/acc_norm | \n", "... | \n", "siqa/acc | \n", "siqa/acc_norm | \n", "winogrande/acc | \n", "winogrande/acc_norm | \n", "sciq/acc | \n", "sciq/acc_norm | \n", "arc/acc | \n", "arc/acc_norm | \n", "mmlu/acc | \n", "mmlu/acc_norm | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "big-run-sampled-fineweb-c4-filters | \n", "6 | \n", "0 | \n", "0.330893 | \n", "0.186 | \n", "0.233 | \n", "0.272 | \n", "0.258 | \n", "0.166 | \n", "0.286 | \n", "... | \n", "0.367 | \n", "0.362 | \n", "0.516 | \n", "0.497 | \n", "0.208 | \n", "0.202 | \n", "0.2195 | \n", "0.2510 | \n", "0.230294 | \n", "0.250147 | \n", "
1 | \n", "big-run-sampled-fineweb-c4-filters | \n", "6 | \n", "1000 | \n", "0.359303 | \n", "0.250 | \n", "0.263 | \n", "0.293 | \n", "0.285 | \n", "0.140 | \n", "0.276 | \n", "... | \n", "0.376 | \n", "0.401 | \n", "0.497 | \n", "0.479 | \n", "0.594 | \n", "0.524 | \n", "0.2740 | \n", "0.2985 | \n", "0.241617 | \n", "0.251920 | \n", "
2 | \n", "big-run-sampled-fineweb-c4-filters | \n", "6 | \n", "2000 | \n", "0.375393 | \n", "0.268 | \n", "0.277 | \n", "0.319 | \n", "0.324 | \n", "0.150 | \n", "0.274 | \n", "... | \n", "0.372 | \n", "0.411 | \n", "0.507 | \n", "0.484 | \n", "0.688 | \n", "0.606 | \n", "0.3015 | \n", "0.3270 | \n", "0.246577 | \n", "0.259146 | \n", "
3 | \n", "big-run-sampled-fineweb-c4-filters | \n", "6 | \n", "3000 | \n", "0.389655 | \n", "0.303 | \n", "0.305 | \n", "0.324 | \n", "0.358 | \n", "0.152 | \n", "0.280 | \n", "... | \n", "0.383 | \n", "0.389 | \n", "0.520 | \n", "0.506 | \n", "0.741 | \n", "0.647 | \n", "0.3395 | \n", "0.3405 | \n", "0.255001 | \n", "0.268740 | \n", "
4 | \n", "big-run-sampled-fineweb-c4-filters | \n", "6 | \n", "4000 | \n", "0.401195 | \n", "0.309 | \n", "0.310 | \n", "0.353 | \n", "0.393 | \n", "0.138 | \n", "0.288 | \n", "... | \n", "0.378 | \n", "0.402 | \n", "0.534 | \n", "0.511 | \n", "0.766 | \n", "0.652 | \n", "0.3395 | \n", "0.3495 | \n", "0.256203 | \n", "0.269056 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
667 | \n", "big-run-sampled_full_filtered_no_dedup | \n", "6 | \n", "163000 | \n", "0.466255 | \n", "0.426 | \n", "0.372 | \n", "0.469 | \n", "0.555 | \n", "0.242 | \n", "0.354 | \n", "... | \n", "0.389 | \n", "0.394 | \n", "0.563 | \n", "0.544 | \n", "0.869 | \n", "0.808 | \n", "0.4460 | \n", "0.4435 | \n", "0.297125 | \n", "0.317543 | \n", "
668 | \n", "big-run-sampled_full_filtered_no_dedup | \n", "6 | \n", "164000 | \n", "0.469743 | \n", "0.431 | \n", "0.376 | \n", "0.467 | \n", "0.556 | \n", "0.232 | \n", "0.356 | \n", "... | \n", "0.391 | \n", "0.397 | \n", "0.568 | \n", "0.552 | \n", "0.861 | \n", "0.800 | \n", "0.4450 | \n", "0.4515 | \n", "0.302706 | \n", "0.318447 | \n", "
669 | \n", "big-run-sampled_full_filtered_no_dedup | \n", "6 | \n", "165000 | \n", "0.469847 | \n", "0.426 | \n", "0.375 | \n", "0.472 | \n", "0.549 | \n", "0.234 | \n", "0.364 | \n", "... | \n", "0.389 | \n", "0.401 | \n", "0.562 | \n", "0.548 | \n", "0.867 | \n", "0.795 | \n", "0.4435 | \n", "0.4475 | \n", "0.297586 | \n", "0.319279 | \n", "
670 | \n", "big-run-sampled_full_filtered_no_dedup | \n", "6 | \n", "166000 | \n", "0.467651 | \n", "0.423 | \n", "0.365 | \n", "0.470 | \n", "0.555 | \n", "0.226 | \n", "0.356 | \n", "... | \n", "0.392 | \n", "0.399 | \n", "0.564 | \n", "0.545 | \n", "0.872 | \n", "0.812 | \n", "0.4365 | \n", "0.4475 | \n", "0.297256 | \n", "0.319704 | \n", "
671 | \n", "big-run-sampled_full_filtered_no_dedup | \n", "6 | \n", "167000 | \n", "0.469652 | \n", "0.416 | \n", "0.373 | \n", "0.469 | \n", "0.560 | \n", "0.234 | \n", "0.356 | \n", "... | \n", "0.392 | \n", "0.394 | \n", "0.565 | \n", "0.557 | \n", "0.867 | \n", "0.803 | \n", "0.4430 | \n", "0.4455 | \n", "0.297409 | \n", "0.317717 | \n", "
672 rows × 22 columns
\n", "