tim-lawson's picture
Push model using huggingface_hub.
804df23 verified
{
"accumulate_grad_batches": 64,
"auxk": 256,
"auxk_coef": 0.03125,
"batch_size": 1,
"dead_steps_threshold": null,
"dead_threshold": 0.001,
"dead_tokens_threshold": 10000000,
"expansion_factor": 64,
"k": 32,
"layers": [
19
],
"lr": 0.0001,
"max_length": 2048,
"model_name": "EleutherAI/pythia-410m-deduped",
"skip_special_tokens": true,
"standardize": true,
"tuned_lens": false
}