{ "accumulate_grad_batches": 64, "autoencoder": null, "auxk": 256, "auxk_coef": 0.03125, "batch_size": 1, "dead_steps_threshold": null, "dead_threshold": 0.001, "dead_tokens_threshold": 10000000, "expansion_factor": 64, "k": 16, "layers": null, "lr": 0.0001, "max_length": 2048, "model_name": "EleutherAI/pythia-70m-deduped", "skip_special_tokens": true, "standardize": true, "transformer": null }