{ "model": "gpt2", "dataset": "wikitext", "subset": "wikitext-103-v1", "output_dir": "output", "num_epochs": 20, "num_tokens": 100000000, "window_size": 256, "batch_size": 64, "learning_rate": 1e-05, "warmup_steps": 3000, "scheduler": "cosine", "weight_decay": 0.1, "random_seed": 42, "eval_steps": 1000, "patience": 5, "id": "26e50955232e9b5c" }