{
"model": "gpt2",
"dataset": "wikitext",
"subset": "wikitext-103-v1",
"output_dir": "output",
"num_epochs": 20,
"num_tokens": 100000000,
"window_size": 128,
"batch_size": 128,
"learning_rate": 1e-05,
"warmup_steps": 3000,
"scheduler": "cosine",
"weight_decay": 0.1,
"random_seed": 42,
"eval_steps": 1000,
"patience": 5,
"id": "6adb2593f59e6343"
}