\"])\n",
"model = GPT2LMHeadModel(config)\n",
"num_parameters = sum(p.numel() for p in model.parameters() if p.requires_grad)\n",
"print(f\"Number of trainable parameters: {num_parameters}\")"
]
},
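{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Rough analytic cross-check of the count above (a sketch assuming the standard\n",
"# GPT-2 layout with the LM head tied to the token embedding): each block holds\n",
"# about 12*d^2 weights (QKV and output projections plus the two 4d MLP matrices),\n",
"# on top of the token and position embeddings; biases and LayerNorms are ignored.\n",
"d, L = config.n_embd, config.n_layer\n",
"approx = config.vocab_size * d + config.n_positions * d + 12 * L * d * d\n",
"print(f\"Analytic estimate: {approx / 1e6:.1f}M parameters\")"
]
},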
{
"cell_type": "code",
"execution_count": 12,
"id": "484c0fc2",
"metadata": {
"execution": {
"iopub.execute_input": "2022-04-18T01:50:16.302344Z",
"iopub.status.busy": "2022-04-18T01:50:16.301561Z",
"iopub.status.idle": "2022-04-18T01:50:21.013819Z",
"shell.execute_reply": "2022-04-18T01:50:21.014253Z",
"shell.execute_reply.started": "2022-04-16T12:24:46.722086Z"
},
"papermill": {
"duration": 4.776549,
"end_time": "2022-04-18T01:50:21.014420",
"exception": false,
"start_time": "2022-04-18T01:50:16.237871",
"status": "completed"
},
"tags": []
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Using amp half precision backend\n"
]
}
],
"source": [
"from transformers import EarlyStoppingCallback\n",
"training_args = TrainingArguments(\n",
" output_dir=\"results\",\n",
" eval_steps=2000,\n",
" save_steps=2000,\n",
" evaluation_strategy=\"steps\",\n",
" learning_rate=3e-4,\n",
" per_device_train_batch_size=32,\n",
" per_device_eval_batch_size=64,\n",
" save_total_limit=2,\n",
" num_train_epochs=8,\n",
" fp16=True,\n",
" report_to=\"none\",\n",
" dataloader_num_workers=2,\n",
" group_by_length=True,\n",
" metric_for_best_model = 'loss',\n",
" load_best_model_at_end=True\n",
")\n",
"\n",
"trainer = Trainer(\n",
" model=model,\n",
" args=training_args,\n",
" train_dataset=train_dataset,\n",
" eval_dataset=val_dataset,\n",
" data_collator=collate_fn,\n",
" callbacks = [EarlyStoppingCallback(early_stopping_patience=1)]\n",
")"
]
},
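{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Sanity check of the schedule (a sketch; the example count is taken from the\n",
"# training log below): with per_device_train_batch_size=32 on one GPU and no\n",
"# gradient accumulation, the Trainer runs ceil(num_examples / 32) steps per epoch.\n",
"import math\n",
"num_examples, batch_size, epochs = 605320, 32, 8\n",
"print(math.ceil(num_examples / batch_size) * epochs)  # 151336 total optimization steps"
]
},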
{
"cell_type": "code",
"execution_count": 13,
"id": "fbc93ddf",
"metadata": {
"execution": {
"iopub.execute_input": "2022-04-18T01:50:21.089679Z",
"iopub.status.busy": "2022-04-18T01:50:21.089153Z",
"iopub.status.idle": "2022-04-18T05:43:12.456180Z",
"shell.execute_reply": "2022-04-18T05:43:12.455654Z",
"shell.execute_reply.started": "2022-04-16T12:25:06.616641Z"
},
"papermill": {
"duration": 13971.40658,
"end_time": "2022-04-18T05:43:12.456310",
"exception": false,
"start_time": "2022-04-18T01:50:21.049730",
"status": "completed"
},
"tags": []
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/opt/conda/lib/python3.7/site-packages/transformers/optimization.py:309: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use thePyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
" FutureWarning,\n",
"***** Running training *****\n",
" Num examples = 605320\n",
" Num Epochs = 8\n",
" Instantaneous batch size per device = 32\n",
" Total train batch size (w. parallel, distributed & accumulation) = 32\n",
" Gradient Accumulation steps = 1\n",
" Total optimization steps = 151336\n"
]
},
{
"data": {
"text/html": [
"\n",
" \n",
" \n",
"
\n",
" [ 58000/151336 3:52:48 < 6:14:39, 4.15 it/s, Epoch 3/8]\n",
"
\n",
" \n",
" \n",
" \n",
" Step | \n",
" Training Loss | \n",
" Validation Loss | \n",
"
\n",
" \n",
" \n",
" \n",
" 2000 | \n",
" 4.367700 | \n",
" 4.235631 | \n",
"
\n",
" \n",
" 4000 | \n",
" 3.953300 | \n",
" 3.883913 | \n",
"
\n",
" \n",
" 6000 | \n",
" 3.790700 | \n",
" 3.730361 | \n",
"
\n",
" \n",
" 8000 | \n",
" 3.699500 | \n",
" 3.639758 | \n",
"
\n",
" \n",
" 10000 | \n",
" 3.626500 | \n",
" 3.581570 | \n",
"
\n",
" \n",
" 12000 | \n",
" 3.575800 | \n",
" 3.529477 | \n",
"
\n",
" \n",
" 14000 | \n",
" 3.539500 | \n",
" 3.490788 | \n",
"
\n",
" \n",
" 16000 | \n",
" 3.506100 | \n",
" 3.457211 | \n",
"
\n",
" \n",
" 18000 | \n",
" 3.471100 | \n",
" 3.427910 | \n",
"
\n",
" \n",
" 20000 | \n",
" 3.411700 | \n",
" 3.404946 | \n",
"
\n",
" \n",
" 22000 | \n",
" 3.388500 | \n",
" 3.384355 | \n",
"
\n",
" \n",
" 24000 | \n",
" 3.384500 | \n",
" 3.362393 | \n",
"
\n",
" \n",
" 26000 | \n",
" 3.363900 | \n",
" 3.345612 | \n",
"
\n",
" \n",
" 28000 | \n",
" 3.350600 | \n",
" 3.330873 | \n",
"
\n",
" \n",
" 30000 | \n",
" 3.339300 | \n",
" 3.316820 | \n",
"
\n",
" \n",
" 32000 | \n",
" 3.320600 | \n",
" 3.303108 | \n",
"
\n",
" \n",
" 34000 | \n",
" 3.316600 | \n",
" 3.286899 | \n",
"
\n",
" \n",
" 36000 | \n",
" 3.312900 | \n",
" 3.277738 | \n",
"
\n",
" \n",
" 38000 | \n",
" 3.272500 | \n",
" 3.271317 | \n",
"
\n",
" \n",
" 40000 | \n",
" 3.228100 | \n",
" 3.260200 | \n",
"
\n",
" \n",
" 42000 | \n",
" 3.232000 | \n",
" 3.252335 | \n",
"
\n",
" \n",
" 44000 | \n",
" 3.220500 | \n",
" 3.247865 | \n",
"
\n",
" \n",
" 46000 | \n",
" 3.219700 | \n",
" 3.236358 | \n",
"
\n",
" \n",
" 48000 | \n",
" 3.218000 | \n",
" 3.228396 | \n",
"
\n",
" \n",
" 50000 | \n",
" 3.214900 | \n",
" 3.219474 | \n",
"
\n",
" \n",
" 52000 | \n",
" 3.207100 | \n",
" 3.213028 | \n",
"
\n",
" \n",
" 54000 | \n",
" 3.206800 | \n",
" 3.206626 | \n",
"
\n",
" \n",
" 56000 | \n",
" 3.196200 | \n",
" 3.197654 | \n",
"
\n",
" \n",
" 58000 | \n",
" 3.125000 | \n",
" 3.197687 | \n",
"
\n",
" \n",
"
"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"***** Running Evaluation *****\n",
" Num examples = 12354\n",
" Batch size = 64\n",
"Saving model checkpoint to results/checkpoint-2000\n",
"Configuration saved in results/checkpoint-2000/config.json\n",
"Model weights saved in results/checkpoint-2000/pytorch_model.bin\n",
"***** Running Evaluation *****\n",
" Num examples = 12354\n",
" Batch size = 64\n",
"Saving model checkpoint to results/checkpoint-4000\n",
"Configuration saved in results/checkpoint-4000/config.json\n",
"Model weights saved in results/checkpoint-4000/pytorch_model.bin\n",
"***** Running Evaluation *****\n",
" Num examples = 12354\n",
" Batch size = 64\n",
"Saving model checkpoint to results/checkpoint-6000\n",
"Configuration saved in results/checkpoint-6000/config.json\n",
"Model weights saved in results/checkpoint-6000/pytorch_model.bin\n",
"Deleting older checkpoint [results/checkpoint-2000] due to args.save_total_limit\n",
"***** Running Evaluation *****\n",
" Num examples = 12354\n",
" Batch size = 64\n",
"Saving model checkpoint to results/checkpoint-8000\n",
"Configuration saved in results/checkpoint-8000/config.json\n",
"Model weights saved in results/checkpoint-8000/pytorch_model.bin\n",
"Deleting older checkpoint [results/checkpoint-4000] due to args.save_total_limit\n",
"***** Running Evaluation *****\n",
" Num examples = 12354\n",
" Batch size = 64\n",
"Saving model checkpoint to results/checkpoint-10000\n",
"Configuration saved in results/checkpoint-10000/config.json\n",
"Model weights saved in results/checkpoint-10000/pytorch_model.bin\n",
"Deleting older checkpoint [results/checkpoint-6000] due to args.save_total_limit\n",
"/opt/conda/lib/python3.7/site-packages/transformers/trainer.py:1410: FutureWarning: Non-finite norm encountered in torch.nn.utils.clip_grad_norm_; continuing anyway. Note that the default behavior will change in a future release to error out if a non-finite total norm is encountered. At that point, setting error_if_nonfinite=false will be required to retain the old behavior.\n",
" args.max_grad_norm,\n",
"***** Running Evaluation *****\n",
" Num examples = 12354\n",
" Batch size = 64\n",
"Saving model checkpoint to results/checkpoint-12000\n",
"Configuration saved in results/checkpoint-12000/config.json\n",
"Model weights saved in results/checkpoint-12000/pytorch_model.bin\n",
"Deleting older checkpoint [results/checkpoint-8000] due to args.save_total_limit\n",
"/opt/conda/lib/python3.7/site-packages/transformers/trainer.py:1410: FutureWarning: Non-finite norm encountered in torch.nn.utils.clip_grad_norm_; continuing anyway. Note that the default behavior will change in a future release to error out if a non-finite total norm is encountered. At that point, setting error_if_nonfinite=false will be required to retain the old behavior.\n",
" args.max_grad_norm,\n",
"***** Running Evaluation *****\n",
" Num examples = 12354\n",
" Batch size = 64\n",
"Saving model checkpoint to results/checkpoint-14000\n",
"Configuration saved in results/checkpoint-14000/config.json\n",
"Model weights saved in results/checkpoint-14000/pytorch_model.bin\n",
"Deleting older checkpoint [results/checkpoint-10000] due to args.save_total_limit\n",
"/opt/conda/lib/python3.7/site-packages/transformers/trainer.py:1410: FutureWarning: Non-finite norm encountered in torch.nn.utils.clip_grad_norm_; continuing anyway. Note that the default behavior will change in a future release to error out if a non-finite total norm is encountered. At that point, setting error_if_nonfinite=false will be required to retain the old behavior.\n",
" args.max_grad_norm,\n",
"***** Running Evaluation *****\n",
" Num examples = 12354\n",
" Batch size = 64\n",
"Saving model checkpoint to results/checkpoint-16000\n",
"Configuration saved in results/checkpoint-16000/config.json\n",
"Model weights saved in results/checkpoint-16000/pytorch_model.bin\n",
"Deleting older checkpoint [results/checkpoint-12000] due to args.save_total_limit\n",
"/opt/conda/lib/python3.7/site-packages/transformers/trainer.py:1410: FutureWarning: Non-finite norm encountered in torch.nn.utils.clip_grad_norm_; continuing anyway. Note that the default behavior will change in a future release to error out if a non-finite total norm is encountered. At that point, setting error_if_nonfinite=false will be required to retain the old behavior.\n",
" args.max_grad_norm,\n",
"***** Running Evaluation *****\n",
" Num examples = 12354\n",
" Batch size = 64\n",
"Saving model checkpoint to results/checkpoint-18000\n",
"Configuration saved in results/checkpoint-18000/config.json\n",
"Model weights saved in results/checkpoint-18000/pytorch_model.bin\n",
"Deleting older checkpoint [results/checkpoint-14000] due to args.save_total_limit\n",
"/opt/conda/lib/python3.7/site-packages/transformers/trainer.py:1410: FutureWarning: Non-finite norm encountered in torch.nn.utils.clip_grad_norm_; continuing anyway. Note that the default behavior will change in a future release to error out if a non-finite total norm is encountered. At that point, setting error_if_nonfinite=false will be required to retain the old behavior.\n",
" args.max_grad_norm,\n",
"***** Running Evaluation *****\n",
" Num examples = 12354\n",
" Batch size = 64\n",
"Saving model checkpoint to results/checkpoint-20000\n",
"Configuration saved in results/checkpoint-20000/config.json\n",
"Model weights saved in results/checkpoint-20000/pytorch_model.bin\n",
"Deleting older checkpoint [results/checkpoint-16000] due to args.save_total_limit\n",
"/opt/conda/lib/python3.7/site-packages/transformers/trainer.py:1410: FutureWarning: Non-finite norm encountered in torch.nn.utils.clip_grad_norm_; continuing anyway. Note that the default behavior will change in a future release to error out if a non-finite total norm is encountered. At that point, setting error_if_nonfinite=false will be required to retain the old behavior.\n",
" args.max_grad_norm,\n",
"***** Running Evaluation *****\n",
" Num examples = 12354\n",
" Batch size = 64\n",
"Saving model checkpoint to results/checkpoint-22000\n",
"Configuration saved in results/checkpoint-22000/config.json\n",
"Model weights saved in results/checkpoint-22000/pytorch_model.bin\n",
"Deleting older checkpoint [results/checkpoint-18000] due to args.save_total_limit\n",
"/opt/conda/lib/python3.7/site-packages/transformers/trainer.py:1410: FutureWarning: Non-finite norm encountered in torch.nn.utils.clip_grad_norm_; continuing anyway. Note that the default behavior will change in a future release to error out if a non-finite total norm is encountered. At that point, setting error_if_nonfinite=false will be required to retain the old behavior.\n",
" args.max_grad_norm,\n",
"***** Running Evaluation *****\n",
" Num examples = 12354\n",
" Batch size = 64\n",
"Saving model checkpoint to results/checkpoint-24000\n",
"Configuration saved in results/checkpoint-24000/config.json\n",
"Model weights saved in results/checkpoint-24000/pytorch_model.bin\n",
"Deleting older checkpoint [results/checkpoint-20000] due to args.save_total_limit\n",
"***** Running Evaluation *****\n",
" Num examples = 12354\n",
" Batch size = 64\n",
"Saving model checkpoint to results/checkpoint-26000\n",
"Configuration saved in results/checkpoint-26000/config.json\n",
"Model weights saved in results/checkpoint-26000/pytorch_model.bin\n",
"Deleting older checkpoint [results/checkpoint-22000] due to args.save_total_limit\n",
"/opt/conda/lib/python3.7/site-packages/transformers/trainer.py:1410: FutureWarning: Non-finite norm encountered in torch.nn.utils.clip_grad_norm_; continuing anyway. Note that the default behavior will change in a future release to error out if a non-finite total norm is encountered. At that point, setting error_if_nonfinite=false will be required to retain the old behavior.\n",
" args.max_grad_norm,\n",
"***** Running Evaluation *****\n",
" Num examples = 12354\n",
" Batch size = 64\n",
"Saving model checkpoint to results/checkpoint-28000\n",
"Configuration saved in results/checkpoint-28000/config.json\n",
"Model weights saved in results/checkpoint-28000/pytorch_model.bin\n",
"Deleting older checkpoint [results/checkpoint-24000] due to args.save_total_limit\n",
"/opt/conda/lib/python3.7/site-packages/transformers/trainer.py:1410: FutureWarning: Non-finite norm encountered in torch.nn.utils.clip_grad_norm_; continuing anyway. Note that the default behavior will change in a future release to error out if a non-finite total norm is encountered. At that point, setting error_if_nonfinite=false will be required to retain the old behavior.\n",
" args.max_grad_norm,\n",
"***** Running Evaluation *****\n",
" Num examples = 12354\n",
" Batch size = 64\n",
"Saving model checkpoint to results/checkpoint-30000\n",
"Configuration saved in results/checkpoint-30000/config.json\n",
"Model weights saved in results/checkpoint-30000/pytorch_model.bin\n",
"Deleting older checkpoint [results/checkpoint-26000] due to args.save_total_limit\n",
"/opt/conda/lib/python3.7/site-packages/transformers/trainer.py:1410: FutureWarning: Non-finite norm encountered in torch.nn.utils.clip_grad_norm_; continuing anyway. Note that the default behavior will change in a future release to error out if a non-finite total norm is encountered. At that point, setting error_if_nonfinite=false will be required to retain the old behavior.\n",
" args.max_grad_norm,\n",
"***** Running Evaluation *****\n",
" Num examples = 12354\n",
" Batch size = 64\n",
"Saving model checkpoint to results/checkpoint-32000\n",
"Configuration saved in results/checkpoint-32000/config.json\n",
"Model weights saved in results/checkpoint-32000/pytorch_model.bin\n",
"Deleting older checkpoint [results/checkpoint-28000] due to args.save_total_limit\n",
"/opt/conda/lib/python3.7/site-packages/transformers/trainer.py:1410: FutureWarning: Non-finite norm encountered in torch.nn.utils.clip_grad_norm_; continuing anyway. Note that the default behavior will change in a future release to error out if a non-finite total norm is encountered. At that point, setting error_if_nonfinite=false will be required to retain the old behavior.\n",
" args.max_grad_norm,\n",
"***** Running Evaluation *****\n",
" Num examples = 12354\n",
" Batch size = 64\n",
"Saving model checkpoint to results/checkpoint-34000\n",
"Configuration saved in results/checkpoint-34000/config.json\n",
"Model weights saved in results/checkpoint-34000/pytorch_model.bin\n",
"Deleting older checkpoint [results/checkpoint-30000] due to args.save_total_limit\n",
"/opt/conda/lib/python3.7/site-packages/transformers/trainer.py:1410: FutureWarning: Non-finite norm encountered in torch.nn.utils.clip_grad_norm_; continuing anyway. Note that the default behavior will change in a future release to error out if a non-finite total norm is encountered. At that point, setting error_if_nonfinite=false will be required to retain the old behavior.\n",
" args.max_grad_norm,\n",
"***** Running Evaluation *****\n",
" Num examples = 12354\n",
" Batch size = 64\n",
"Saving model checkpoint to results/checkpoint-36000\n",
"Configuration saved in results/checkpoint-36000/config.json\n",
"Model weights saved in results/checkpoint-36000/pytorch_model.bin\n",
"Deleting older checkpoint [results/checkpoint-32000] due to args.save_total_limit\n",
"/opt/conda/lib/python3.7/site-packages/transformers/trainer.py:1410: FutureWarning: Non-finite norm encountered in torch.nn.utils.clip_grad_norm_; continuing anyway. Note that the default behavior will change in a future release to error out if a non-finite total norm is encountered. At that point, setting error_if_nonfinite=false will be required to retain the old behavior.\n",
" args.max_grad_norm,\n",
"***** Running Evaluation *****\n",
" Num examples = 12354\n",
" Batch size = 64\n",
"Saving model checkpoint to results/checkpoint-38000\n",
"Configuration saved in results/checkpoint-38000/config.json\n",
"Model weights saved in results/checkpoint-38000/pytorch_model.bin\n",
"Deleting older checkpoint [results/checkpoint-34000] due to args.save_total_limit\n",
"/opt/conda/lib/python3.7/site-packages/transformers/trainer.py:1410: FutureWarning: Non-finite norm encountered in torch.nn.utils.clip_grad_norm_; continuing anyway. Note that the default behavior will change in a future release to error out if a non-finite total norm is encountered. At that point, setting error_if_nonfinite=false will be required to retain the old behavior.\n",
" args.max_grad_norm,\n",
"***** Running Evaluation *****\n",
" Num examples = 12354\n",
" Batch size = 64\n",
"Saving model checkpoint to results/checkpoint-40000\n",
"Configuration saved in results/checkpoint-40000/config.json\n",
"Model weights saved in results/checkpoint-40000/pytorch_model.bin\n",
"Deleting older checkpoint [results/checkpoint-36000] due to args.save_total_limit\n",
"***** Running Evaluation *****\n",
" Num examples = 12354\n",
" Batch size = 64\n",
"Saving model checkpoint to results/checkpoint-42000\n",
"Configuration saved in results/checkpoint-42000/config.json\n",
"Model weights saved in results/checkpoint-42000/pytorch_model.bin\n",
"Deleting older checkpoint [results/checkpoint-38000] due to args.save_total_limit\n",
"/opt/conda/lib/python3.7/site-packages/transformers/trainer.py:1410: FutureWarning: Non-finite norm encountered in torch.nn.utils.clip_grad_norm_; continuing anyway. Note that the default behavior will change in a future release to error out if a non-finite total norm is encountered. At that point, setting error_if_nonfinite=false will be required to retain the old behavior.\n",
" args.max_grad_norm,\n",
"***** Running Evaluation *****\n",
" Num examples = 12354\n",
" Batch size = 64\n",
"Saving model checkpoint to results/checkpoint-44000\n",
"Configuration saved in results/checkpoint-44000/config.json\n",
"Model weights saved in results/checkpoint-44000/pytorch_model.bin\n",
"Deleting older checkpoint [results/checkpoint-40000] due to args.save_total_limit\n",
"/opt/conda/lib/python3.7/site-packages/transformers/trainer.py:1410: FutureWarning: Non-finite norm encountered in torch.nn.utils.clip_grad_norm_; continuing anyway. Note that the default behavior will change in a future release to error out if a non-finite total norm is encountered. At that point, setting error_if_nonfinite=false will be required to retain the old behavior.\n",
" args.max_grad_norm,\n",
"***** Running Evaluation *****\n",
" Num examples = 12354\n",
" Batch size = 64\n",
"Saving model checkpoint to results/checkpoint-46000\n",
"Configuration saved in results/checkpoint-46000/config.json\n",
"Model weights saved in results/checkpoint-46000/pytorch_model.bin\n",
"Deleting older checkpoint [results/checkpoint-42000] due to args.save_total_limit\n",
"/opt/conda/lib/python3.7/site-packages/transformers/trainer.py:1410: FutureWarning: Non-finite norm encountered in torch.nn.utils.clip_grad_norm_; continuing anyway. Note that the default behavior will change in a future release to error out if a non-finite total norm is encountered. At that point, setting error_if_nonfinite=false will be required to retain the old behavior.\n",
" args.max_grad_norm,\n",
"***** Running Evaluation *****\n",
" Num examples = 12354\n",
" Batch size = 64\n",
"Saving model checkpoint to results/checkpoint-48000\n",
"Configuration saved in results/checkpoint-48000/config.json\n",
"Model weights saved in results/checkpoint-48000/pytorch_model.bin\n",
"Deleting older checkpoint [results/checkpoint-44000] due to args.save_total_limit\n",
"***** Running Evaluation *****\n",
" Num examples = 12354\n",
" Batch size = 64\n",
"Saving model checkpoint to results/checkpoint-50000\n",
"Configuration saved in results/checkpoint-50000/config.json\n",
"Model weights saved in results/checkpoint-50000/pytorch_model.bin\n",
"Deleting older checkpoint [results/checkpoint-46000] due to args.save_total_limit\n",
"/opt/conda/lib/python3.7/site-packages/transformers/trainer.py:1410: FutureWarning: Non-finite norm encountered in torch.nn.utils.clip_grad_norm_; continuing anyway. Note that the default behavior will change in a future release to error out if a non-finite total norm is encountered. At that point, setting error_if_nonfinite=false will be required to retain the old behavior.\n",
" args.max_grad_norm,\n",
"***** Running Evaluation *****\n",
" Num examples = 12354\n",
" Batch size = 64\n",
"Saving model checkpoint to results/checkpoint-52000\n",
"Configuration saved in results/checkpoint-52000/config.json\n",
"Model weights saved in results/checkpoint-52000/pytorch_model.bin\n",
"Deleting older checkpoint [results/checkpoint-48000] due to args.save_total_limit\n",
"/opt/conda/lib/python3.7/site-packages/transformers/trainer.py:1410: FutureWarning: Non-finite norm encountered in torch.nn.utils.clip_grad_norm_; continuing anyway. Note that the default behavior will change in a future release to error out if a non-finite total norm is encountered. At that point, setting error_if_nonfinite=false will be required to retain the old behavior.\n",
" args.max_grad_norm,\n",
"***** Running Evaluation *****\n",
" Num examples = 12354\n",
" Batch size = 64\n",
"Saving model checkpoint to results/checkpoint-54000\n",
"Configuration saved in results/checkpoint-54000/config.json\n",
"Model weights saved in results/checkpoint-54000/pytorch_model.bin\n",
"Deleting older checkpoint [results/checkpoint-50000] due to args.save_total_limit\n",
"/opt/conda/lib/python3.7/site-packages/transformers/trainer.py:1410: FutureWarning: Non-finite norm encountered in torch.nn.utils.clip_grad_norm_; continuing anyway. Note that the default behavior will change in a future release to error out if a non-finite total norm is encountered. At that point, setting error_if_nonfinite=false will be required to retain the old behavior.\n",
" args.max_grad_norm,\n",
"***** Running Evaluation *****\n",
" Num examples = 12354\n",
" Batch size = 64\n",
"Saving model checkpoint to results/checkpoint-56000\n",
"Configuration saved in results/checkpoint-56000/config.json\n",
"Model weights saved in results/checkpoint-56000/pytorch_model.bin\n",
"Deleting older checkpoint [results/checkpoint-52000] due to args.save_total_limit\n",
"/opt/conda/lib/python3.7/site-packages/transformers/trainer.py:1410: FutureWarning: Non-finite norm encountered in torch.nn.utils.clip_grad_norm_; continuing anyway. Note that the default behavior will change in a future release to error out if a non-finite total norm is encountered. At that point, setting error_if_nonfinite=false will be required to retain the old behavior.\n",
" args.max_grad_norm,\n",
"***** Running Evaluation *****\n",
" Num examples = 12354\n",
" Batch size = 64\n",
"Saving model checkpoint to results/checkpoint-58000\n",
"Configuration saved in results/checkpoint-58000/config.json\n",
"Model weights saved in results/checkpoint-58000/pytorch_model.bin\n",
"Deleting older checkpoint [results/checkpoint-54000] due to args.save_total_limit\n",
"\n",
"\n",
"Training completed. Do not forget to share your model on huggingface.co/models =)\n",
"\n",
"\n",
"Loading best model from results/checkpoint-56000 (score: 3.1976535320281982).\n"
]
},
{
"data": {
"text/plain": [
"TrainOutput(global_step=58000, training_loss=3.448922660038389, metrics={'train_runtime': 13970.1599, 'train_samples_per_second': 346.636, 'train_steps_per_second': 10.833, 'total_flos': 5.124009885990912e+16, 'train_loss': 3.448922660038389, 'epoch': 3.07})"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# n_embd = 768, n_layer = 12, n_head = 12, 58k steps, 93.4 M parameters, train loss 3.150600, val loss 3.163932\n",
"# n_embd = 768, n_layer = 6, n_head = 12, steps, 50.9 M parameters, train loss , val loss \n",
"# n_embd = 256, n_layer = 4, n_head = 8, steps, 5.94M parameters, train loss 3.374200, val loss 3.339147\n",
"# n_embd = 128, n_layer = 2, n_head = 4, 54k steps, 1.78M parameters, train loss 3.819500, val loss 3.694196\n",
"trainer.train()"
]
},
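{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# The losses above are mean cross-entropy in nats per token, so exp(loss) is\n",
"# perplexity -- a quick way to read the table (using the best validation score\n",
"# reported when the best model was loaded).\n",
"import math\n",
"best_val_loss = 3.1976535320281982  # from checkpoint-56000\n",
"print(f\"Validation perplexity: {math.exp(best_val_loss):.1f}\")  # ~24.5"
]
},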
{
"cell_type": "code",
"execution_count": 14,
"id": "127bea6d",
"metadata": {
"execution": {
"iopub.execute_input": "2022-04-18T05:43:12.684274Z",
"iopub.status.busy": "2022-04-18T05:43:12.683525Z",
"iopub.status.idle": "2022-04-18T05:43:12.685531Z",
"shell.execute_reply": "2022-04-18T05:43:12.685926Z",
"shell.execute_reply.started": "2022-04-16T12:29:27.832584Z"
},
"papermill": {
"duration": 0.122187,
"end_time": "2022-04-18T05:43:12.686065",
"exception": false,
"start_time": "2022-04-18T05:43:12.563878",
"status": "completed"
},
"tags": []
},
"outputs": [],
"source": [
"def generation(prompt, length):\n",
" tokens = tokenizer(prompt=str(length) + prompt)\n",
" output_ids = model.generate(tokens['input_ids'].to(\"cuda\"),\n",
" do_sample=True, \n",
" top_k=50,\n",
" top_p=0.95,\n",
" max_length=100)\n",
" decoded_verse = tokenizer.decode(output_ids)[5:]\n",
" return decoded_verse"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "e7f22169",
"metadata": {
"execution": {
"iopub.execute_input": "2022-04-18T05:43:12.909172Z",
"iopub.status.busy": "2022-04-18T05:43:12.908333Z",
"iopub.status.idle": "2022-04-18T05:43:13.116636Z",
"shell.execute_reply": "2022-04-18T05:43:13.117086Z",
"shell.execute_reply.started": "2022-04-16T12:30:03.02288Z"
},
"papermill": {
"duration": 0.325253,
"end_time": "2022-04-18T05:43:13.117240",
"exception": false,
"start_time": "2022-04-18T05:43:12.791987",
"status": "completed"
},
"tags": []
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Setting `pad_token_id` to `eos_token_id`:10741 for open-end generation.\n"
]
},
{
"data": {
"text/plain": [
"'花明水在溪,好在波上得。月光忽在溪,圆明了不蚀。'"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"generation(\"花好月圆\", length=5)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "536bd1dd",
"metadata": {
"execution": {
"iopub.execute_input": "2022-04-18T05:43:13.336560Z",
"iopub.status.busy": "2022-04-18T05:43:13.335672Z",
"iopub.status.idle": "2022-04-18T05:43:13.521122Z",
"shell.execute_reply": "2022-04-18T05:43:13.521536Z",
"shell.execute_reply.started": "2022-04-16T12:29:42.949166Z"
},
"papermill": {
"duration": 0.298044,
"end_time": "2022-04-18T05:43:13.521677",
"exception": false,
"start_time": "2022-04-18T05:43:13.223633",
"status": "completed"
},
"tags": []
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Setting `pad_token_id` to `eos_token_id`:10741 for open-end generation.\n"
]
},
{
"data": {
"text/plain": [
"'下山来访小园中,楼阁清幽景物同。吃吃僧斋分数宿,饭松茶灶有馀功。'"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"generation(\"下楼吃饭\", length=7)"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "dd75f0be",
"metadata": {
"execution": {
"iopub.execute_input": "2022-04-18T05:43:13.745410Z",
"iopub.status.busy": "2022-04-18T05:43:13.744513Z",
"iopub.status.idle": "2022-04-18T05:43:14.123442Z",
"shell.execute_reply": "2022-04-18T05:43:14.123883Z",
"shell.execute_reply.started": "2022-04-16T12:29:44.683058Z"
},
"papermill": {
"duration": 0.490314,
"end_time": "2022-04-18T05:43:14.124043",
"exception": false,
"start_time": "2022-04-18T05:43:13.633729",
"status": "completed"
},
"tags": []
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Setting `pad_token_id` to `eos_token_id`:10741 for open-end generation.\n"
]
},
{
"data": {
"text/plain": [
"'大深无坐今夕分明是别年,晚陪花下醉清眠。加餐我自能高咏,班列君应似谪仙。大地星河连太皞,深宵星斗下华躔。无言独向閒庭静,坐对西南又一天。'"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"generation(\"今晚加班\", length=7)"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "393331e4",
"metadata": {
"execution": {
"iopub.execute_input": "2022-04-18T05:43:14.346788Z",
"iopub.status.busy": "2022-04-18T05:43:14.345916Z",
"iopub.status.idle": "2022-04-18T05:43:14.539457Z",
"shell.execute_reply": "2022-04-18T05:43:14.539890Z",
"shell.execute_reply.started": "2022-04-16T12:29:56.371973Z"
},
"papermill": {
"duration": 0.307929,
"end_time": "2022-04-18T05:43:14.540041",
"exception": false,
"start_time": "2022-04-18T05:43:14.232112",
"status": "completed"
},
"tags": []
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Setting `pad_token_id` to `eos_token_id`:10741 for open-end generation.\n"
]
},
{
"data": {
"text/plain": [
"'加餐未暇望天颜,班列群仙戏綵幡。内史赐花频赐宴,卷帘先为看朝元。'"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"generation(\"加班内卷\", length=7)"
]
},
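{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Persist the fine-tuned weights and tokenizer for later reuse (a minimal\n",
"# sketch; \"gpt2-poem\" is a placeholder directory, and this assumes `tokenizer`\n",
"# is a standard transformers tokenizer exposing save_pretrained).\n",
"trainer.save_model(\"gpt2-poem\")\n",
"tokenizer.save_pretrained(\"gpt2-poem\")"
]
}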
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.10"
},
"papermill": {
"default_parameters": {},
"duration": 14060.414143,
"end_time": "2022-04-18T05:43:17.806051",
"environment_variables": {},
"exception": null,
"input_path": "__notebook__.ipynb",
"output_path": "__notebook__.ipynb",
"parameters": {},
"start_time": "2022-04-18T01:48:57.391908",
"version": "2.3.3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}