diff --git "a/huggingface_trump.ipynb" "b/huggingface_trump.ipynb" new file mode 100644--- /dev/null +++ "b/huggingface_trump.ipynb" @@ -0,0 +1,2796 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [], + "machine_shape": "hm" + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + }, + "gpuClass": "standard", + "accelerator": "GPU", + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "911053acfcee4a0b99fdaa13014ecf1f": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_0d29b6be52864c56884bfda196e55874", + "IPY_MODEL_a8021c1f961c4789800730dd347421f9", + "IPY_MODEL_642bf3de65f24422b51e1454a463beb1" + ], + "layout": "IPY_MODEL_ae9c94dea4a14389853aa68ecd8d481c" + } + }, + "0d29b6be52864c56884bfda196e55874": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_feadf0422faf47b395f45b790be964bf", + "placeholder": "", + "style": "IPY_MODEL_ecad22a54df24327b03c52cc0e7b4113", + "value": "Downloading: 100%" + } + }, + "a8021c1f961c4789800730dd347421f9": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": 
"1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_40f54ee5a8cd43efa7a9371d075630a6", + "max": 665, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_9a993c8a4c0440068c08937bff1866d5", + "value": 665 + } + }, + "642bf3de65f24422b51e1454a463beb1": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_83869b2be90d4a5999a8652d6c8a6d14", + "placeholder": "", + "style": "IPY_MODEL_238e6a9345094573ad2ecd6d54f8d5d0", + "value": " 665/665 [00:00<00:00, 10.5kB/s]" + } + }, + "ae9c94dea4a14389853aa68ecd8d481c": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + 
"grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "feadf0422faf47b395f45b790be964bf": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "ecad22a54df24327b03c52cc0e7b4113": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + 
"_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "40f54ee5a8cd43efa7a9371d075630a6": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "9a993c8a4c0440068c08937bff1866d5": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + 
"83869b2be90d4a5999a8652d6c8a6d14": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "238e6a9345094573ad2ecd6d54f8d5d0": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "c0d126c128dd48c998dcd374fc182649": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": 
"HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_1b4ad3e69eef46ffa190eaf8ca733d60", + "IPY_MODEL_b70e35bdac11466c82b2cbc27a182972", + "IPY_MODEL_00aa2d85364540d88648aba991074228" + ], + "layout": "IPY_MODEL_6c6cce521ee745d98af6fec370f2d337" + } + }, + "1b4ad3e69eef46ffa190eaf8ca733d60": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_5a3c3a4ed0d14debb85f9a55b69433e6", + "placeholder": "", + "style": "IPY_MODEL_e314714751ed4c74b176ca423fa1c520", + "value": "Downloading: 100%" + } + }, + "b70e35bdac11466c82b2cbc27a182972": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_e6d4c4156de646cda2764401f99170b7", + "max": 1042301, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_f49e30bcf1a043d3b7c9c2bda894c169", + "value": 1042301 + } + }, + "00aa2d85364540d88648aba991074228": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": 
"@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_f4e9603647fd425dae2786f0405d2df0", + "placeholder": "", + "style": "IPY_MODEL_7767c82db2404a9097553cf7f540b07e", + "value": " 1.04M/1.04M [00:00<00:00, 7.16MB/s]" + } + }, + "6c6cce521ee745d98af6fec370f2d337": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "5a3c3a4ed0d14debb85f9a55b69433e6": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + 
"_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "e314714751ed4c74b176ca423fa1c520": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "e6d4c4156de646cda2764401f99170b7": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": 
null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "f49e30bcf1a043d3b7c9c2bda894c169": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "f4e9603647fd425dae2786f0405d2df0": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + 
"height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "7767c82db2404a9097553cf7f540b07e": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "3a82180b67cd4b988ac6389b06682a07": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_b8a821410a0d465cb4db403a47130ff1", + "IPY_MODEL_65f936e2e5f14dad80ddf9fb476f95bd", + "IPY_MODEL_756c1c5e3a364ebcb8360ec41bdf6dd9" + ], + "layout": "IPY_MODEL_1dbb067f28c64c46926ca201ada453e2" + } + }, + "b8a821410a0d465cb4db403a47130ff1": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + 
"description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_f8f51c37325741c9a814de7a36842a1c", + "placeholder": "", + "style": "IPY_MODEL_b8500cb37b074231a1ded3f176fac43e", + "value": "Downloading: 100%" + } + }, + "65f936e2e5f14dad80ddf9fb476f95bd": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_93b3862ce5b0442dbeb1d726f3b5e1f7", + "max": 456318, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_1ab239aa0f874d058678ff045749f009", + "value": 456318 + } + }, + "756c1c5e3a364ebcb8360ec41bdf6dd9": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_1b896a02d860416a89e07e7cd5c03414", + "placeholder": "", + "style": "IPY_MODEL_4ebd49d003db47c6944562290288f965", + "value": " 456k/456k [00:00<00:00, 8.74MB/s]" + } + }, + "1dbb067f28c64c46926ca201ada453e2": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + 
"_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "f8f51c37325741c9a814de7a36842a1c": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": 
null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "b8500cb37b074231a1ded3f176fac43e": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "93b3862ce5b0442dbeb1d726f3b5e1f7": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "1ab239aa0f874d058678ff045749f009": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + 
"model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "1b896a02d860416a89e07e7cd5c03414": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "4ebd49d003db47c6944562290288f965": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": 
"1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "df3fcbff8dbf46b8ab84cdfad41832d2": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_a8a316be0eef437890ae0a9b0bfde076", + "IPY_MODEL_6fbb76c4b7e14416ab201ee35bab7434", + "IPY_MODEL_64bb8f7afd40423491b018388c298fe3" + ], + "layout": "IPY_MODEL_d3ae66e2ac104b929072434187ce29f1" + } + }, + "a8a316be0eef437890ae0a9b0bfde076": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_ddb1075518c34fb2931b7ef24cccfc26", + "placeholder": "", + "style": "IPY_MODEL_bbcd7a8dd261459ca39c0da46c09016c", + "value": "Downloading: 100%" + } + }, + "6fbb76c4b7e14416ab201ee35bab7434": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": 
"IPY_MODEL_2ff1219ba37a445d8353c3de82eb28da", + "max": 1355256, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_e8f44559c28441218ea6f8d4f683b624", + "value": 1355256 + } + }, + "64bb8f7afd40423491b018388c298fe3": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_10479a9321ad4094939f191bd7e01b3e", + "placeholder": "", + "style": "IPY_MODEL_e3a6155dd3d547a7869e47b4bc331add", + "value": " 1.36M/1.36M [00:00<00:00, 15.1MB/s]" + } + }, + "d3ae66e2ac104b929072434187ce29f1": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + 
"overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "ddb1075518c34fb2931b7ef24cccfc26": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "bbcd7a8dd261459ca39c0da46c09016c": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "2ff1219ba37a445d8353c3de82eb28da": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + 
"_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "e8f44559c28441218ea6f8d4f683b624": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "10479a9321ad4094939f191bd7e01b3e": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + 
"align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "e3a6155dd3d547a7869e47b4bc331add": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "1df22013cc6a48c6b5fe61742c65aa48": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_b72126e72996421ca81863423f2cb3c5", + "IPY_MODEL_657159a5b54e4924b1b6b5ef605b1700", + "IPY_MODEL_64ff22a46a9a4908a2d1472c5fa7e99f" + ], + "layout": "IPY_MODEL_35d93ff67e0349fcb5390994d5d26bcd" + } + }, + "b72126e72996421ca81863423f2cb3c5": 
{ + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_daacd6921e604328b09ee58bbaded02b", + "placeholder": "", + "style": "IPY_MODEL_306b15aac2734cf6b883a057e9f1fc56", + "value": "Downloading: 100%" + } + }, + "657159a5b54e4924b1b6b5ef605b1700": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_6e26600be4d04f3e943e0ab44c4845e6", + "max": 665, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_f50b3c09b56f4f70b06dc84ef2510624", + "value": 665 + } + }, + "64ff22a46a9a4908a2d1472c5fa7e99f": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_17460a9051ae45989b82f0039cf71947", + "placeholder": "", + "style": "IPY_MODEL_83caf28cd98c4c49ae34f89e1447c752", + "value": " 665/665 
[00:00<00:00, 25.9kB/s]" + } + }, + "35d93ff67e0349fcb5390994d5d26bcd": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "daacd6921e604328b09ee58bbaded02b": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + 
"grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "306b15aac2734cf6b883a057e9f1fc56": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "6e26600be4d04f3e943e0ab44c4845e6": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + 
"min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "f50b3c09b56f4f70b06dc84ef2510624": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "17460a9051ae45989b82f0039cf71947": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + 
"83caf28cd98c4c49ae34f89e1447c752": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "28c5a75d84994cb49ab6e93d3e0c6c07": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_d01fc3bfc2ac459f8a19f7d7b6fc875c", + "IPY_MODEL_9ef59238c50449c6a2430277c4aef15a", + "IPY_MODEL_6439123840de43568b89ff5208848af9" + ], + "layout": "IPY_MODEL_ba1f8ae7d6d746d1b4c7e67fec7a6b1c" + } + }, + "d01fc3bfc2ac459f8a19f7d7b6fc875c": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_82997a4ce4fb4cf2bfea595b1a518759", + "placeholder": "", + "style": "IPY_MODEL_71fbc75efbca443da2c64ac83899e9a6", + "value": "Downloading: 100%" + } + }, + "9ef59238c50449c6a2430277c4aef15a": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": 
[], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_bcc3d514e5ca48e980b5d9543699bc47", + "max": 548118077, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_fb9bc6fb13784b5794b17890fdb8099a", + "value": 548118077 + } + }, + "6439123840de43568b89ff5208848af9": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_3542e2b6625b446c925f272358906eb2", + "placeholder": "", + "style": "IPY_MODEL_f220af18d641462c8de743f8ebb0d2dc", + "value": " 548M/548M [00:07<00:00, 76.2MB/s]" + } + }, + "ba1f8ae7d6d746d1b4c7e67fec7a6b1c": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + 
"grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "82997a4ce4fb4cf2bfea595b1a518759": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "71fbc75efbca443da2c64ac83899e9a6": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": 
"DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "bcc3d514e5ca48e980b5d9543699bc47": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "fb9bc6fb13784b5794b17890fdb8099a": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "3542e2b6625b446c925f272358906eb2": { + "model_module": "@jupyter-widgets/base", 
+ "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "f220af18d641462c8de743f8ebb0d2dc": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + } + } + } + }, + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# Fintune GPT2 using HuggingFace & PyTorch" + ], + "metadata": { + "id": "2K_YzZvVxv81" + } + }, + { + "cell_type": "code", + "source": [ + "!pip install --quiet transformers==4.2.2" + ], + "metadata": { + "id": "F4DGSHU_e915", + "colab": { + "base_uri": "https://localhost:8080/" + }, + 
"outputId": "3cbb940e-5bc6-439e-be7f-524b4f5a8935" + }, + "execution_count": 1, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.8/1.8 MB\u001b[0m \u001b[31m45.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m880.6/880.6 kB\u001b[0m \u001b[31m45.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.9/2.9 MB\u001b[0m \u001b[31m82.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h Building wheel for sacremoses (setup.py) ... \u001b[?25l\u001b[?25hdone\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "Based off of [Philipp Schmid's](https://www.philschmid.de/philipp-schmid) [notebook](https://colab.research.google.com/github/philschmid/fine-tune-GPT-2/blob/master/Fine_tune_a_non_English_GPT_2_Model_with_Huggingface.ipynb#scrollTo=laDp891gO25V) with data from the [Trump Twitter Archive](https://www.thetrumparchive.com/?results=1).\n", + "\n", + "- GPT2 [Model Card](https://huggingface.co/gpt2)\n", + "-[HuggingFace's Finetuning Docs](https://huggingface.co/learn/nlp-course/chapter3/3?fw=pt)" + ], + "metadata": { + "id": "lw58eJhpyCww" + } + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "id": "iwZxNbIIbzbR" + }, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2\n", + "\n", + "import pandas as pd\n", + "import json\n", + "from transformers import (\n", + " TextDataset,\n", + " DataCollatorForLanguageModeling,\n", + " AutoTokenizer,\n", + " AutoModelWithLMHead,\n", + " get_linear_schedule_with_warmup,\n", + " Trainer,\n", + " TrainingArguments,\n", + " pipeline\n", + ")\n", + "from sklearn.model_selection import train_test_split\n", + 
"from tqdm.auto import tqdm\n", + "import torch\n", + "from pathlib import Path" + ] + }, + { + "cell_type": "code", + "source": [ + "model_name = \"gpt2\"\n", + "\n", + "if model_name == \"gpt2\":\n", + " model_size = \"124M\"\n", + "elif model_name == \"gpt2-medium\":\n", + " model_size = \"355M\"\n", + "elif model_name == \"gpt2-large\":\n", + " model_size = \"774M\"\n", + "elif model_name == \"gpt2-xl\":\n", + " model_size = \"1.5B\"" + ], + "metadata": { + "id": "GxBa9kFFsHaM" + }, + "execution_count": 3, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# define some params for model\n", + "max_length = 100\n", + "batch_size = 32\n", + "epochs = 5\n", + "learning_rate = 5e-4\n", + "warmup_steps = 1e2\n", + "epsilon = 1e-8\n", + "\n", + "# produce sample output every 100 steps\n", + "sample_every = 100" + ], + "metadata": { + "id": "1hFiQUbNcANl" + }, + "execution_count": 4, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "## Fetch / Load Data & Preprocess" + ], + "metadata": { + "id": "9PY6SSKlcJNq" + } + }, + { + "cell_type": "code", + "source": [ + "tweets_path = Path(\"./data/tweets.json\")\n", + "train_path = Path(\"./data/train_tweets.csv\")\n", + "dev_path = Path(\"./data/dev_tweets.csv\")\n", + "\n", + "# fetch data if !exists already\n", + "if not tweets_path.exists():\n", + " !mkdir data\n", + " !wget -O ./data/tweets.json \"https://drive.google.com/uc?export=download&id=16wm-2NTKohhcA26w-kaWfhLIGwl_oX95\"\n", + "\n", + "if not (train_path.exists() and dev_path.exists()):\n", + " with open(tweets_path, 'rb') as f:\n", + " # read json file into dict and then parse into df\n", + " as_dict = json.loads(f.read())\n", + " df = pd.DataFrame(as_dict)\n", + " \n", + " # filter df by !retweet\n", + " df = df[df['isRetweet'] == \"f\"]\n", + "\n", + " # filter df to only text\n", + " def is_multimedia(tweet: str):\n", + " if tweet.startswith('https://t.co/'):\n", + " return \"t\"\n", + " else:\n", + " return \"f\"\n", + "\n", 
+ " df['isMultimedia'] = df['text'].apply(lambda x : is_multimedia(x))\n", + " df = df[df['isMultimedia'] == \"f\"]\n", + " df = df.reset_index(drop=True)\n", + "\n", + " # filter tweets to remove 'amp;'\n", + " def remove_amp(tweet):\n", + " tweet = tweet.replace('amp;', '')\n", + " tweet = tweet.replace('amp', '')\n", + " return tweet\n", + " df['text'] = df['text'].apply(lambda x: remove_amp(x))\n", + "\n", + " # rename 'text' column to 'labels'\n", + " # df = df.rename(columns={'text': 'labels'})\n", + " \n", + " # create train, validation splits\n", + " train_data, dev_data = train_test_split(df[['text']], test_size=0.15) \n", + " \n", + " train_data.to_csv(train_path, index=False, header=None)\n", + " dev_data.to_csv(dev_path, index=False, header=None)" + ], + "metadata": { + "id": "aLQVWQ_dcB2h", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "1f98aeec-6120-4772-d074-9cc7d65fe131" + }, + "execution_count": 5, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "--2023-04-23 20:45:15-- https://drive.google.com/uc?export=download&id=16wm-2NTKohhcA26w-kaWfhLIGwl_oX95\n", + "Resolving drive.google.com (drive.google.com)... 108.177.126.113, 108.177.126.138, 108.177.126.102, ...\n", + "Connecting to drive.google.com (drive.google.com)|108.177.126.113|:443... connected.\n", + "HTTP request sent, awaiting response... 
303 See Other\n", + "Location: https://doc-0c-04-docs.googleusercontent.com/docs/securesc/ha0ro937gcuc7l7deffksulhg5h7mbp1/5h7rho54blq770420uluq9enivf39nuc/1682282700000/12919236576798385522/*/16wm-2NTKohhcA26w-kaWfhLIGwl_oX95?e=download&uuid=82f91d2d-5c4e-47b4-9acb-d781f8f5f78c [following]\n", + "Warning: wildcards not supported in HTTP.\n", + "--2023-04-23 20:45:21-- https://doc-0c-04-docs.googleusercontent.com/docs/securesc/ha0ro937gcuc7l7deffksulhg5h7mbp1/5h7rho54blq770420uluq9enivf39nuc/1682282700000/12919236576798385522/*/16wm-2NTKohhcA26w-kaWfhLIGwl_oX95?e=download&uuid=82f91d2d-5c4e-47b4-9acb-d781f8f5f78c\n", + "Resolving doc-0c-04-docs.googleusercontent.com (doc-0c-04-docs.googleusercontent.com)... 108.177.126.132, 2a00:1450:4013:c01::84\n", + "Connecting to doc-0c-04-docs.googleusercontent.com (doc-0c-04-docs.googleusercontent.com)|108.177.126.132|:443... connected.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 17227608 (16M) [application/json]\n", + "Saving to: ‘./data/tweets.json’\n", + "\n", + "./data/tweets.json 100%[===================>] 16.43M 53.4MB/s in 0.3s \n", + "\n", + "2023-04-23 20:45:22 (53.4 MB/s) - ‘./data/tweets.json’ saved [17227608/17227608]\n", + "\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "# create tokenized datasets\n", + "tokenizer = AutoTokenizer.from_pretrained(\n", + " model_name, \n", + " pad_token='<|endoftext|>'\n", + ")\n", + "\n", + "# custom load_dataset function because there are no labels\n", + "def load_dataset(train_path, dev_path, tokenizer):\n", + " block_size = 128\n", + " # block_size = tokenizer.model_max_length\n", + " \n", + " train_dataset = TextDataset(\n", + " tokenizer=tokenizer,\n", + " file_path=train_path,\n", + " block_size=block_size)\n", + " \n", + " dev_dataset = TextDataset(\n", + " tokenizer=tokenizer,\n", + " file_path=dev_path,\n", + " block_size=block_size) \n", + " \n", + " data_collator = DataCollatorForLanguageModeling(\n", + " tokenizer=tokenizer, 
mlm=False,\n", + " )\n", + " return train_dataset, dev_dataset, data_collator\n", + "\n", + "train_dataset, dev_dataset, data_collator = load_dataset(train_path, dev_path, tokenizer)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 220, + "referenced_widgets": [ + "911053acfcee4a0b99fdaa13014ecf1f", + "0d29b6be52864c56884bfda196e55874", + "a8021c1f961c4789800730dd347421f9", + "642bf3de65f24422b51e1454a463beb1", + "ae9c94dea4a14389853aa68ecd8d481c", + "feadf0422faf47b395f45b790be964bf", + "ecad22a54df24327b03c52cc0e7b4113", + "40f54ee5a8cd43efa7a9371d075630a6", + "9a993c8a4c0440068c08937bff1866d5", + "83869b2be90d4a5999a8652d6c8a6d14", + "238e6a9345094573ad2ecd6d54f8d5d0", + "c0d126c128dd48c998dcd374fc182649", + "1b4ad3e69eef46ffa190eaf8ca733d60", + "b70e35bdac11466c82b2cbc27a182972", + "00aa2d85364540d88648aba991074228", + "6c6cce521ee745d98af6fec370f2d337", + "5a3c3a4ed0d14debb85f9a55b69433e6", + "e314714751ed4c74b176ca423fa1c520", + "e6d4c4156de646cda2764401f99170b7", + "f49e30bcf1a043d3b7c9c2bda894c169", + "f4e9603647fd425dae2786f0405d2df0", + "7767c82db2404a9097553cf7f540b07e", + "3a82180b67cd4b988ac6389b06682a07", + "b8a821410a0d465cb4db403a47130ff1", + "65f936e2e5f14dad80ddf9fb476f95bd", + "756c1c5e3a364ebcb8360ec41bdf6dd9", + "1dbb067f28c64c46926ca201ada453e2", + "f8f51c37325741c9a814de7a36842a1c", + "b8500cb37b074231a1ded3f176fac43e", + "93b3862ce5b0442dbeb1d726f3b5e1f7", + "1ab239aa0f874d058678ff045749f009", + "1b896a02d860416a89e07e7cd5c03414", + "4ebd49d003db47c6944562290288f965", + "df3fcbff8dbf46b8ab84cdfad41832d2", + "a8a316be0eef437890ae0a9b0bfde076", + "6fbb76c4b7e14416ab201ee35bab7434", + "64bb8f7afd40423491b018388c298fe3", + "d3ae66e2ac104b929072434187ce29f1", + "ddb1075518c34fb2931b7ef24cccfc26", + "bbcd7a8dd261459ca39c0da46c09016c", + "2ff1219ba37a445d8353c3de82eb28da", + "e8f44559c28441218ea6f8d4f683b624", + "10479a9321ad4094939f191bd7e01b3e", + "e3a6155dd3d547a7869e47b4bc331add" + ] + }, + "id": 
"DAd1-0nLfcej", + "outputId": "a95627fe-16aa-4f3d-d63f-f80b10ba9529" + }, + "execution_count": 6, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "Downloading: 0%| | 0.00/665 [00:00, ?B/s]" + ], + "application/vnd.jupyter.widget-view+json": { + "version_major": 2, + "version_minor": 0, + "model_id": "911053acfcee4a0b99fdaa13014ecf1f" + } + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "Downloading: 0%| | 0.00/1.04M [00:00, ?B/s]" + ], + "application/vnd.jupyter.widget-view+json": { + "version_major": 2, + "version_minor": 0, + "model_id": "c0d126c128dd48c998dcd374fc182649" + } + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "Downloading: 0%| | 0.00/456k [00:00, ?B/s]" + ], + "application/vnd.jupyter.widget-view+json": { + "version_major": 2, + "version_minor": 0, + "model_id": "3a82180b67cd4b988ac6389b06682a07" + } + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "Downloading: 0%| | 0.00/1.36M [00:00, ?B/s]" + ], + "application/vnd.jupyter.widget-view+json": { + "version_major": 2, + "version_minor": 0, + "model_id": "df3fcbff8dbf46b8ab84cdfad41832d2" + } + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.9/dist-packages/transformers/data/datasets/language_modeling.py:54: FutureWarning: This dataset will be removed from the library soon, preprocessing should be handled with the 🤗 Datasets library. You can have a look at this example script for pointers: https://github.com/huggingface/transformers/blob/master/examples/language-modeling/run_mlm.py\n", + " warnings.warn(\n", + "Token indices sequence length is longer than the specified maximum sequence length for this model (1394877 > 1024). 
Running this sequence through the model will result in indexing errors\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "## Finetune Model" + ], + "metadata": { + "id": "6szJYteUf9L3" + } + }, + { + "cell_type": "code", + "source": [ + "# model = GPT2LMHeadModel.from_pretrained(\"gpt2\")\n", + "model = AutoModelWithLMHead.from_pretrained(model_name, cache_dir=Path('cache').resolve())\n", + "\n", + "# necessary because of additional bos, eos, pad tokens to embeddings\n", + "model.resize_token_embeddings(len(tokenizer))\n", + "\n", + "# create optimizer and learning rate schedule \n", + "optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate, eps=epsilon)\n", + "\n", + "training_steps = len(train_dataset) * epochs\n", + "\n", + "# adjust learning rate during training\n", + "scheduler = get_linear_schedule_with_warmup(optimizer, \n", + " num_warmup_steps = warmup_steps, \n", + " num_training_steps = training_steps)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 138, + "referenced_widgets": [ + "1df22013cc6a48c6b5fe61742c65aa48", + "b72126e72996421ca81863423f2cb3c5", + "657159a5b54e4924b1b6b5ef605b1700", + "64ff22a46a9a4908a2d1472c5fa7e99f", + "35d93ff67e0349fcb5390994d5d26bcd", + "daacd6921e604328b09ee58bbaded02b", + "306b15aac2734cf6b883a057e9f1fc56", + "6e26600be4d04f3e943e0ab44c4845e6", + "f50b3c09b56f4f70b06dc84ef2510624", + "17460a9051ae45989b82f0039cf71947", + "83caf28cd98c4c49ae34f89e1447c752", + "28c5a75d84994cb49ab6e93d3e0c6c07", + "d01fc3bfc2ac459f8a19f7d7b6fc875c", + "9ef59238c50449c6a2430277c4aef15a", + "6439123840de43568b89ff5208848af9", + "ba1f8ae7d6d746d1b4c7e67fec7a6b1c", + "82997a4ce4fb4cf2bfea595b1a518759", + "71fbc75efbca443da2c64ac83899e9a6", + "bcc3d514e5ca48e980b5d9543699bc47", + "fb9bc6fb13784b5794b17890fdb8099a", + "3542e2b6625b446c925f272358906eb2", + "f220af18d641462c8de743f8ebb0d2dc" + ] + }, + "id": "Nv-bFNB1f68X", + "outputId": "04a81f6a-cac1-4fc0-a79a-2828efd66467" + 
}, + "execution_count": 7, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.9/dist-packages/transformers/models/auto/modeling_auto.py:921: FutureWarning: The class `AutoModelWithLMHead` is deprecated and will be removed in a future version. Please use `AutoModelForCausalLM` for causal language models, `AutoModelForMaskedLM` for masked language models and `AutoModelForSeq2SeqLM` for encoder-decoder models.\n", + " warnings.warn(\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "Downloading: 0%| | 0.00/665 [00:00, ?B/s]" + ], + "application/vnd.jupyter.widget-view+json": { + "version_major": 2, + "version_minor": 0, + "model_id": "1df22013cc6a48c6b5fe61742c65aa48" + } + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "Downloading: 0%| | 0.00/548M [00:00, ?B/s]" + ], + "application/vnd.jupyter.widget-view+json": { + "version_major": 2, + "version_minor": 0, + "model_id": "28c5a75d84994cb49ab6e93d3e0c6c07" + } + }, + "metadata": {} + } + ] + }, + { + "cell_type": "code", + "source": [ + "training_args = TrainingArguments(\n", + " output_dir=f\"./{model_name}-{model_size}-trump\",\n", + " overwrite_output_dir=True,\n", + " num_train_epochs=epochs,\n", + " per_device_train_batch_size=32,\n", + " per_device_eval_batch_size=64,\n", + " eval_steps = 400, # n update steps between two evaluations\n", + " save_steps=800, # n steps per model save \n", + " warmup_steps=500, # n warmup steps for learning rate scheduler\n", + " remove_unused_columns=False,\n", + " prediction_loss_only=True\n", + ")\n", + "\n", + "trainer = Trainer(\n", + " model=model,\n", + " args=training_args,\n", + " data_collator=data_collator,\n", + " train_dataset=train_dataset,\n", + " eval_dataset=dev_dataset,\n", + ")" + ], + "metadata": { + "id": "5OvNyCQagD1I" + }, + "execution_count": 8, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# train & save 
model run\n", + "trainer.train()\n", + "trainer.save_model()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 259 + }, + "id": "Ni9OyHY5gQLw", + "outputId": "0a810a31-c012-4183-d671-b4b78a7de8b4" + }, + "execution_count": 9, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "
Step | \n", + "Training Loss | \n", + "
---|---|
500 | \n", + "3.852300 | \n", + "
"
+ ],
+ "text/plain": [
+ " "
+ ]
+ },
+ "metadata": {}
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "## Generate tweets"
+ ],
+ "metadata": {
+ "id": "vJLUI-tSgtaX"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "trump = pipeline(\"text-generation\", model=f\"./{model_name}-{model_size}-trump\", tokenizer=tokenizer, config={\"max_length\":140})"
+ ],
+ "metadata": {
+ "id": "mnpf7Y9zgiUt"
+ },
+ "execution_count": 10,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Prompt the fine-tuned model with the opening words of a tweet\n",
+ "trump('The democrats have')"
+ ],
+ "metadata": {
+ "id": "gnVtF1K_h473",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "outputId": "c951da78-e606-4156-e27c-23fb78de0f2d"
+ },
+ "execution_count": 11,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.\n"
+ ]
+ },
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "[{'generated_text': 'The democrats have no respect for the people of this Country.” @CynthiaLM\\nThe GOP should move quickly to fix ObamaCare and defund ObamaCare fast!\\n\"\"\"\"\"@nathalie_k: @realDonaldTrump you\\'ll'}]"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 11
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "trump('Why does the lying news media')"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "H02NvY6lEPTJ",
+ "outputId": "bb2f6c81-9505-46de-dc14-d6cf12d72067"
+ },
+ "execution_count": 12,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.\n"
+ ]
+ },
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "[{'generated_text': 'Why does the lying news media destroy credibility in the House?\"\\n@TrentBaxter8 Thanks--a great book.\\n\"With all due respect to @BarackObama, the great United States Military is not yet fully operational and is'}]"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 12
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "trump(\"Today I'll be\")"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "n8BoiGLGEScg",
+ "outputId": "2772352f-5433-486d-f741-7c9d7b7a2652"
+ },
+ "execution_count": 13,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.\n"
+ ]
+ },
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "[{'generated_text': 'Today I\\'ll be on @foxandfriends today live from Palm Beach... #CelebrityApprentice\\n\"....I am a winner, but also a winner with a great future. In this case, a great future for Americans at large!'}]"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 13
+ }
+ ]
+ }
+ ]
+}
\ No newline at end of file
\n",
+ " \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " Step \n",
+ " Training Loss \n",
+ " \n",
+ " \n",
+ " 500 \n",
+ " 3.852300 \n",
+ " \n",
+ " \n",
+ " 1000 \n",
+ " 3.408700 \n",
+ " \n",
+ " \n",
+ " \n",
+ "1500 \n",
+ " 3.250800 \n",
+ "