{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [], "collapsed_sections": [ "CxpgHz4AJZm1" ] }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" }, "widgets": { "application/vnd.jupyter.widget-state+json": { "7390ce466ea444ef9dd84d8f9998697a": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_885b9d5f25194ff3b3be20c4bd5836b5", "IPY_MODEL_7be703f66d8b491898d598d9647edd2a", "IPY_MODEL_1418888c57d542228a37e74f6ddefada" ], "layout": "IPY_MODEL_34e2712c77854875a9b2cd81754b340f" } }, "885b9d5f25194ff3b3be20c4bd5836b5": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_da21b67e37114a0fa9f588736a81ba16", "placeholder": "​", "style": "IPY_MODEL_30bc5d826ed9431c9327b347315f73cf", "value": "Downloading readme: 100%" } }, "7be703f66d8b491898d598d9647edd2a": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": 
"success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_7ef51e96f1db46a5a4ed923261b4ced6", "max": 22, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_3f4b53c73dfd4af6950ea67aef5753bc", "value": 22 } }, "1418888c57d542228a37e74f6ddefada": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_a0b8523937c646bc84bf9d4769e4fbb0", "placeholder": "​", "style": "IPY_MODEL_0a4ea77041814bb6b4288738396d2c7c", "value": " 22.0/22.0 [00:00<00:00, 785B/s]" } }, "34e2712c77854875a9b2cd81754b340f": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, 
"width": null } }, "da21b67e37114a0fa9f588736a81ba16": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "30bc5d826ed9431c9327b347315f73cf": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "7ef51e96f1db46a5a4ed923261b4ced6": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, 
"align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "3f4b53c73dfd4af6950ea67aef5753bc": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "a0b8523937c646bc84bf9d4769e4fbb0": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, 
"justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "0a4ea77041814bb6b4288738396d2c7c": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "01f879d7e6574c3fb1b0d52881e26bcb": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_1c2474521cbd4bd7a66e4323326ad812", "IPY_MODEL_e165fe9283b944408d37eb199e49622d", "IPY_MODEL_0e528279a983454187d0bc46f686358f" ], "layout": "IPY_MODEL_38acf9c703a14608954c2277d82fcefd" } }, "1c2474521cbd4bd7a66e4323326ad812": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_d4aa8e99790d41bfb37f442251c2cf6a", "placeholder": "​", "style": "IPY_MODEL_dd5873e5d6354c57b44b383041ad316a", 
"value": "Downloading data files: 100%" } }, "e165fe9283b944408d37eb199e49622d": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_36828bd3aa4f4f0f88234bf4760d5d7c", "max": 2, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_1717627534c74336ac8a2a74db0bc2ae", "value": 2 } }, "0e528279a983454187d0bc46f686358f": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_41040b4e00c449ef85c72248e1af854f", "placeholder": "​", "style": "IPY_MODEL_74e8c6e4891a4c2da5d7ce64d996b118", "value": " 2/2 [00:01<00:00, 1.39it/s]" } }, "38acf9c703a14608954c2277d82fcefd": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, 
"grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "d4aa8e99790d41bfb37f442251c2cf6a": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "dd5873e5d6354c57b44b383041ad316a": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", 
"_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "36828bd3aa4f4f0f88234bf4760d5d7c": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "1717627534c74336ac8a2a74db0bc2ae": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "41040b4e00c449ef85c72248e1af854f": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", 
"_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "74e8c6e4891a4c2da5d7ce64d996b118": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "0657b8cf098045af83457edda078529e": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_bae6180428c948beb8fe1e98b035bce6", "IPY_MODEL_9ed86a45efe140c68560e12720fa4372", "IPY_MODEL_659386ddae304772a124a56d42fa0315" ], "layout": "IPY_MODEL_b0eb14c08e01470ea84e7407793e048b" } }, "bae6180428c948beb8fe1e98b035bce6": { "model_module": "@jupyter-widgets/controls", "model_name": 
"HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_cd82e6e1acb2467290a0904159786bb6", "placeholder": "​", "style": "IPY_MODEL_cae15fd2710c4c54ac6fdf58ec3f413b", "value": "Downloading data: " } }, "9ed86a45efe140c68560e12720fa4372": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_108dff22bca542e28782e4c4e39a5320", "max": 1, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_d1e9fbf0b137447ebc975e0b55055e3e", "value": 1 } }, "659386ddae304772a124a56d42fa0315": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_e5643622c77547abb60f527e3db41516", "placeholder": "​", "style": "IPY_MODEL_d94cb656acd64f69a87a82bb79fdf9d1", "value": " 3.63M/? 
[00:00<00:00, 5.71MB/s]" } }, "b0eb14c08e01470ea84e7407793e048b": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "cd82e6e1acb2467290a0904159786bb6": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, 
"left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "cae15fd2710c4c54ac6fdf58ec3f413b": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "108dff22bca542e28782e4c4e39a5320": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": "20px" } }, "d1e9fbf0b137447ebc975e0b55055e3e": { "model_module": "@jupyter-widgets/controls", "model_name": 
"ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "e5643622c77547abb60f527e3db41516": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "d94cb656acd64f69a87a82bb79fdf9d1": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "469925c1bfef4254b8ca602e813d764b": { "model_module": 
"@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_e90b87693fce4f5fa154be9f47b37e10", "IPY_MODEL_434b659f385e4e9eb9f938df9dc841e0", "IPY_MODEL_7a1991a9691d46a5a8dea64750fb35fb" ], "layout": "IPY_MODEL_6349528e69c447f6b894b2f7ce4781c7" } }, "e90b87693fce4f5fa154be9f47b37e10": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_f742d08174744754b81a9568d853ceec", "placeholder": "​", "style": "IPY_MODEL_57c536b3d66043afbf5f7de847434544", "value": "Downloading data: " } }, "434b659f385e4e9eb9f938df9dc841e0": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_10f7bfb60d4c46aba1254efa4459a105", "max": 1, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_45540b4a67fc4c1f90f9266769d00ecb", "value": 1 } }, "7a1991a9691d46a5a8dea64750fb35fb": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", 
"model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_ff0ad1fe2ab34558917fe2cad85e996b", "placeholder": "​", "style": "IPY_MODEL_bbd26e73b4e54b4097ebc86e7cecd288", "value": " 465k/? [00:00<00:00, 7.44MB/s]" } }, "6349528e69c447f6b894b2f7ce4781c7": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "f742d08174744754b81a9568d853ceec": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", 
"_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "57c536b3d66043afbf5f7de847434544": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "10f7bfb60d4c46aba1254efa4459a105": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, 
"grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": "20px" } }, "45540b4a67fc4c1f90f9266769d00ecb": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "ff0ad1fe2ab34558917fe2cad85e996b": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, 
"bbd26e73b4e54b4097ebc86e7cecd288": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "04147f4e9863486bb3e61684b3a92350": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_79717828ef5f4af99873eba1d69bca76", "IPY_MODEL_ee531fea86fd4e8c9b75ae567277dd34", "IPY_MODEL_4d9ba051fe344105899177bc0fe46289" ], "layout": "IPY_MODEL_ddeaff1770fe4e2ba22a72223d16b075" } }, "79717828ef5f4af99873eba1d69bca76": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_26d206cfe46a4b6d8c7376900c60338e", "placeholder": "​", "style": "IPY_MODEL_95cd9893a96a428b9ed9b3cbc779270e", "value": "Extracting data files: 100%" } }, "ee531fea86fd4e8c9b75ae567277dd34": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": 
"FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_7c5a2abb666e42cd8a5f54a0e706d91c", "max": 2, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_045974e22b9f43d5b670db92582d207b", "value": 2 } }, "4d9ba051fe344105899177bc0fe46289": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_4cac001b34874e0cbc750e77473f90c6", "placeholder": "​", "style": "IPY_MODEL_dbeb9df6b83b4f73a43aa14c81afad19", "value": " 2/2 [00:00<00:00, 74.13it/s]" } }, "ddeaff1770fe4e2ba22a72223d16b075": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, 
"object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "26d206cfe46a4b6d8c7376900c60338e": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "95cd9893a96a428b9ed9b3cbc779270e": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "7c5a2abb666e42cd8a5f54a0e706d91c": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", 
"_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "045974e22b9f43d5b670db92582d207b": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "4cac001b34874e0cbc750e77473f90c6": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": 
null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "dbeb9df6b83b4f73a43aa14c81afad19": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "fe24d32052d04666a125b08908acdcbd": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_1c91261909cd40bdaaa89cd541fcb049", "IPY_MODEL_508fa3f558214c92ba12397196d56d03", "IPY_MODEL_bd165fc607224304924a0b59de54f907" ], "layout": "IPY_MODEL_14e1d6c523c04faaac492d344296aa11" } }, "1c91261909cd40bdaaa89cd541fcb049": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", 
"description": "", "description_tooltip": null, "layout": "IPY_MODEL_b8e82298443149a2be6e88fdb8445e9b", "placeholder": "​", "style": "IPY_MODEL_932440a040cc450d88febefc510c2863", "value": "Generating train split: " } }, "508fa3f558214c92ba12397196d56d03": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "info", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_5d004bbcaede4e88b86f6874b669f687", "max": 1, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_e54c230878574f2381eca7461a585ce0", "value": 1 } }, "bd165fc607224304924a0b59de54f907": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_17d83aa7b83243139d7f489f03eba76f", "placeholder": "​", "style": "IPY_MODEL_909cdbc4668544dbbaa3fc4a37dbb39b", "value": " 27481/0 [00:00<00:00, 210001.00 examples/s]" } }, "14e1d6c523c04faaac492d344296aa11": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": 
null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": "hidden", "width": null } }, "b8e82298443149a2be6e88fdb8445e9b": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "932440a040cc450d88febefc510c2863": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { 
"_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "5d004bbcaede4e88b86f6874b669f687": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": "20px" } }, "e54c230878574f2381eca7461a585ce0": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "17d83aa7b83243139d7f489f03eba76f": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": 
"1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "909cdbc4668544dbbaa3fc4a37dbb39b": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "cdb566bc78874ae2a91ef07581c94a5f": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_3fe5011352514962bb94339a1ea71134", "IPY_MODEL_17e2fa9284f94242aca14931a628d05c", 
"IPY_MODEL_fa949714b74141e4b9673b30e954f4e3" ], "layout": "IPY_MODEL_0da63378646847438fa7a4637f100bb2" } }, "3fe5011352514962bb94339a1ea71134": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_38492e74099b4aae9745d9b047f0263c", "placeholder": "​", "style": "IPY_MODEL_af02a8c9241744779d3230e933da3246", "value": "Generating test split: " } }, "17e2fa9284f94242aca14931a628d05c": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "info", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_e42d6959d462467ebb4748282aaa64ce", "max": 1, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_3958632d5a414ba0a797730801a3dd02", "value": 1 } }, "fa949714b74141e4b9673b30e954f4e3": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_ce3e209698444035a13c3be5b589cdff", "placeholder": "​", "style": "IPY_MODEL_12e56f34f06f44f9a711a57561c909ed", 
"value": " 0/0 [00:00<?, ? examples/s]" } }, "0da63378646847438fa7a4637f100bb2": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": "hidden", "width": null } }, "38492e74099b4aae9745d9b047f0263c": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, 
"justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "af02a8c9241744779d3230e933da3246": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "e42d6959d462467ebb4748282aaa64ce": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": "20px" } }, "3958632d5a414ba0a797730801a3dd02": { "model_module": "@jupyter-widgets/controls", 
"model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "ce3e209698444035a13c3be5b589cdff": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "12e56f34f06f44f9a711a57561c909ed": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "ce2b7c6a3e70457fb5c3e523835c55a1": { "model_module": 
"@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_a94f1546928e4129be1375af9a7d87d2", "IPY_MODEL_39b16551d05e409bb7ff301e81be6bad", "IPY_MODEL_007469369bc442c3a8384cda2b629b3d" ], "layout": "IPY_MODEL_2adafce9be4d404cb585568e8bc28360" } }, "a94f1546928e4129be1375af9a7d87d2": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_d0c57e8bab6d43d2b13c3bfec70746dc", "placeholder": "​", "style": "IPY_MODEL_8e8c889b981f4a7d98c7a2ad8a4c0a95", "value": "100%" } }, "39b16551d05e409bb7ff301e81be6bad": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_6efc5d5c7f9840bb8a8b5585ce2add08", "max": 2, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_4faa5a69373344bfac0491ffc114cddf", "value": 2 } }, "007469369bc442c3a8384cda2b629b3d": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", 
"model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_5e82267f05c345859be2aae87322833b", "placeholder": "​", "style": "IPY_MODEL_a6723f52eb68470594a473fac60e0ba0", "value": " 2/2 [00:00<00:00, 55.08it/s]" } }, "2adafce9be4d404cb585568e8bc28360": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "d0c57e8bab6d43d2b13c3bfec70746dc": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", 
"_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "8e8c889b981f4a7d98c7a2ad8a4c0a95": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "6efc5d5c7f9840bb8a8b5585ce2add08": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, 
"grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "4faa5a69373344bfac0491ffc114cddf": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "5e82267f05c345859be2aae87322833b": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, 
"a6723f52eb68470594a473fac60e0ba0": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } } } } }, "cells": [ { "cell_type": "markdown", "source": [ "# Imports" ], "metadata": { "id": "GyTnitjMG--X" } }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "KjN3_XivFgwM", "outputId": "b38e18c8-1019-4993-e764-b1be442d703d" }, "outputs": [ { "output_type": "stream", "name": "stderr", "text": [ "[nltk_data] Downloading package stopwords to /root/nltk_data...\n", "[nltk_data] Unzipping corpora/stopwords.zip.\n", "[nltk_data] Downloading package punkt to /root/nltk_data...\n", "[nltk_data] Unzipping tokenizers/punkt.zip.\n", "[nltk_data] Downloading package wordnet to /root/nltk_data...\n" ] }, { "output_type": "stream", "name": "stdout", "text": [ "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n", "Collecting datasets\n", " Downloading datasets-2.13.0-py3-none-any.whl (485 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m485.6/485.6 kB\u001b[0m \u001b[31m12.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from datasets) (1.22.4)\n", "Requirement already satisfied: pyarrow>=8.0.0 in /usr/local/lib/python3.10/dist-packages (from datasets) (9.0.0)\n", "Collecting dill<0.3.7,>=0.3.0 (from datasets)\n", " Downloading dill-0.3.6-py3-none-any.whl (110 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m110.5/110.5 kB\u001b[0m \u001b[31m12.7 
MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from datasets) (1.5.3)\n", "Requirement already satisfied: requests>=2.19.0 in /usr/local/lib/python3.10/dist-packages (from datasets) (2.27.1)\n", "Requirement already satisfied: tqdm>=4.62.1 in /usr/local/lib/python3.10/dist-packages (from datasets) (4.65.0)\n", "Collecting xxhash (from datasets)\n", " Downloading xxhash-3.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (212 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m212.5/212.5 kB\u001b[0m \u001b[31m13.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hCollecting multiprocess (from datasets)\n", " Downloading multiprocess-0.70.14-py310-none-any.whl (134 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.3/134.3 kB\u001b[0m \u001b[31m15.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: fsspec[http]>=2021.11.1 in /usr/local/lib/python3.10/dist-packages (from datasets) (2023.4.0)\n", "Collecting aiohttp (from datasets)\n", " Downloading aiohttp-3.8.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.0 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.0/1.0 MB\u001b[0m \u001b[31m50.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hCollecting huggingface-hub<1.0.0,>=0.11.0 (from datasets)\n", " Downloading huggingface_hub-0.15.1-py3-none-any.whl (236 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m236.8/236.8 kB\u001b[0m \u001b[31m21.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from datasets) (23.1)\n", "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from datasets) (6.0)\n", 
"Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (23.1.0)\n", "Requirement already satisfied: charset-normalizer<4.0,>=2.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (2.0.12)\n", "Collecting multidict<7.0,>=4.5 (from aiohttp->datasets)\n", " Downloading multidict-6.0.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (114 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m114.5/114.5 kB\u001b[0m \u001b[31m12.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hCollecting async-timeout<5.0,>=4.0.0a3 (from aiohttp->datasets)\n", " Downloading async_timeout-4.0.2-py3-none-any.whl (5.8 kB)\n", "Collecting yarl<2.0,>=1.0 (from aiohttp->datasets)\n", " Downloading yarl-1.9.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (268 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m268.8/268.8 kB\u001b[0m \u001b[31m29.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hCollecting frozenlist>=1.1.1 (from aiohttp->datasets)\n", " Downloading frozenlist-1.3.3-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (149 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m149.6/149.6 kB\u001b[0m \u001b[31m17.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hCollecting aiosignal>=1.1.2 (from aiohttp->datasets)\n", " Downloading aiosignal-1.3.1-py3-none-any.whl (7.6 kB)\n", "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0.0,>=0.11.0->datasets) (3.12.0)\n", "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0.0,>=0.11.0->datasets) (4.5.0)\n", "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from 
requests>=2.19.0->datasets) (1.26.15)\n", "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->datasets) (2022.12.7)\n", "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->datasets) (3.4)\n", "Requirement already satisfied: python-dateutil>=2.8.1 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets) (2.8.2)\n", "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets) (2022.7.1)\n", "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.1->pandas->datasets) (1.16.0)\n", "Installing collected packages: xxhash, multidict, frozenlist, dill, async-timeout, yarl, multiprocess, huggingface-hub, aiosignal, aiohttp, datasets\n", "Successfully installed aiohttp-3.8.4 aiosignal-1.3.1 async-timeout-4.0.2 datasets-2.13.0 dill-0.3.6 frozenlist-1.3.3 huggingface-hub-0.15.1 multidict-6.0.4 multiprocess-0.70.14 xxhash-3.2.0 yarl-1.9.2\n" ] } ], "source": [ "import numpy as np\n", "import pandas as pd\n", "\n", "import re\n", "import string\n", "import os\n", "\n", "import nltk\n", "nltk.download('stopwords')\n", "nltk.download('punkt')\n", "nltk.download('wordnet')\n", "from nltk.corpus import stopwords\n", "from nltk.stem import WordNetLemmatizer, PorterStemmer\n", "\n", "import matplotlib.pyplot as plt\n", "import seaborn as sns\n", "\n", "import tensorflow as tf\n", "from tensorflow.keras.preprocessing.text import Tokenizer\n", "from tensorflow.keras.preprocessing.sequence import pad_sequences\n", "from tensorflow.keras.layers import Dense, Dropout, LSTM, Embedding,Bidirectional, GlobalMaxPool1D, SpatialDropout1D\n", "from tensorflow.keras.models import Sequential\n", "from tensorflow.keras import initializers, regularizers, constraints, optimizers, layers\n", "from tensorflow.keras.metrics import Precision, Recall\n", "from sklearn 
import metrics\n", "from sklearn.preprocessing import LabelEncoder\n", "\n", "from sklearn.model_selection import train_test_split\n", "\n", "!pip install datasets\n", "from datasets import load_dataset" ] }, { "cell_type": "code", "source": [], "metadata": { "id": "KAMhRe9hHEZf" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "# Get and process Dataset\n", "Downloading and loading the dataset from Huggingface. The dataset package is used to get the dataset." ], "metadata": { "id": "1f0WYksXHPAM" } }, { "cell_type": "code", "source": [ "dataset = load_dataset(\"mteb/tweet_sentiment_extraction\",'en')\n", "dataset['train']" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 333, "referenced_widgets": [ "7390ce466ea444ef9dd84d8f9998697a", "885b9d5f25194ff3b3be20c4bd5836b5", "7be703f66d8b491898d598d9647edd2a", "1418888c57d542228a37e74f6ddefada", "34e2712c77854875a9b2cd81754b340f", "da21b67e37114a0fa9f588736a81ba16", "30bc5d826ed9431c9327b347315f73cf", "7ef51e96f1db46a5a4ed923261b4ced6", "3f4b53c73dfd4af6950ea67aef5753bc", "a0b8523937c646bc84bf9d4769e4fbb0", "0a4ea77041814bb6b4288738396d2c7c", "01f879d7e6574c3fb1b0d52881e26bcb", "1c2474521cbd4bd7a66e4323326ad812", "e165fe9283b944408d37eb199e49622d", "0e528279a983454187d0bc46f686358f", "38acf9c703a14608954c2277d82fcefd", "d4aa8e99790d41bfb37f442251c2cf6a", "dd5873e5d6354c57b44b383041ad316a", "36828bd3aa4f4f0f88234bf4760d5d7c", "1717627534c74336ac8a2a74db0bc2ae", "41040b4e00c449ef85c72248e1af854f", "74e8c6e4891a4c2da5d7ce64d996b118", "0657b8cf098045af83457edda078529e", "bae6180428c948beb8fe1e98b035bce6", "9ed86a45efe140c68560e12720fa4372", "659386ddae304772a124a56d42fa0315", "b0eb14c08e01470ea84e7407793e048b", "cd82e6e1acb2467290a0904159786bb6", "cae15fd2710c4c54ac6fdf58ec3f413b", "108dff22bca542e28782e4c4e39a5320", "d1e9fbf0b137447ebc975e0b55055e3e", "e5643622c77547abb60f527e3db41516", "d94cb656acd64f69a87a82bb79fdf9d1", "469925c1bfef4254b8ca602e813d764b", 
"e90b87693fce4f5fa154be9f47b37e10", "434b659f385e4e9eb9f938df9dc841e0", "7a1991a9691d46a5a8dea64750fb35fb", "6349528e69c447f6b894b2f7ce4781c7", "f742d08174744754b81a9568d853ceec", "57c536b3d66043afbf5f7de847434544", "10f7bfb60d4c46aba1254efa4459a105", "45540b4a67fc4c1f90f9266769d00ecb", "ff0ad1fe2ab34558917fe2cad85e996b", "bbd26e73b4e54b4097ebc86e7cecd288", "04147f4e9863486bb3e61684b3a92350", "79717828ef5f4af99873eba1d69bca76", "ee531fea86fd4e8c9b75ae567277dd34", "4d9ba051fe344105899177bc0fe46289", "ddeaff1770fe4e2ba22a72223d16b075", "26d206cfe46a4b6d8c7376900c60338e", "95cd9893a96a428b9ed9b3cbc779270e", "7c5a2abb666e42cd8a5f54a0e706d91c", "045974e22b9f43d5b670db92582d207b", "4cac001b34874e0cbc750e77473f90c6", "dbeb9df6b83b4f73a43aa14c81afad19", "fe24d32052d04666a125b08908acdcbd", "1c91261909cd40bdaaa89cd541fcb049", "508fa3f558214c92ba12397196d56d03", "bd165fc607224304924a0b59de54f907", "14e1d6c523c04faaac492d344296aa11", "b8e82298443149a2be6e88fdb8445e9b", "932440a040cc450d88febefc510c2863", "5d004bbcaede4e88b86f6874b669f687", "e54c230878574f2381eca7461a585ce0", "17d83aa7b83243139d7f489f03eba76f", "909cdbc4668544dbbaa3fc4a37dbb39b", "cdb566bc78874ae2a91ef07581c94a5f", "3fe5011352514962bb94339a1ea71134", "17e2fa9284f94242aca14931a628d05c", "fa949714b74141e4b9673b30e954f4e3", "0da63378646847438fa7a4637f100bb2", "38492e74099b4aae9745d9b047f0263c", "af02a8c9241744779d3230e933da3246", "e42d6959d462467ebb4748282aaa64ce", "3958632d5a414ba0a797730801a3dd02", "ce3e209698444035a13c3be5b589cdff", "12e56f34f06f44f9a711a57561c909ed", "ce2b7c6a3e70457fb5c3e523835c55a1", "a94f1546928e4129be1375af9a7d87d2", "39b16551d05e409bb7ff301e81be6bad", "007469369bc442c3a8384cda2b629b3d", "2adafce9be4d404cb585568e8bc28360", "d0c57e8bab6d43d2b13c3bfec70746dc", "8e8c889b981f4a7d98c7a2ad8a4c0a95", "6efc5d5c7f9840bb8a8b5585ce2add08", "4faa5a69373344bfac0491ffc114cddf", "5e82267f05c345859be2aae87322833b", "a6723f52eb68470594a473fac60e0ba0" ] }, "id": "mqwYf9xWHUuh", "outputId": 
"66965e88-8115-4d73-db90-6706371a0654" }, "execution_count": null, "outputs": [ { "output_type": "display_data", "data": { "text/plain": [ "Downloading readme: 0%| | 0.00/22.0 [00:00\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idtextlabellabel_text
0ce1f6366d9hahahahaha, I have a day off2positive
1cb73f98aa9watching the office......... also comedy gold1neutral
2923295751dI think my bicycle and I just freaked out a cu...0negative
39499e34212How cool is that! Thank you so much! luv it!2positive
4b39e02ae64well then happy mothers day ahahahahahaha2positive
\n", "
\n", " \n", " \n", " \n", "\n", " \n", "
\n", " \n", " " ] }, "metadata": {}, "execution_count": 6 } ] }, { "cell_type": "markdown", "source": [ "#### Drop unused columns" ], "metadata": { "id": "TPN-xc1iHo_U" } }, { "cell_type": "code", "source": [ "df_train=train_data_df.copy()\n", "df_test=test_data_df.copy()" ], "metadata": { "id": "asZstARkHjz_" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "df_train.drop(['id','label'],axis=1,inplace=True)\n", "df_test.drop(['id','label'],axis=1,inplace=True)\n", "df_train.head()" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 206 }, "id": "qPKqxIVwHu_9", "outputId": "112271bf-d4db-47cd-9410-b92482488dd2" }, "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " text label_text\n", "0 hahahahaha, I have a day off positive\n", "1 watching the office......... also comedy gold neutral\n", "2 I think my bicycle and I just freaked out a cu... negative\n", "3 How cool is that! Thank you so much! luv it! positive\n", "4 well then happy mothers day ahahahahahaha positive" ], "text/html": [ "\n", "
\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
textlabel_text
0hahahahaha, I have a day offpositive
1watching the office......... also comedy goldneutral
2I think my bicycle and I just freaked out a cu...negative
3How cool is that! Thank you so much! luv it!positive
4well then happy mothers day ahahahahahahapositive
\n", "
\n", " \n", " \n", " \n", "\n", " \n", "
\n", "
\n", " " ] }, "metadata": {}, "execution_count": 8 } ] }, { "cell_type": "markdown", "source": [ "#### DF Info" ], "metadata": { "id": "A--EbojPH1A1" } }, { "cell_type": "code", "source": [ "df_train['label_text'].isnull().sum()" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "QwE34N-1Hw4O", "outputId": "7a0ec80d-1c2e-497b-bc3a-a5eceae4fc11" }, "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "0" ] }, "metadata": {}, "execution_count": 9 } ] }, { "cell_type": "code", "source": [ "df_train.dtypes" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "R-bQ7kIfH4Lr", "outputId": "1514f226-26dc-4ed4-adbc-edfef2c25e31" }, "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "text object\n", "label_text object\n", "dtype: object" ] }, "metadata": {}, "execution_count": 10 } ] }, { "cell_type": "code", "source": [ "df_train.info()" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "vZ91HtgCH5-B", "outputId": "45ffe43b-9718-421d-c8d0-8fc2be5d9c5c" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "\n", "RangeIndex: 27481 entries, 0 to 27480\n", "Data columns (total 2 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 text 27481 non-null object\n", " 1 label_text 27481 non-null object\n", "dtypes: object(2)\n", "memory usage: 429.5+ KB\n" ] } ] }, { "cell_type": "markdown", "source": [ "# Limit dataset for quick training\n", "This step is only done for this post example. In a real scenario, a good amount of data will be needed for training." 
], "metadata": { "id": "6PQddaR0H-g-" } }, { "cell_type": "code", "source": [ "df=df_train.copy()\n", "df=df.head(500)" ], "metadata": { "id": "DrtZRy3gH8IC" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [], "metadata": { "id": "6lDYO8BvICLQ" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "# Train test split \n", "Splitting the dataset into Training and Testing sets. The Train set will be used for training and the Test one will be used for evaluating the model." ], "metadata": { "id": "bUEBfxgkIHQv" } }, { "cell_type": "code", "source": [ "TEST_SPLIT = 0.2\n", "RANDOM_STATE = 10\n", "np.random.seed(RANDOM_STATE)\n", "tf.random.set_seed(RANDOM_STATE)" ], "metadata": { "id": "W7qXhZvdIJni" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "X_train, X_test, y_train, y_test = train_test_split(df[\"text\"], df[\"label_text\"],\n", " test_size = TEST_SPLIT, random_state = RANDOM_STATE)\n", "\n", "print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "fxqHXivAILVc", "outputId": "d5ce0660-92b9-445a-9e26-52445a7c1bb7" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "(400,) (100,) (400,) (100,)\n" ] } ] }, { "cell_type": "code", "source": [ "texts_train=list(X_train)\n", "labels_train=list(y_train)\n", "\n", "texts_test=list(X_test)\n", "labels_test=list(y_test)" ], "metadata": { "id": "c4a1lNf2INRx" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [], "metadata": { "id": "olKysOM_IQcL" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "# Pre process steps \n", "For efficient training, the dataset needs to be pre-processed to get better results. Below are the steps I am handling here.\n", "\n", "1. Stemming and Lemmatization\n", "2. Tokenizer\n", "3. text to sequence\n", "4. 
pad_sequence\n", "5. one hot encoding" ], "metadata": { "id": "m2l8krAVITfV" } }, { "cell_type": "markdown", "source": [ "##### Stemming and Lemmatization" ], "metadata": { "id": "y_d_SigYIePC" } }, { "cell_type": "code", "source": [ "lemmatizer = WordNetLemmatizer()\n", "stemmer = PorterStemmer()\n", "stop_words = set(stopwords.words('english'))\n", "patterns = []\n", "tags = []\n", "for i in range(len(texts_train)):\n", " # Convert all text to lowercase\n", " pattern = texts_train[i].lower()\n", "\n", " # Remove non-alphanumeric characters and replace them with space\n", " pattern = re.sub(r'[^a-z0-9]', ' ', pattern)\n", "\n", " # Tokenize text\n", " tokens = nltk.word_tokenize(pattern)\n", "\n", " # Remove stop words\n", " tokens = [token for token in tokens if token not in stop_words]\n", "\n", " # Apply lemmatization and stemming\n", " tokens = [lemmatizer.lemmatize(token) for token in tokens]\n", " tokens = [stemmer.stem(token) for token in tokens]\n", "\n", " # Join the tokens back into a string\n", " pattern = ' '.join(tokens)\n", "\n", " # Append the pattern and tag to respective lists\n", " patterns.append(pattern)\n", " tags.append(labels_train[i])" ], "metadata": { "id": "wrs4BuXGIaQP" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "##### Tokenizer" ], "metadata": { "id": "zK_S6ABLIk1_" } }, { "cell_type": "code", "source": [ "unique_words = set()\n", "for text in texts_train:\n", " words = nltk.word_tokenize(text.lower())\n", " unique_words.update(words)\n", "len(unique_words)\n", "unique_word_len=len(unique_words)\n", "num_words=unique_word_len+100\n", "tokenizer = Tokenizer(num_words=num_words, oov_token=\"\")\n", "tokenizer.fit_on_texts(patterns)" ], "metadata": { "id": "z4bO5f5KIinV" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "##### Text to Sequence" ], "metadata": { "id": "g1hSe4SlIxXk" } }, { "cell_type": "code", "source": [ "max_sequence_len = 
max([len(tokenizer.texts_to_sequences(patterns)[i]) for i in range(len(patterns))])\n", "sequences = tokenizer.texts_to_sequences(patterns)\n", "max_sequence_len=max_sequence_len+100\n", "max_sequence_len" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "gWpOtBFtIqnm", "outputId": "d4a8c94c-65c8-4f7a-9248-071d0d427772" }, "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "122" ] }, "metadata": {}, "execution_count": 18 } ] }, { "cell_type": "markdown", "source": [ "##### Pad Sequences" ], "metadata": { "id": "1uqYV4m1I4LF" } }, { "cell_type": "code", "source": [ "padded_sequences = pad_sequences(sequences, maxlen=max_sequence_len, padding='post')" ], "metadata": { "id": "kropkedQI0zE" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "##### One Hot encoding" ], "metadata": { "id": "jPaVBZQOJAVY" } }, { "cell_type": "code", "source": [ "training = np.array(padded_sequences)\n", "output = np.array(tags)\n", "output_labels = np.unique(output)\n", "encoder = LabelEncoder()\n", "encoder.fit(output)\n", "encoded_y = encoder.transform(output)\n", "output_encoded = tf.keras.utils.to_categorical(encoded_y)\n", "output_encoded" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "x0Kp2C5LI9nP", "outputId": "b13990c0-8aaf-42a2-ee6d-7122bf0f72a7" }, "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "array([[0., 0., 1.],\n", " [1., 0., 0.],\n", " [0., 0., 1.],\n", " ...,\n", " [0., 1., 0.],\n", " [0., 1., 0.],\n", " [1., 0., 0.]], dtype=float32)" ] }, "metadata": {}, "execution_count": 20 } ] }, { "cell_type": "markdown", "source": [ "# Create Model \n", "I am creating an LSTM model with dropout layer for this example" ], "metadata": { "id": "OnZDEGl2JJ2c" } }, { "cell_type": "code", "source": [ "VAL_SPLIT = 0.1\n", "BATCH_SIZE = 10\n", "EPOCHS = 20\n", "EMBEDDING_DIM = 32\n", "NUM_UNITS = 32\n", 
"NUM_CLASSES=len(set(labels_train))\n", "VOCAB_SIZE = len(tokenizer.word_index) + 1" ], "metadata": { "id": "8ENmS8h0JHcm" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "model = Sequential([\n", " Embedding(input_dim = VOCAB_SIZE, output_dim = EMBEDDING_DIM, input_length = max_sequence_len, mask_zero = True),\n", " Dropout(0.2),\n", " LSTM(NUM_UNITS,activation='relu'),\n", " Dense(len(output_labels), activation='softmax')\n", "])\n", "model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=[Precision(), Recall(),'accuracy'])\n", "print(model.summary())" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "7sByLvCUJQZP", "outputId": "16daa2e5-3955-4885-d8bc-84f05399bd52" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Model: \"sequential\"\n", "_________________________________________________________________\n", " Layer (type) Output Shape Param # \n", "=================================================================\n", " embedding (Embedding) (None, 122, 32) 44448 \n", " \n", " dropout (Dropout) (None, 122, 32) 0 \n", " \n", " lstm (LSTM) (None, 32) 8320 \n", " \n", " dense (Dense) (None, 3) 99 \n", " \n", "=================================================================\n", "Total params: 52,867\n", "Trainable params: 52,867\n", "Non-trainable params: 0\n", "_________________________________________________________________\n", "None\n" ] } ] }, { "cell_type": "markdown", "source": [ "# Train Model \n", "The actual training step for the model" ], "metadata": { "id": "CxpgHz4AJZm1" } }, { "cell_type": "code", "source": [ "history=model.fit(training, output_encoded, epochs=EPOCHS, batch_size=BATCH_SIZE, verbose = 1, validation_split = VAL_SPLIT)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "m5nEI4mUJV0C", "outputId": "7fb00a14-e2c2-41ee-b0b1-0d9e7325937a" }, "execution_count": null, "outputs": [ { "output_type": 
"stream", "name": "stdout", "text": [ "Epoch 1/20\n", "36/36 [==============================] - 6s 95ms/step - loss: 1.0946 - precision: 0.0000e+00 - recall: 0.0000e+00 - accuracy: 0.4111 - val_loss: 1.0916 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00 - val_accuracy: 0.3750\n", "Epoch 2/20\n", "36/36 [==============================] - 2s 45ms/step - loss: 1.0774 - precision: 0.0000e+00 - recall: 0.0000e+00 - accuracy: 0.5611 - val_loss: 1.0831 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00 - val_accuracy: 0.3750\n", "Epoch 3/20\n", "36/36 [==============================] - 2s 46ms/step - loss: 1.0535 - precision: 0.6667 - recall: 0.0278 - accuracy: 0.5861 - val_loss: 1.0641 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00 - val_accuracy: 0.4750\n", "Epoch 4/20\n", "36/36 [==============================] - 2s 48ms/step - loss: 0.9295 - precision: 0.9412 - recall: 0.0444 - accuracy: 0.7417 - val_loss: 1.0391 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00 - val_accuracy: 0.5000\n", "Epoch 5/20\n", "36/36 [==============================] - 2s 45ms/step - loss: 0.7216 - precision: 0.9691 - recall: 0.4361 - accuracy: 0.8944 - val_loss: 1.0863 - val_precision: 0.5789 - val_recall: 0.2750 - val_accuracy: 0.5250\n", "Epoch 6/20\n", "36/36 [==============================] - 2s 46ms/step - loss: 1.5734 - precision: 0.9534 - recall: 0.6250 - accuracy: 0.9111 - val_loss: 0.9943 - val_precision: 0.8750 - val_recall: 0.1750 - val_accuracy: 0.5750\n", "Epoch 7/20\n", "36/36 [==============================] - 2s 50ms/step - loss: 0.5846 - precision: 0.9959 - recall: 0.6806 - accuracy: 0.9444 - val_loss: 0.9795 - val_precision: 0.8750 - val_recall: 0.1750 - val_accuracy: 0.5500\n", "Epoch 8/20\n", "36/36 [==============================] - 3s 81ms/step - loss: 0.4886 - precision: 0.9927 - recall: 0.7583 - accuracy: 0.9611 - val_loss: 0.9644 - val_precision: 0.8182 - val_recall: 0.2250 - val_accuracy: 0.5250\n", "Epoch 9/20\n", "36/36 
[==============================] - 2s 45ms/step - loss: 0.4025 - precision: 0.9933 - recall: 0.8222 - accuracy: 0.9611 - val_loss: 0.9566 - val_precision: 0.7692 - val_recall: 0.2500 - val_accuracy: 0.5250\n", "Epoch 10/20\n", "36/36 [==============================] - 2s 45ms/step - loss: 0.3362 - precision: 0.9904 - recall: 0.8639 - accuracy: 0.9694 - val_loss: 0.9570 - val_precision: 0.7500 - val_recall: 0.3000 - val_accuracy: 0.5250\n", "Epoch 11/20\n", "36/36 [==============================] - 2s 45ms/step - loss: 0.2744 - precision: 0.9939 - recall: 0.9028 - accuracy: 0.9806 - val_loss: 0.9650 - val_precision: 0.6842 - val_recall: 0.3250 - val_accuracy: 0.5250\n", "Epoch 12/20\n", "36/36 [==============================] - 2s 45ms/step - loss: 0.2303 - precision: 0.9940 - recall: 0.9250 - accuracy: 0.9778 - val_loss: 0.9903 - val_precision: 0.6190 - val_recall: 0.3250 - val_accuracy: 0.5250\n", "Epoch 13/20\n", "36/36 [==============================] - 2s 44ms/step - loss: 0.1908 - precision: 0.9942 - recall: 0.9444 - accuracy: 0.9833 - val_loss: 1.0452 - val_precision: 0.5652 - val_recall: 0.3250 - val_accuracy: 0.5250\n", "Epoch 14/20\n", "36/36 [==============================] - 2s 47ms/step - loss: 0.1541 - precision: 0.9943 - recall: 0.9611 - accuracy: 0.9833 - val_loss: 1.3452 - val_precision: 0.5200 - val_recall: 0.3250 - val_accuracy: 0.5500\n", "Epoch 15/20\n", "36/36 [==============================] - 3s 73ms/step - loss: 0.1271 - precision: 0.9971 - recall: 0.9639 - accuracy: 0.9861 - val_loss: 1.5000 - val_precision: 0.5556 - val_recall: 0.3750 - val_accuracy: 0.5500\n", "Epoch 16/20\n", "36/36 [==============================] - 2s 55ms/step - loss: 0.1493 - precision: 0.9943 - recall: 0.9667 - accuracy: 0.9889 - val_loss: 1.2082 - val_precision: 0.5769 - val_recall: 0.3750 - val_accuracy: 0.5500\n", "Epoch 17/20\n", "36/36 [==============================] - 2s 47ms/step - loss: 0.1030 - precision: 0.9971 - recall: 0.9667 - accuracy: 0.9833 - 
val_loss: 1.3049 - val_precision: 0.5517 - val_recall: 0.4000 - val_accuracy: 0.5500\n", "Epoch 18/20\n", "36/36 [==============================] - 2s 46ms/step - loss: 0.0900 - precision: 0.9972 - recall: 0.9750 - accuracy: 0.9889 - val_loss: 1.4090 - val_precision: 0.5667 - val_recall: 0.4250 - val_accuracy: 0.5500\n", "Epoch 19/20\n", "36/36 [==============================] - 2s 46ms/step - loss: 0.0767 - precision: 0.9972 - recall: 0.9750 - accuracy: 0.9889 - val_loss: 1.5139 - val_precision: 0.5667 - val_recall: 0.4250 - val_accuracy: 0.5500\n", "Epoch 20/20\n", "36/36 [==============================] - 2s 45ms/step - loss: 0.0680 - precision: 1.0000 - recall: 0.9750 - accuracy: 0.9917 - val_loss: 1.6274 - val_precision: 0.5312 - val_recall: 0.4250 - val_accuracy: 0.5500\n" ] } ] }, { "cell_type": "code", "source": [], "metadata": { "id": "iWHPumwgJe5P" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "# Evaluate model \n", "\n", "Evaluating the performance of the model. A very bad case of overfitting is happening in this trained model because of the limited data I used. This can be improved by increasing the training data and tuning other parameters." 
], "metadata": { "id": "J69DUun-JtTH" } }, { "cell_type": "code", "source": [ "def plot_graphs(history, metric):\n", " plt.plot(history.history[metric])\n", " plt.plot(history.history['val_'+metric], '')\n", " plt.xlabel(\"Epochs\")\n", " plt.ylabel(metric)\n", " plt.legend([metric, 'val_'+metric])\n", "plt.figure(figsize=(16, 8))\n", "plt.subplot(1, 2, 1)\n", "plot_graphs(history, 'accuracy')\n", "plt.ylim(None, 1)\n", "plt.subplot(1, 2, 2)\n", "plot_graphs(history, 'loss')\n", "plt.ylim(0, None)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 718 }, "id": "I0bm7lZLJwZH", "outputId": "f92ef16a-dac1-454b-ceb6-d9077dea4d59" }, "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "(0.0, 1.7053281906992197)" ] }, "metadata": {}, "execution_count": 24 }, { "output_type": "display_data", "data": { "text/plain": [ "
" ], "image/png": "\n" }, "metadata": {} } ] }, { "cell_type": "markdown", "source": [ "# Perform Inference \n", "Here the model is being tested with some text input" ], "metadata": { "id": "AWOv0GINKJGj" } }, { "cell_type": "code", "source": [ "sentence = \"i am so sad\"\n", "input_seq = tokenizer.texts_to_sequences([sentence])\n", "input_features = pad_sequences(input_seq, maxlen = max_sequence_len, padding = 'post')\n", "\n", "#Predict the label\n", "probs = model.predict(input_features)\n", "predicted_y = probs.argmax(axis=-1)\n", "predicted_y\n", "print(encoder.classes_[predicted_y][0])" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "lF-CiYmbJ1Eb", "outputId": "c70f4f33-a9b7-4338-83e8-5f3357cbd787" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "1/1 [==============================] - 1s 547ms/step\n", "negative\n" ] } ] }, { "cell_type": "markdown", "source": [ "# Save the Model files \n", "Using the MLEM package to save the model files for deployment" ], "metadata": { "id": "_GfgPLt6KQRn" } }, { "cell_type": "markdown", "source": [ "##### Save The Tokenizer" ], "metadata": { "id": "wkrYD2sgKekO" } }, { "cell_type": "code", "source": [ "import pickle\n", "with open('tokenizer.pickle', 'wb') as handle:\n", " pickle.dump(tokenizer, handle, protocol=pickle.HIGHEST_PROTOCOL)" ], "metadata": { "id": "2JmMYqyDKMJw" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "##### Save the Model" ], "metadata": { "id": "UA7vTjVXKib9" } }, { "cell_type": "code", "source": [ "# Install mlem for saving the model\n", "# !pip install mlem" ], "metadata": { "id": "tVlsCgDMKkom" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "from mlem.api import save,load\n", "save(model, \"models/tf\")\n", "save(encoder,\"encoder/tf\")" ], "metadata": { "id": "_nKiEXAdKrZe" }, "execution_count": null, "outputs": [] } ] }