{ "cells": [ { "cell_type": "markdown", "id": "56cccab6", "metadata": {}, "source": [ "# Emotions Detection in Text" ] }, { "cell_type": "code", "execution_count": 1, "id": "f0814628-3d83-4fd6-a511-2eccf79f9f1e", "metadata": {}, "outputs": [], "source": [ "# EDA\n", "import pandas as pd\n", "import numpy as np\n", "\n", "# Load Data Viz Pkgs\n", "import seaborn as sns\n", "\n", "# Load Text Cleaning Pkgs\n", "import neattext.functions as nfx\n", "\n", "# Load ML Pkgs\n", "# Estimators\n", "from sklearn.linear_model import LogisticRegression\n", "from sklearn.naive_bayes import MultinomialNB\n", "\n", "# Transformers\n", "from sklearn.feature_extraction.text import CountVectorizer\n", "from sklearn.model_selection import train_test_split\n", "from sklearn.metrics import accuracy_score,classification_report,confusion_matrix" ] }, { "cell_type": "code", "execution_count": 2, "id": "b209e004-ab77-4407-8689-b4318944d47f", "metadata": {}, "outputs": [], "source": [ "# Load Dataset\n", "df = pd.read_csv(\"../data/emotion_dataset_raw.csv\")" ] }, { "cell_type": "code", "execution_count": 3, "id": "fea2d4c0-3bdd-405e-ab69-507ceaac36cb", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
EmotionText
0neutralWhy ?
1joySage Act upgrade on my to do list for tommorow.
2sadnessON THE WAY TO MY HOMEGIRL BABY FUNERAL!!! MAN ...
3joySuch an eye ! The true hazel eye-and so brill...
4joy@Iluvmiasantos ugh babe.. hugggzzz for u .! b...
\n", "
" ], "text/plain": [ " Emotion Text\n", "0 neutral Why ? \n", "1 joy Sage Act upgrade on my to do list for tommorow.\n", "2 sadness ON THE WAY TO MY HOMEGIRL BABY FUNERAL!!! MAN ...\n", "3 joy Such an eye ! The true hazel eye-and so brill...\n", "4 joy @Iluvmiasantos ugh babe.. hugggzzz for u .! b..." ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.head()" ] }, { "cell_type": "code", "execution_count": 4, "id": "430565a3-cf3b-4c6f-afa5-bafd084f5676", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "joy 11045\n", "sadness 6722\n", "fear 5410\n", "anger 4297\n", "surprise 4062\n", "neutral 2254\n", "disgust 856\n", "shame 146\n", "Name: Emotion, dtype: int64" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Value Counts\n", "df['Emotion'].value_counts()" ] }, { "cell_type": "code", "execution_count": 5, "id": "531d3449-a959-4a19-bff0-3ffed551e619", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAk0AAAGwCAYAAAC0HlECAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAAA9IUlEQVR4nO3de1yUdf7//+cIMg6nSVBOK3lIRFRS01K01PLUAc12V9tsSTdTy5JIzXItIzd1szyttqZuq62H7LuVbQdlPZSu5jGUPBGaYeoKYS2CmgHC+/dHH6+fIx6uEATscb/d5lbzvl7XNa/3cA3z9JprLhzGGCMAAABcUo3KbgAAAKA6IDQBAADYQGgCAACwgdAEAABgA6EJAADABkITAACADYQmAAAAG7wru4FrSUlJiY4ePaqAgAA5HI7KbgcAANhgjNGJEycUERGhGjUufjyJ0FSOjh49qsjIyMpuAwAAlMHhw4dVr169iy4nNJWjgIAAST896YGBgZXcDQAAsCM/P1+RkZHW+/jFEJrK0dmP5AIDAwlNAABUM5c7tYYTwQEAAGwgNAEAANhAaAIAALCB0AQAAGADoQkAAMAGQhMAAIANhCYAAAAbCE0AAAA2EJoAAABsIDQBAADYQGgCAACwgdAEAABgA6EJAADABkITAACADYQmAAAAG7wruwFc2w6Nj63sFq7Y9eN2VXYLAIAqgCNNAAAANhCaAAAAbCA0AQAA2EBoAgAAsIHQBAAAYAOhCQAAwAZCEwAAgA2EJgAAABsITQAAADYQmgAAAGwgNAEAANhAaAIAALCB0AQAAGADoQkAAMAGQhMAAIANhCYAAAAbCE0AAAA2EJoAAABsIDQBAADYQGgCAACwgdAEAABgA6EJAADABkITAACADYQmAAAAGwhNAAAANhCaAAAAbCA0AQAA2EBoAgAAsKFSQ9N//vMf9erVSxEREXI4HHr//fc9lhtjlJycrIiICLlcLnXp0kV79uzxqCkoKNDw4cNVp04d+fn5qXfv3jpy5IhHTW5urhISEuR2u+V2u5WQkKDjx4971Bw6dEi9evWSn5+f6tSpo8TERBUWFlbEtAEAQDVUqaHp1KlTatmypWbNmnXB5ZMnT9bUqVM1a9Ysbdu2TWFhYerevbtOnDhh1SQlJWnZsmVaunSpNmzYoJMnTyo+Pl7FxcVWTf/+/ZWWlqaUlBSlpKQoLS1NCQkJ1vLi4mLdc889OnXqlDZs2KClS5fq3Xff1ciRIytu8gAAoFpxGGNMZTchSQ6HQ8uWLVOfPn0k/XSUKSIiQklJSXrmmWck/XRUKTQ0VC+//LKGDh2qvLw81a1bVwsXLtT9998vSTp69KgiIyO1fPly9ezZU+np6WrWrJk2b96sdu3aSZI2b96suLg4ffnll4qOjtaKFSsUHx+vw4cPKyIiQpK0dOlSDRw4UDk5OQoMDLQ1h/z8fLndbuXl5dle51p3aHxsZbdwxa4ft6uyWwAAVCC7799V9pymzMxMZWdnq0ePHtaY0+lU586dtXHjRklSamqqioqKPGoiIiLUokULq2bTpk1yu91WYJKk9u3by+12e9S0aNHCCkyS1LNnTxUUFCg1NfWiPRYUFCg/P9/jBgAArk1VNjRlZ2dLkkJDQz3GQ0NDrWXZ2dny8fFR7dq1L1kTEhJSavshISEeNec/Tu3ateXj42PVXMikSZOs86TcbrciIyN/5iwBAEB1UWVD01kOh8PjvjGm1Nj5zq+5UH1Zas43ZswY5eXlWbfDhw9fsi8AAFB9VdnQFBYWJkmljvTk5ORYR4XCwsJUWFio3NzcS9Z8++23pbZ/7Ngxj5rzHyc3N1dFRUWljkCdy+l0KjAw0OMGAACuTVU2NDVs2FBhYWFatWqVNVZYWKh169apQ4cOkqQ2bdqoZs2aHjVZWVnavXu3VRMXF6e8vDxt3brVqtmyZYvy8vI8anbv3q2srCyrZuXKlXI6nWrTpk2FzhMAAFQP3pX54CdPntRXX31l3c/MzFRaWpqCgoJ0/fXXKykpSRMnTlRUVJSioqI0ceJE+fr6qn///pIkt9utQYMGaeTIkQoODlZQUJBGjRql2NhYdevWTZIUExOjO++8U4MHD9acOXMkSUOGDFF8fLyio6MlST169FCzZs2UkJCgV155Rf/73/80atQoDR48mKNHAABAUiWHps8//1y33367dX/EiBGSpAEDBmjBggUaPXq0Tp8+rWHDhik3N1ft2rXTypUrFRAQYK0zbdo0eXt7q1+/fjp9+rS6du2qBQsWyMvLy6pZvHixEhMTrW/Z9e7d2+PaUF5eXvr44481bNgwdezYUS6XS/3799err75a0U8BAACoJqrMdZquBVynqTSu0wQAqOqq/XWaAAAAqhJCEwAAgA2EJgAAABsITQAAADYQmgAAAGwgNAEAANhAaAIAALCB0AQAAGADoQkAAMAGQhMAAIANhCYAAAAbCE0AAAA2EJoAAABsIDQBAADYQGgCAACwgdAEAABgA6EJAADABkITAACADYQmAAAAGwhNAAAANhCaAAAAbCA0AQAA2EBoAgAAsIHQBAAAYAOhCQAAwAZCEwAAgA2EJgAAABsITQAAADYQmgAAAGwgNAEAANhAaAIAALCB0AQAAGADoQkAAMAGQhMAAIANhCYAAAAbCE0AAAA2EJoAAABsIDQBAADYQGgCAACwgdAEAABgA6EJAADABkITAACADYQmAAAAGwhNAAAANhCaAAAAbCA0AQAA2EBoAgAAsIHQBAAAYAOhCQAAwAZCEwAAgA2EJgAAABsITQAAADYQmgAAAGyo0qHpzJkzeu6559SwYUO5XC41atRI48ePV0lJiVVjjFFycrIiIiLkcrnUpUsX7dmzx2M7BQUFGj58uOrUqSM/Pz/17t1bR44c8ajJzc1VQkKC3G633G63EhISdPz48asxTQAAUA1U6dD08ssv6/XXX9esWbOUnp6uyZMn65VXXtHMmTOtmsmTJ2vq1KmaNWuWtm3bprCwMHXv3l0nTpywapKSkrRs2TItXbpUGzZs0MmTJxUfH6/i4mKrpn///kpLS1NKSopSUlKUlpamhISEqzpfAABQdTmMMaaym7iY+Ph4hYaG6o033rDGfvOb38jX11cLFy6UMUYRERFKSkrSM888I+mno0qhoaF6+eWXNXToUOXl5alu3bpauHCh7r//fknS0aNHFRkZqeXLl6tnz55KT09Xs2bNtHnzZrVr106StHnzZsXFxenLL79UdHS0rX7z8/PldruVl5enwMDAcn42qqdD42Mru4Urdv24XZXdAgCgAtl9/67SR5puvfVWrVmzRvv27ZMkffHFF9qwYYPuvvtuSVJmZqays7PVo0cPax2n06nOnTtr48aNkqTU1FQVFRV51ERERKhFixZWzaZNm+R2u63AJEnt27eX2+22ai6koKBA+fn5HjcAAHBt8q7sBi7lmWeeUV5enpo2bSovLy8VFxdrwoQJeuCBByRJ2dnZkqTQ0FCP9UJDQ/XNN99YNT4+Pqpdu3apmrPrZ2dnKyQkpNTjh4SEWDUXMmnSJL344otlnyAAAKg2qvSRprfffluLFi3SkiVLtH37dr355pt69dVX9eabb3rUORwOj/vGmFJj5zu/5kL1l9vOmDFjlJeXZ90OHz5sZ1oAAKAaqtJHmp5++mk9++yz+t3vfidJio2N1TfffKNJkyZpwIABCgsLk/TTkaLw8HBrvZycHOvoU1hYmAoLC5Wbm+txtCknJ0cdOnSwar799ttSj3/s2LFSR7HO5XQ65XQ6r3yiAACgyqvSR5p++OEH1ajh2aKXl5d1yYGGDRsqLCxMq1atspYXFhZq3bp1ViBq06aNatas6VGTlZWl3bt3WzVxcXHKy8vT1q1brZotW7YoLy/PqgEAAL9sVfpIU69evTRhwgRdf/31at68uXbs2KGpU6fq4YcflvTTR2pJSUmaOHGioqKiFBUVpYkTJ8rX11f9+/eXJLndbg0aNEgjR45UcHCwgoKCNGrUKMXGxqpbt26SpJiYGN15550aPHiw5syZI0kaMmSI4uPjbX9zDgAAXNuqdGiaOXOmnn/+eQ0bNkw5OTmKiIjQ0KFDNW7cOKtm9OjROn36tIYNG6bc3Fy1a9dOK1euVEBAgFUzbdo0eXt7q1+/fjp9+rS6du2qBQsWyMvLy6pZvHixEhMTrW/Z9e7dW7Nmzbp6kwUAAFValb5OU3XDdZpK4zpNAICq7pq4ThMAAEBVQWgCAACwgdAEAABgA6EJAADABkITAACADYQmAAAAGwhNAAAANhCaAAAAbCA0AQAA2EBoAgAAsIHQBAAAYAOhCQAAwAZCEwAAgA2EJgAAABsITQAAADYQmgAAAGwgNAEAANhAaAIAALCB0AQAAGADoQkAAMAGQhMAAIANhCYAAAAbCE0AAAA2EJoAAABsIDQBAADYQGgCAACwgdAEAABgA6EJAADABkITAACADYQmAAAAGwhNAAAANhCaAAAAbCA0AQAA2OBd2Q0A16KOMztWdgtX7LPhn1V2CwBQpXCkCQAAwAZCEwAAgA2EJgAAABsITQAAADYQmgAAAGwgNAEAANhAaAIAALCB0AQAAGADoQkAAMAGQhMAAIANhCYAAAAbCE0AAAA2lCk03XHHHTp+/Hip8fz8fN1xxx1X2hMAAECVU6bQtHbtWhUWFpYa//HHH7V+/forbgoAAKCq8f45xTt37rT+f+/evcrOzrbuFxcXKyUlRb/61a/KrzsAAIAq4meFplatWsnhcMjhcFzwYziXy6WZM2eWW3MAAABVxc8KTZmZmTLGqFGjRtq6davq1q1rLfPx8VFISIi8vLzKvUkAAIDK9rNCU/369SVJJSUlFdIMAABAVfWzQtO59u3bp7Vr1yonJ6dUiBo3btwVNwYAAFCVlOnbc/PmzVOzZs00btw4vfPOO1q2bJl1e//998u1wf/+97/6/e9/r+DgYPn6+qpVq1ZKTU21lhtjlJycrIiICLlcLnXp0kV79uzx2EZBQYGGDx+uOnXqyM/PT71799aRI0c8anJzc5WQkCC32y23262EhIQLXlYBAAD8MpUpNL300kuaMGGCsrOzlZaWph07dli37du3l1tzubm56tixo2rWrKkVK1Zo7969mjJliq677jqrZvLkyZo6dapmzZqlbdu2KSwsTN27d9eJEyesmqSkJC1btkxLly7Vhg0bdPLkScXHx6u4uNiq6d+/v9LS0pSSkqKUlBSlpaUpISGh3OYCAACqtzJ9PJebm6u+ffuWdy+lvPzyy4qMjNT8+fOtsQYNGlj/b4zR9OnTNXbsWP3617+WJL355psKDQ3VkiVLNHToUOXl5emNN97QwoUL1a1bN0nSokWLFBkZqdWrV6tnz55KT09XSkqKNm/erHbt2kn66WhaXFycMjIyFB0dXeFzBQAAVVuZjjT17dtXK1euLO9eSvnggw/Utm1b9e3bVyEhIWrdurXmzZtnLc/MzFR2drZ69OhhjTmdTnXu3FkbN26UJKWmpqqoqMijJiIiQi1atLBqNm3aJLfbbQUmSWrfvr3cbrdVcyEFBQXKz8/3uAEAgGtTmY40NW7cWM8//7w2b96s2NhY1axZ02N5YmJiuTT39ddfa/bs2RoxYoT++Mc/auvWrUpMTJTT6dRDDz1kXVwzNDTUY73Q0FB98803kqTs7Gz5+Piodu3apWrOrp+dna2QkJBSjx8SEuJxAc/zTZo0SS+++OIVzREAAFQPZQpNc+fOlb+/v9atW6d169Z5LHM4HOUWmkpKStS2bVtNnDhRktS6dWvt2bNHs2fP1kMPPeTxmOcyxpQaO9/5NReqv9x2xowZoxEjRlj38/PzFRkZeelJAQCAaqlMoSkzM7O8+7ig8PBwNWvWzGMsJiZG7777riQpLCxM0k9HisLDw62anJwc6+hTWFiYCgsLlZub63G0KScnRx06dLBqvv3221KPf+zYsVJHsc7ldDrldDrLODsAAFCdlOmcpqulY8eOysjI8Bjbt2+fdZHNhg0bKiwsTKtWrbKWFxYWat26dVYgatOmjWrWrOlRk5WVpd27d1s1cXFxysvL09atW62aLVu2KC8vz6oBAAC/bGU60vTwww9fcvnf//73MjVzvqeeekodOnTQxIkT1a9fP23dulVz587V3LlzJf30kVpSUpImTpyoqKgoRUVFaeLEifL19VX//v0lSW63W4MGDdLIkSMVHBysoKAgjRo1SrGxsda36WJiYnTnnXdq8ODBmjNnjiRpyJAhio+P55tzAABA0hVccuBcRUVF2r17t44fP37BP+RbVjfffLOWLVumMWPGaPz48WrYsKGmT5+uBx980KoZPXq0Tp8+rWHDhik3N1ft2rXTypUrFRAQYNVMmzZN3t7e6tevn06fPq2uXbtqwYIFHn8nb/HixUpMTLS+Zde7d2/NmjWr3OYCAACqN4cxxpTHhkpKSjRs2DA1atRIo0ePLo9NVjv5+flyu93Ky8tTYGBgZbdTJRwaH1vZLVyx68ft+tnrdJzZsQI6ubo+G/5ZZbcAAFeF3ffvcjunqUaNGnrqqac0bdq08tokAABAlVGuJ4IfOHBAZ86cKc9NAgAAVAllOqfp3GsTST9dzygrK0sff/yxBgwYUC6NAQAAVCVlCk07duzwuF+jRg3VrVtXU6ZMuew36wAAAKqjMoWmTz/9tLz7AAAAqNLKFJrOOnbsmDIyMuRwONSkSRPVrVu3vPoCAACoUsp0IvipU6f08MMPKzw8XJ06ddJtt92miIgIDRo0SD/88EN59wgAAFDpyhSaRowYoXXr1unDDz/U8ePHdfz4cf3rX//SunXrNHLkyPLuEQAAoNKV6eO5d999V++88466dOlijd19991yuVzq16+fZs+eXV79AQAAVAllOtL0ww8/KDQ0tNR4SEgIH88BAIBrUplCU1xcnF544QX9+OOP1tjp06f14osvKi4urtyaAwAAqCrK9PHc9OnTddddd6levXpq2bKlHA6H0tLS5HQ6tXLlyvLuEUA1sa5T58pu4Yp1/s+6ym4BQBVVptAUGxur/fv3a9GiRfryyy9ljNHvfvc7Pfjgg3K5XOXdIwAAQKUrU2iaNGmSQkNDNXjwYI/xv//97zp27JieeeaZcmkOAACgqijTOU1z5sxR06ZNS403b95cr7/++hU3BQAAUNWUKTRlZ2crPDy81HjdunWVlZV1xU0BAABUNWUKTZGRkfrss89KjX/22WeKiIi44qYAAACqmjKd0/TII48oKSlJRUVFuuOOOyRJa9as0ejRo7kiOAAAuCaVKTSNHj1a//vf/zRs2DAVFhZKkmrVqqVnnnlGY8aMKdcGAQAAqoIyhSaHw6GXX35Zzz//vNLT0+VyuRQVFSWn01ne/QEAAFQJZQpNZ/n7++vmm28ur14AAACqrDKdCA4AAPBLQ2gCAACwgdAEAABgA6EJAADABkITAACADYQmAAAAGwhNAAAANhCaAAAAbCA0AQAA2EBoAgAAsIHQBAAAYAOhCQAAwAZCEwAAgA2EJgAAABsITQAAADYQmgAAAGwgNAEAANhAaAIAALCB0AQAAGADoQkAAMAGQhMAAIANhCYAAAAbCE0AAAA2EJoAAABsIDQBAADYQGgCAACwwbuyGwAAVE8Tfv/bym7hio1d9E5lt4BqhCNNAAAANhCaAAAAbCA0AQAA2EBoAgAAsIHQBAAAYEO1Ck2TJk2Sw+FQUlKSNWaMUXJysiIiIuRyudSlSxft2bPHY72CggINHz5cderUkZ+fn3r37q0jR4541OTm5iohIUFut1tut1sJCQk6fvz4VZgVAACoDqrNJQe2bdumuXPn6sYbb/QYnzx5sqZOnaoFCxaoSZMmeumll9S9e3dlZGQoICBAkpSUlKQPP/xQS5cuVXBwsEaOHKn4+HilpqbKy8tLktS/f38dOXJEKSkpkqQhQ4YoISFBH3744dWdKIBqZ9bI6v974okpvSq7BaDKqxZHmk6ePKkHH3xQ8+bNU+3ata1xY4ymT5+usWPH6te//rVatGihN998Uz/88IOWLFkiScrLy9Mbb7yhKVOmqFu3bmrdurUWLVqkXbt2afXq1ZKk9PR0paSk6G9/+5vi4uIUFxenefPm6aOPPlJGRsZF+yooKFB+fr7HDQAAXJuqRWh6/PHHdc8996hbt24e45mZmcrOzlaPHj2sMafTqc6dO2vjxo2SpNTUVBUVFXnUREREqEWLFlbNpk2b5Ha71a5dO6umffv2crvdVs2FTJo0yfo4z+12KzIyslzmCwAAqp4qH5qWLl2q1NRUTZo0qdSy7OxsSVJoaKjHeGhoqLUsOztbPj4+HkeoLlQTEhJSavshISFWzYWMGTNGeXl51u3w4cM/b3IAAKDaqNLnNB0+fFhPPvmkVq5cqVq1al20zuFweNw3xpQaO9/5NReqv9x2nE6nnE7nJR8HAABcG6r0kabU1FTl5OSoTZs28vb2lre3t9atW6e//OUv8vb2to4wnX80KCcnx1oWFhamwsJC5ebmXrLm22+/LfX4x44dK3UUCwAA/DJV6dDUtWtX7dq1S2lpadatbdu2evDBB5WWlqZGjRopLCxMq1atstYpLCzUunXr1KFDB0lSmzZtVLNmTY+arKws7d6926qJi4tTXl6etm7datVs2bJFeXl5Vg0AAPhlq9IfzwUEBKhFixYeY35+fgoODrbGk5KSNHHiREVFRSkqKkoTJ06Ur6+v+vfvL0lyu90aNGiQRo4cqeDgYAUFBWnUqFGKjY21TiyPiYnRnXfeqcGDB2vOnDmSfrrkQHx8vKKjo6/ijAEAQFVVpUOTHaNHj9bp06c1bNgw5ebmql27dlq5cqV1jSZJmjZtmry9vdWvXz+dPn1aXbt21YIFC6xrNEnS4sWLlZiYaH3Lrnfv3po1a9ZVnw8AAKiaql1oWrt2rcd9h8Oh5ORkJScnX3SdWrVqaebMmZo5c+ZFa4KCgrRo0aJy6hIAAFxrqvQ5TQAAAFUFoQkAAMAGQhMAAIANhCYAAAAbCE0AAAA2EJoAAABsIDQBAADYQGgCAACwgdAEAABgA6EJAADABkITAACADYQmAAAAGwhNAAAANhCaAAAAbCA0AQAA2EBoAgAAsIHQBAAAYAOhCQAAwAZCEwAAgA2EJgAAABsITQAAADYQmgAAAGwgNAEAANhAaAIAALCB0AQAAGADoQkAAMAGQhMAAIANhCYAAAAbCE0AAAA2EJoAAABsIDQBAADYQGgCAACwgdAEAABgA6EJAADABkITAACADYQmAAAAGwhNAAAANhCaAAAAbCA0AQAA2EBoAgAAsIHQBAAAYAOhCQAAwAZCEwAAgA2EJgAAABsITQAAADYQmgAAAGwgNAEAANhAaAIAALDBu7Ib+KVo8/Q/KruFK5b6ykOV3QIAAJWGI00AAAA2EJoAAABsIDQBAADYUKVD06RJk3TzzTcrICBAISEh6tOnjzIyMjxqjDFKTk5WRESEXC6XunTpoj179njUFBQUaPjw4apTp478/PzUu3dvHTlyxKMmNzdXCQkJcrvdcrvdSkhI0PHjxyt6igAAoJqo0qFp3bp1evzxx7V582atWrVKZ86cUY8ePXTq1CmrZvLkyZo6dapmzZqlbdu2KSwsTN27d9eJEyesmqSkJC1btkxLly7Vhg0bdPLkScXHx6u4uNiq6d+/v9LS0pSSkqKUlBSlpaUpISHhqs4XAABUXVX623MpKSke9+fPn6+QkBClpqaqU6dOMsZo+vTpGjt2rH79619Lkt58802FhoZqyZIlGjp0qPLy8vTGG29o4cKF6tatmyRp0aJFioyM1OrVq9WzZ0+lp6crJSVFmzdvVrt27SRJ8+bNU1xcnDIyMhQdHX11Jw4AAKqcKn2k6Xx5eXmSpKCgIElSZmamsrOz1aNHD6vG6XSqc+fO2rhxoyQpNTVVRUVFHjURERFq0aKFVbNp0ya53W4rMElS+/bt5Xa7rZoLKSgoUH5+vscNAABcm6pNaDLGaMSIEbr11lvVokULSVJ2drYkKTQ01KM2NDTUWpadnS0fHx/Vrl37kjUhISGlHjMkJMSquZBJkyZZ50C53W5FRkaWfYIAAKBKqzah6YknntDOnTv11ltvlVrmcDg87htjSo2d7/yaC9VfbjtjxoxRXl6edTt8+PDlpgEAAKqpahGahg8frg8++ECffvqp6tWrZ42HhYVJUqmjQTk5OdbRp7CwMBUWFio3N/eSNd9++22pxz127Fipo1jncjqdCgwM9LgBAIBrU5UOTcYYPfHEE3rvvff0ySefqGHDhh7LGzZsqLCwMK1atcoaKyws1Lp169ShQwdJUps2bVSzZk2PmqysLO3evduqiYuLU15enrZu3WrVbNmyRXl5eVYNAAD4ZavS3557/PHHtWTJEv3rX/9SQECAdUTJ7XbL5XLJ4XAoKSlJEydOVFRUlKKiojRx4kT5+vqqf//+Vu2gQYM0cuRIBQcHKygoSKNGjVJsbKz1bbqYmBjdeeedGjx4sObMmSNJGjJkiOLj4/nmHAAAkFTFQ9Ps2bMlSV26dPEYnz9/vgYOHChJGj16tE6fPq1hw4YpNzdX7dq108qVKxUQEGDVT5s2Td7e3urXr59Onz6trl27asGCBfLy8rJqFi9erMTEROtbdr1799asWbMqdoIAAKDaqNKhyRhz2RqHw6Hk5GQlJydftKZWrVqaOXOmZs6cedGaoKAgLVq0qCxtAgCAX4AqfU4TAABAVUFoAgAAsIHQBAAAYAOhCQAAwAZCEwAAgA2EJgAAABsITQAAADYQmgAAAGwgNAEAANhAaAIAALCB0AQAAGADoQkAAMAGQhMAAIANhCYAAAAbCE0AAAA2EJoAAABsIDQBAADYQGgCAACwgdAEAABgA6EJAADABkITAACADYQmAAAAGwhNAAAANhCaAAAAbCA0AQAA2EBoAgAAsIHQBAAAYAOhCQAAwAZCEwAAgA3eld0AAACo+pKTkyu7hSt2pXPgSBMAAIANhCYAAAAbCE0AAAA2EJoAAABsIDQBAADYQGgCAACwgdAEAABgA6EJAADABkITAACADYQmAAAAGwhNAAAANhCaAAAAbCA0AQAA2EBoAgAAsIHQBAAAYAOhCQAAwAZCEwAAgA2EJgAAABsITQAAADYQmgAAAGwgNAEAANhAaAIAALDBu7IbAACgOkmf8Ellt3DFYsbeUdktVEscaTrPX//6VzVs2FC1atVSmzZttH79+spuCQAAVAGEpnO8/fbbSkpK0tixY7Vjxw7ddtttuuuuu3To0KHKbg0AAFQyQtM5pk6dqkGDBumRRx5RTEyMpk+frsjISM2ePbuyWwMAAJWMc5r+T2FhoVJTU/Xss896jPfo0UMbN2684DoFBQUqKCiw7ufl5UmS8vPzS9UWF5wux24rx4XmdTknfiyugE6urrLM+8zpMxXQydVVlnmfOvPLnPfpgh8qoJOrqyzz/rGoqAI6ubrKMu+TP56qgE6urrLM+9z3u+rqYvM+O26MufQGDIwxxvz3v/81ksxnn33mMT5hwgTTpEmTC67zwgsvGEncuHHjxo0bt2vgdvjw4UtmBY40ncfhcHjcN8aUGjtrzJgxGjFihHW/pKRE//vf/xQcHHzRdSpKfn6+IiMjdfjwYQUGBl7Vx65MzJt5/xIwb+b9S1CZ8zbG6MSJE4qIiLhkHaHp/9SpU0deXl7Kzs72GM/JyVFoaOgF13E6nXI6nR5j1113XUW1aEtgYOAv6kV2FvP+ZWHevyzM+5elsubtdrsvW8OJ4P/Hx8dHbdq00apVqzzGV61apQ4dOlRSVwAAoKrgSNM5RowYoYSEBLVt21ZxcXGaO3euDh06pEcffbSyWwMAAJWM0HSO+++/X99//73Gjx+vrKwstWjRQsuXL1f9+vUru7XLcjqdeuGFF0p9XHitY97M+5eAeTPvX4LqMG+HMZf7fh0AAAA4pwkAAMAGQhMAAIANhCYAAAAbCE24pAYNGmj69OmV9vgDBw5Unz59Ku3xK5vD4dD7779f2W1UGGOMhgwZoqCgIDkcDqWlpVV2S9VWcnKyWrVqVdlt4P/8kn53denSRUlJSZIq/z2jovHtuWtMly5d1KpVq2tmp50xY8bl/xYQqq2UlBQtWLBAa9euVaNGjVSnTp3KbqnaGjVqlIYPH17ZbeAXbtu2bfLz86vsNiRJBw8eVMOGDbVjx45y+wcFoekXyBij4uJieXtX/R+/nSu0ovo6cOCAwsPDK/QCsoWFhfLx8amw7ZeXsvZ59vXs7+8vf3//Cujs2lFUVKSaNWtWdhvXtLp161Z2CxWKj+euoi5duigxMVGjR49WUFCQwsLClJycbC3Py8vTkCFDFBISosDAQN1xxx364osvrOUXOtyblJSkLl26WMvXrVunGTNmyOFwyOFw6ODBg1q7dq0cDof+/e9/q23btnI6nVq/fr0OHDige++9V6GhofL399fNN9+s1atXX4Vnwr5z51xQUKDExESFhISoVq1auvXWW7Vt2zZJP71xNG7cWK+++qrH+rt371aNGjV04MCBq9LvO++8o9jYWLlcLgUHB6tbt246deqUtm3bpu7du6tOnTpyu93q3Lmztm/f7rHu/v371alTJ9WqVUvNmjUrdXX6gwcPyuFw6L333tPtt98uX19ftWzZUps2bfKo27hxozp16iSXy6XIyEglJibq1Kn//6+y//Wvf1VUVJRq1aql0NBQ/fa3v71s/xVh4MCBGj58uA4dOiSHw6EGDRrIGKPJkyerUaNGcrlcatmypd555x1rneLiYg0aNEgNGzaUy+VSdHS0ZsyYUWq7ffr00aRJkxQREaEmTZpUSP/SxZ+vcz+uOKtPnz4aOHCgdb9BgwZ66aWXNHDgQLndbg0ePNj6GS9dulQdOnRQrVq11Lx5c61du9Za72Kv5/M/nlu7dq1uueUW+fn56brrrlPHjh31zTffWMs//PBDtWnTRrVq1VKjRo304osv6syZM+XyvKSkpOjWW2/Vddddp+DgYMXHx1uvQbv78bx58xQZGSlfX1/dd999mjp1aqk/U3W5OTgcDr3++uu699575efnp5deeqlc5neuy71mXn31VYWHhys4OFiPP/64ioqKrGWLFi1S27ZtFRAQoLCwMPXv3185OTnW8nN/1q1bt5bL5dIdd9yhnJwcrVixQjExMQoMDNQDDzygH374wVrvcq+jK3Hq1Ck99NBD8vf3V3h4uKZMmeKx/PyP55KTk3X99dfL6XQqIiJCiYmJ1rKsrCzdc889crlcatiwoZYsWeKx/tl95dyP7Y8fPy6Hw2G9JnJzc/Xggw+qbt26crlcioqK0vz58yVJDRs2lCS1bt1aDofDeq+8Ipf8c74oV507dzaBgYEmOTnZ7Nu3z7z55pvG4XCYlStXmpKSEtOxY0fTq1cvs23bNrNv3z4zcuRIExwcbL7//ntjjDEDBgww9957r8c2n3zySdO5c2djjDHHjx83cXFxZvDgwSYrK8tkZWWZM2fOmE8//dRIMjfeeKNZuXKl+eqrr8x3331n0tLSzOuvv2527txp9u3bZ8aOHWtq1aplvvnmG2v79evXN9OmTbtKz1Bp5845MTHRREREmOXLl5s9e/aYAQMGmNq1a1vPz4QJE0yzZs081n/qqadMp06drkqvR48eNd7e3mbq1KkmMzPT7Ny507z22mvmxIkTZs2aNWbhwoVm7969Zu/evWbQoEEmNDTU5OfnG2OMKS4uNi1atDBdunQxO3bsMOvWrTOtW7c2ksyyZcuMMcZkZmYaSaZp06bmo48+MhkZGea3v/2tqV+/vikqKjLGGLNz507j7+9vpk2bZvbt22c+++wz07p1azNw4EBjjDHbtm0zXl5eZsmSJebgwYNm+/btZsaMGZftvyIcP37cjB8/3tSrV89kZWWZnJwc88c//tE0bdrUpKSkmAMHDpj58+cbp9Np1q5da4wxprCw0IwbN85s3brVfP3112bRokXG19fXvP3229Z2BwwYYPz9/U1CQoLZvXu32bVrV4X0f6nnq3PnzubJJ5/0qL/33nvNgAEDrPv169c3gYGB5pVXXjH79+83+/fvt37G9erVM++8847Zu3eveeSRR0xAQID57rvvjDHmoq/nF154wbRs2dIYY0xRUZFxu91m1KhR5quvvjJ79+41CxYssF7bKSkpJjAw0CxYsMAcOHDArFy50jRo0MAkJyeXy3PzzjvvmHfffdfs27fP7Nixw/Tq1cvExsaa4uJiW/vxhg0bTI0aNcwrr7xiMjIyzGuvvWaCgoKM2+22HsPOHCSZkJAQ88Ybb5gDBw6YgwcPlsv8zrrUPjBgwAATGBhoHn30UZOenm4+/PBD4+vra+bOnWut/8Ybb5jly5ebAwcOmE2bNpn27dubu+66y1p+9mfdvn17s2HDBrN9+3bTuHFj07lzZ9OjRw+zfft285///McEBwebP//5z9Z6l3sdXYnHHnvM1KtXz6xcudLs3LnTxMfHG39/f2t/P/c945///KcJDAw0y5cvN998843ZsmWLx/y7detmWrVqZTZv3mxSU1NN586djcvlstY/u6/s2LHDWic3N9dIMp9++qkxxpjHH3/ctGrVymzbts1kZmaaVatWmQ8++MAYY8zWrVuNJLN69WqTlZVlvVdcCULTVdS5c2dz6623eozdfPPN5plnnjFr1qwxgYGB5scff/RYfsMNN5g5c+YYYy4fms4+xvm/rM++8N5///3L9tisWTMzc+ZM635VCU0nT540NWvWNIsXL7aWFRYWmoiICDN58mRjzE+/wLy8vMyWLVus5XXr1jULFiy4Kr2mpqYaSbZ+MZ85c8YEBASYDz/80BhjzL///W/j5eVlDh8+bNWsWLHigqHpb3/7m1WzZ88eI8mkp6cbY4xJSEgwQ4YM8Xis9evXmxo1apjTp0+bd9991wQGBlphraz9l5dp06aZ+vXrG2OMOXnypKlVq5bZuHGjR82gQYPMAw88cNFtDBs2zPzmN7+x7g8YMMCEhoaagoKCCun5rEs9X3ZDU58+fTxqzv6Mz30DLCoqMvXq1TMvv/yyMebir+dzQ9P3339vJF30TfK2224zEydO9BhbuHChCQ8Pv+ScyyonJ8dIMrt27bK1H99///3mnnvu8djGgw8+6BGa7MxBkklKSqqAGf3kUvvAgAEDTP369c2ZM2essb59+5r777//ots7+yZ/9h8qZ3/Wq1evtmomTZpkJJkDBw5YY0OHDjU9e/Y0xpT9dWTHiRMnjI+Pj1m6dKk19v333xuXy3XB0DRlyhTTpEkTU1hYWGpb6enpRpLZtm2bNbZ//34j6WeFpl69epk//OEPF+z3QutfKT6eu8puvPFGj/vh4eHKyclRamqqTp48qeDgYOvcBH9/f2VmZpbbR0tt27b1uH/q1CmNHj1azZo103XXXSd/f399+eWXOnToULk8Xnk6cOCAioqK1LFjR2usZs2auuWWW5Seni7pp+fynnvu0d///ndJ0kcffaQff/xRffv2vSo9tmzZUl27dlVsbKz69u2refPmKTc3V5KUk5OjRx99VE2aNJHb7Zbb7dbJkyet5zo9PV3XX3+96tWrZ20vLi7ugo9z7j4UHh5ubV+SUlNTtWDBAo99qGfPniopKVFmZqa6d++u+vXrq1GjRkpISNDixYutw/qX6v9q2Lt3r3788Ud1797do/9//OMfHq+B119/XW3btlXdunXl7++vefPmldpnY2NjK/w8pvJ4vs5/TZ517s/e29tbbdu2tfbzy60rSUFBQRo4cKB69uypXr16acaMGcrKyrKWp6amavz48R7P8+DBg5WVleXxMU9ZHThwQP3791ejRo0UGBhofUxy7s/pUvtxRkaGbrnlFo9tnn/f7hwu9TxdqcvtA82bN5eXl5d1/+zv+7N27Nihe++9V/Xr11dAQID18dH5+/O5z1VoaKh8fX3VqFEjj7Gz27X7OiqLAwcOqLCw0GP/DAoKUnR09AXr+/btq9OnT6tRo0YaPHiwli1bZn18mpGRIW9vb910001WfePGjVW7du2f1dNjjz2mpUuXqlWrVho9erQ2btxYhpnZR2i6ys4/CdHhcKikpEQlJSUKDw9XWlqaxy0jI0NPP/20JKlGjRqlvkl27ufjl3P+Nxqefvppvfvuu5owYYLWr1+vtLQ0xcbGqrCwsIyzqzhn5+1wOEqNnzv2yCOPaOnSpTp9+rTmz5+v+++/X76+vlelRy8vL61atUorVqxQs2bNNHPmTEVHRyszM1MDBw5Uamqqpk+fro0bNyotLU3BwcHWc33+z1UqPdezzt2HztaUlJRY/x06dKjHPvTFF19o//79uuGGGxQQEKDt27frrbfeUnh4uMaNG6eWLVvq+PHjl+z/ajg7h48//tij/71791rnY/y///f/9NRTT+nhhx/WypUrlZaWpj/84Q+l9tmr8e2dSz1fdl+rP6fP8/eHy607f/58bdq0SR06dNDbb7+tJk2aaPPmzZJ+eq5ffPFFj+d5165d2r9/v2rVqmW7p4vp1auXvv/+e82bN09btmzRli1bJMnj53Sp/fj81/XZsXPZnUNF7guXe81c7Pe99NM/Wnv06CF/f38tWrRI27Zt07JlyySp1P58/nN1qe3aeR2V1YV+T11KZGSkMjIy9Nprr8nlcmnYsGHq1KmTioqKLrqtc8dr1KhRauz819Fdd92lb775RklJSTp69Ki6du2qUaNG/aw+fw5CUxVx0003KTs7W97e3mrcuLHH7ezXsOvWrevxr0VJpa5r4+Pjo+LiYluPuX79eg0cOFD33XefYmNjFRYWpoMHD5bHdMpd48aN5ePjow0bNlhjRUVF+vzzzxUTE2ON3X333fLz89Ps2bO1YsUKPfzww1e1T4fDoY4dO+rFF1/Ujh075OPjo2XLlmn9+vVKTEzU3XffrebNm8vpdOq7776z1mvWrJkOHTqko0ePWmPnnxhrx0033aQ9e/aU2ofOPn/ST0cuunXrpsmTJ2vnzp06ePCgPvnkk0v2fzU0a9ZMTqdThw4dKtV7ZGSkpJ/22Q4dOmjYsGFq3bq1GjdufNVO8r+Qiz1f579Wi4uLtXv3btvbPRtuJOnMmTNKTU1V06ZNf3Z/rVu31pgxY7Rx40a1aNFCS5YskfTTfpKRkXHB/eTsG1VZff/990pPT9dzzz2nrl27KiYm5mcfgWvatKm2bt3qMfb555973K/IOfwcZX3NfPnll/ruu+/05z//WbfddpuaNm3qcRSqrOy8jsqqcePGqlmzpsf+mZubq3379l10HZfLpd69e+svf/mL1q5dq02bNmnXrl1q2rSpzpw5ox07dli1X331lY4fP27dP/tNvHNfSxe6llvdunU1cOBALVq0SNOnT9fcuXMlyfqdZ/c90Y6q/53zX4hu3bopLi5Offr00csvv6zo6GgdPXpUy5cvV58+fdS2bVvdcccdeuWVV/SPf/xDcXFxWrRokXbv3q3WrVtb22nQoIG2bNmigwcPyt/fX0FBQRd9zMaNG+u9995Tr1695HA49Pzzz1v/Sqlq/Pz89Nhjj+npp59WUFCQrr/+ek2ePFk//PCDBg0aZNV5eXlp4MCBGjNmjBo3bnzRj7gqwpYtW7RmzRr16NFDISEh2rJli44dO6aYmBg1btxYCxcuVNu2bZWfn6+nn35aLpfLWrdbt26Kjo7WQw89pClTpig/P19jx4792T0888wzat++vR5//HENHjxYfn5+Sk9P16pVqzRz5kx99NFH+vrrr9WpUyfVrl1by5cvV0lJiaKjoy/Z/9UQEBCgUaNG6amnnlJJSYluvfVW5efna+PGjfL399eAAQPUuHFj/eMf/9C///1vNWzYUAsXLtS2bdusj3+upks9X35+fhoxYoQ+/vhj3XDDDZo2bZrHm8HlvPbaa4qKilJMTIymTZum3Nzcn/UPgMzMTM2dO1e9e/dWRESEMjIytG/fPj300EOSpHHjxik+Pl6RkZHq27evatSooZ07d2rXrl1X/A2z2rVrKzg4WHPnzlV4eLgOHTqkZ5999mdtY/jw4erUqZOmTp2qXr166ZNPPtGKFSs8jj5V5BzsutQ+sHPnzkuue/3118vHx0czZ87Uo48+qt27d+tPf/rTFfdk53VUVv7+/ho0aJCefvppBQcHKzQ0VGPHjr1oSF2wYIGKi4vVrl07+fr6auHChXK5XKpfv771TcMhQ4Zo9uzZqlmzpkaOHCmXy2X9nF0ul9q3b68///nPatCggb777js999xzHo8xbtw4tWnTRs2bN1dBQYE++ugj63dWSEiIXC6XUlJSVK9ePdWqVevKL2NTbmdH4bIud3Jofn6+GT58uImIiDA1a9Y0kZGR5sEHHzSHDh2y6seNG2dCQ0ON2+02Tz31lHniiSc8TgTPyMgw7du3Ny6Xy0gymZmZ1smEubm5Ho+dmZlpbr/9duNyuUxkZKSZNWtWqR6ryongxhhz+vRpM3z4cFOnTh3jdDpNx44dzdatW0utc+DAASPJOkH8atm7d6/p2bOnqVu3rnE6naZJkybWSfXbt283bdu2NU6n00RFRZl//vOfpZ7bjIwMc+uttxofHx/TpEkTk5KScsETwS91UqQxP51M2r17d+Pv72/8/PzMjTfeaCZMmGCM+emk8M6dO5vatWsbl8tlbrzxRuubZ5fqv6KceyK4McaUlJSYGTNmmOjoaFOzZk1Tt25d07NnT7Nu3TpjjDE//vijGThwoHG73ea6664zjz32mHn22WetE6CNufAXJirCpZ6vwsJC89hjj5mgoCATEhJiJk2adMETwc9/bZ39GS9ZssS0a9fO+Pj4mJiYGLNmzRqr5mKv53NPBM/OzjZ9+vQx4eHhxsfHx9SvX9+MGzfOFBcXW/UpKSmmQ4cOxuVymcDAQHPLLbd4fLPpSqxatcrExMQYp9NpbrzxRrN27VprX7a7H8+dO9f86le/Mi6Xy/Tp08e89NJLJiwszONxLjeHc18/FeFS+4CdL+4sWbLENGjQwDidThMXF2c++OADj+fmQj/r+fPne5wQb4znz96Yy7+OrsSJEyfM73//e+Pr62tCQ0PN5MmTPd43zt2vly1bZtq1a2cCAwONn5+fad++vcdJ7UePHjV33XWXcTqdpn79+mbJkiUmJCTEvP7661bN3r17rfe0Vq1amZUrV3rsK3/6059MTEyMcblcJigoyNx7773m66+/ttafN2+eiYyMNDVq1PB47svKYQyXW0bV9cADD8jLy0uLFi2yvc5nn32mLl266MiRIwoNDa3A7oDyVRFXML5WDB48WF9++aXWr19f2a2gghw5ckSRkZFavXq1unbtWtntXBAfz6FKOnPmjPbt26dNmzZp6NChttYpKCjQ4cOH9fzzz6tfv34EJqAae/XVV9W9e3f5+flpxYoVevPNN/XXv/61sttCOfrkk0908uRJxcbGKisrS6NHj1aDBg3UqVOnym7tojgRHFXS7t271bZtWzVv3lyPPvqorXXeeustRUdHKy8vT5MnT67gDgFUpK1bt6p79+6KjY3V66+/rr/85S965JFHKrstlKOioiL98Y9/VPPmzXXfffepbt26Wrt2bZX+Uzd8PAcAAGADR5oAAABsIDQBAADYQGgCAACwgdAEAABgA6EJAADABkITAJSD5ORkLkgJXOMITQCqrYEDB8rhcJS63XnnnRX6uA6HQ++//77H2KhRo7RmzZoKfVwAlYsrggOo1u68807Nnz/fY8zpdF71Pvz9/eXv73/VHxfA1cORJgDVmtPpVFhYmMetdu3akn46IjRnzhzFx8fL19dXMTEx2rRpk7766it16dJFfn5+iouL04EDBzy2OXv2bN1www3y8fFRdHS0Fi5caC1r0KCBJOm+++6Tw+Gw7p//8VxJSYnGjx+vevXqyel0qlWrVkpJSbGWHzx4UA6HQ++9955uv/12+fr6qmXLltq0aVPFPFEArhihCcA17U9/+pMeeughpaWlqWnTpurfv7+GDh2qMWPG6PPPP5ckPfHEE1b9smXL9OSTT2rkyJHavXu3hg4dqj/84Q/69NNPJUnbtm2TJM2fP19ZWVnW/fPNmDFDU6ZM0auvvqqdO3eqZ8+e6t27t/bv3+9RN3bsWI0aNUppaWlq0qSJHnjgAZ05c6YingoAV8oAQDU1YMAA4+XlZfz8/Dxu48ePN8YYI8k899xzVv2mTZuMJPPGG29YY2+99ZapVauWdb9Dhw5m8ODBHo/Tt29fc/fdd1v3JZlly5Z51LzwwgumZcuW1v2IiAgzYcIEj5qbb77ZDBs2zBhjTGZmppFk/va3v1nL9+zZYySZ9PT0n/lMALgaOKcJQLV2++23a/bs2R5jQUFB1v/feOON1v+HhoZKkmJjYz3GfvzxR+Xn5yswMFDp6ekaMmSIx/Y6duyoGTNm2O4pPz9fR48eVceOHUtt54svvvAYO7e/8PBwSVJOTo6aNm1q+/EAXB2EJgDVmp+fnxo3bnzR5ef+xXSHw3HRsZKSklJjZxljSo3ZYWc7l+sFQNXBOU0AcI6YmBht2LDBY2zjxo2KiYmx7tesWVPFxcUX3UZgYKAiIiIuux0A1QtHmgBUawUFBcrOzvYY8/b2Vp06dcq0vaefflr9+vXTTTfdpK5du+rDDz/Ue++9p9WrV1s1DRo00Jo1a9SxY0c5nU7r23rnb+eFF17QDTfcoFatWmn+/PlKS0vT4sWLy9QXgMpHaAJQraWkpFjnAp0VHR2tL7/8skzb69Onj2bMmKFXXnlFiYmJatiwoebPn68uXbpYNVOmTNGIESM0b948/epXv9LBgwdLbScxMVH5+fkaOXKkcnJy1KxZM33wwQeKiooqU18AKp/DGGMquwkAAICqjnOaAAAAbCA0AQAA2EBoAgAAsIHQBAAAYAOhCQAAwAZCEwAAgA2EJgAAABsITQAAADYQmgAAAGwgNAEAANhAaAIAALDh/wNHGcb1uChEfwAAAABJRU5ErkJggg==\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Plot\n", "sns.countplot(x='Emotion',data=df)" ] }, { "cell_type": "code", "execution_count": 6, "id": "40f991d0-952f-40c1-bf00-f3476ce0436d", "metadata": { "jupyter": { "outputs_hidden": true }, "scrolled": false, "tags": [] }, "outputs": [ { "data": { "text/plain": [ "['BTC_ADDRESS_REGEX',\n", " 'CURRENCY_REGEX',\n", " 'CURRENCY_SYMB_REGEX',\n", " 'Counter',\n", " 'DATE_REGEX',\n", " 'EMAIL_REGEX',\n", " 'EMOJI_REGEX',\n", " 'HASTAG_REGEX',\n", " 'MASTERCard_REGEX',\n", " 'MD5_SHA_REGEX',\n", " 'MOST_COMMON_PUNCT_REGEX',\n", " 'NUMBERS_REGEX',\n", " 'PHONE_REGEX',\n", " 'PoBOX_REGEX',\n", " 'SPECIAL_CHARACTERS_REGEX',\n", " 'STOPWORDS',\n", " 'STOPWORDS_de',\n", " 'STOPWORDS_en',\n", " 'STOPWORDS_es',\n", " 'STOPWORDS_fr',\n", " 'STOPWORDS_ru',\n", " 'STOPWORDS_yo',\n", " 'STREET_ADDRESS_REGEX',\n", " 'TextFrame',\n", " 'URL_PATTERN',\n", " 'USER_HANDLES_REGEX',\n", " 'VISACard_REGEX',\n", " '__builtins__',\n", " '__cached__',\n", " '__doc__',\n", " '__file__',\n", " '__generate_text',\n", " '__loader__',\n", " '__name__',\n", " '__numbers_dict',\n", " '__package__',\n", " '__spec__',\n", " '_lex_richness_herdan',\n", " '_lex_richness_maas_ttr',\n", " 'clean_text',\n", " 'defaultdict',\n", " 'digit2words',\n", " 'extract_btc_address',\n", " 'extract_currencies',\n", " 'extract_currency_symbols',\n", " 'extract_dates',\n", " 'extract_emails',\n", " 'extract_emojis',\n", " 'extract_hashtags',\n", " 'extract_html_tags',\n", " 'extract_mastercard_addr',\n", " 'extract_md5sha',\n", " 'extract_numbers',\n", " 'extract_pattern',\n", " 'extract_phone_numbers',\n", " 'extract_postoffice_box',\n", " 'extract_shortwords',\n", " 'extract_special_characters',\n", " 'extract_stopwords',\n", " 'extract_street_address',\n", " 'extract_terms_in_bracket',\n", " 'extract_urls',\n", " 'extract_userhandles',\n", " 'extract_visacard_addr',\n", " 'fix_contractions',\n", " 'generate_sentence',\n", " 'hamming_distance',\n", " 'inverse_df',\n", " 'lexical_richness',\n", " 'markov_chain',\n", " 'math',\n", " 'nlargest',\n", " 'normalize',\n", " 'num2words',\n", " 'random',\n", " 're',\n", " 'read_txt',\n", " 'remove_accents',\n", " 'remove_bad_quotes',\n", " 'remove_btc_address',\n", " 'remove_currencies',\n", " 'remove_currency_symbols',\n", " 'remove_custom_pattern',\n", " 'remove_custom_words',\n", " 'remove_dates',\n", " 'remove_emails',\n", " 'remove_emojis',\n", " 'remove_hashtags',\n", " 'remove_html_tags',\n", " 'remove_mastercard_addr',\n", " 'remove_md5sha',\n", " 'remove_multiple_spaces',\n", " 'remove_non_ascii',\n", " 'remove_numbers',\n", " 'remove_phone_numbers',\n", " 'remove_postoffice_box',\n", " 'remove_puncts',\n", " 'remove_punctuations',\n", " 'remove_shortwords',\n", " 'remove_special_characters',\n", " 'remove_stopwords',\n", " 'remove_street_address',\n", " 'remove_terms_in_bracket',\n", " 'remove_urls',\n", " 'remove_userhandles',\n", " 'remove_visacard_addr',\n", " 'replace_bad_quotes',\n", " 'replace_currencies',\n", " 'replace_currency_symbols',\n", " 'replace_dates',\n", " 'replace_emails',\n", " 'replace_emojis',\n", " 'replace_numbers',\n", " 'replace_phone_numbers',\n", " 'replace_special_characters',\n", " 'replace_term',\n", " 'replace_urls',\n", " 'string',\n", " 'term_freq',\n", " 'to_txt',\n", " 'unicodedata',\n", " 'word_freq',\n", " 'word_length_freq']" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Data Cleaning\n", "dir(nfx)" ] }, { "cell_type": "code", "execution_count": 7, "id": "b1f87847-a91c-4bd6-a307-d746eb5aa9a0", "metadata": {}, "outputs": [], "source": [ "# User handles\n", "df['Clean_Text'] = df['Text'].apply(nfx.remove_userhandles)" ] }, { "cell_type": "code", "execution_count": 8, "id": "03886bc3-1ac4-4f1b-842b-e5d2d770ff81", "metadata": {}, "outputs": [], "source": [ "# Stopwords\n", "df['Clean_Text'] = df['Clean_Text'].apply(nfx.remove_stopwords)" ] }, { "cell_type": "markdown", "id": "0ffcf4c7", "metadata": {}, "source": [ "## We are not removing Special Characters as some of the rows have just Special Characters and it'll result into empty row." ] }, { "cell_type": "code", "execution_count": 9, "id": "0a0fcc0c-4adf-4f0b-b226-164659ad70ba", "metadata": { "jupyter": { "outputs_hidden": true }, "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
EmotionTextClean_Text
0neutralWhy ??
1joySage Act upgrade on my to do list for tommorow.Sage Act upgrade list tommorow.
2sadnessON THE WAY TO MY HOMEGIRL BABY FUNERAL!!! MAN ...WAY HOMEGIRL BABY FUNERAL!!! MAN HATE FUNERALS...
3joySuch an eye ! The true hazel eye-and so brill...eye ! true hazel eye-and brilliant ! Regular f...
4joy@Iluvmiasantos ugh babe.. hugggzzz for u .! b...ugh babe.. hugggzzz u .! babe naamazed nga ako...
............
34787surprise@MichelGW have you gift! Hope you like it! It'...gift! Hope like it! hand wear ! It'll warm! Lol
34788joyThe world didnt give it to me..so the world MO...world didnt me..so world DEFINITELY cnt away!!!
34789angerA man robbed me today .man robbed today .
34790fearYouu call it JEALOUSY, I call it of #Losing YO...Youu JEALOUSY, #Losing YOU...
34791sadnessI think about you baby, and I dream about you ...think baby, dream time
\n", "

34792 rows × 3 columns

\n", "
" ], "text/plain": [ " Emotion Text \\\n", "0 neutral Why ? \n", "1 joy Sage Act upgrade on my to do list for tommorow. \n", "2 sadness ON THE WAY TO MY HOMEGIRL BABY FUNERAL!!! MAN ... \n", "3 joy Such an eye ! The true hazel eye-and so brill... \n", "4 joy @Iluvmiasantos ugh babe.. hugggzzz for u .! b... \n", "... ... ... \n", "34787 surprise @MichelGW have you gift! Hope you like it! It'... \n", "34788 joy The world didnt give it to me..so the world MO... \n", "34789 anger A man robbed me today . \n", "34790 fear Youu call it JEALOUSY, I call it of #Losing YO... \n", "34791 sadness I think about you baby, and I dream about you ... \n", "\n", " Clean_Text \n", "0 ? \n", "1 Sage Act upgrade list tommorow. \n", "2 WAY HOMEGIRL BABY FUNERAL!!! MAN HATE FUNERALS... \n", "3 eye ! true hazel eye-and brilliant ! Regular f... \n", "4 ugh babe.. hugggzzz u .! babe naamazed nga ako... \n", "... ... \n", "34787 gift! Hope like it! hand wear ! It'll warm! Lol \n", "34788 world didnt me..so world DEFINITELY cnt away!!! \n", "34789 man robbed today . \n", "34790 Youu JEALOUSY, #Losing YOU... \n", "34791 think baby, dream time \n", "\n", "[34792 rows x 3 columns]" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df" ] }, { "cell_type": "code", "execution_count": 10, "id": "450c39c0-79dd-4eaf-85fe-57e344eb81bd", "metadata": {}, "outputs": [], "source": [ "# Features & Labels\n", "Xfeatures = df['Clean_Text']\n", "ylabels = df['Emotion']" ] }, { "cell_type": "markdown", "id": "edde3d4b", "metadata": {}, "source": [ "# It is advisable to split before applying pipelines because it prevents data leakage." ] }, { "cell_type": "code", "execution_count": 11, "id": "27d7f976-c28f-449e-ae1a-53a42bbda4e8", "metadata": {}, "outputs": [], "source": [ "# Split Data\n", "x_train,x_test,y_train,y_test = train_test_split(Xfeatures,ylabels,test_size=0.3,random_state=42)" ] }, { "cell_type": "code", "execution_count": 12, "id": "2f086f29-dba9-40d2-a9dd-f06a6cca3a4c", "metadata": {}, "outputs": [], "source": [ "# Build Pipeline\n", "from sklearn.pipeline import Pipeline" ] }, { "cell_type": "code", "execution_count": 13, "id": "6b81cc86-2bef-40c2-b9a3-668caaadaff0", "metadata": {}, "outputs": [], "source": [ "# LogisticRegression Pipeline\n", "pipe_lr = Pipeline(steps=[('cv',CountVectorizer()),('lr',LogisticRegression())])" ] }, { "cell_type": "code", "execution_count": 14, "id": "dc64b9a7-efe2-4bc4-a0e7-46dff1d52b31", "metadata": { "jupyter": { "outputs_hidden": true }, "scrolled": false, "tags": [] }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\Sanket\\anaconda3\\envs\\nlp\\lib\\site-packages\\sklearn\\linear_model\\_logistic.py:458: ConvergenceWarning: lbfgs failed to converge (status=1):\n", "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n", "\n", "Increase the number of iterations (max_iter) or scale the data as shown in:\n", " https://scikit-learn.org/stable/modules/preprocessing.html\n", "Please also refer to the documentation for alternative solver options:\n", " https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n", " n_iter_i = _check_optimize_result(\n" ] }, { "data": { "text/html": [ "
Pipeline(steps=[('cv', CountVectorizer()), ('lr', LogisticRegression())])
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], "text/plain": [ "Pipeline(steps=[('cv', CountVectorizer()), ('lr', LogisticRegression())])" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Train and Fit Data\n", "pipe_lr.fit(x_train,y_train)" ] }, { "cell_type": "code", "execution_count": 15, "id": "135ed6f8-56ff-4d53-85e3-541e3a7ae2d7", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
Pipeline(steps=[('cv', CountVectorizer()), ('lr', LogisticRegression())])
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], "text/plain": [ "Pipeline(steps=[('cv', CountVectorizer()), ('lr', LogisticRegression())])" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pipe_lr" ] }, { "cell_type": "code", "execution_count": 16, "id": "28396371-5f5c-4a3b-b974-164e047764f3", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.619946349875455" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Check Accuracy\n", "pipe_lr.score(x_test,y_test)" ] }, { "cell_type": "code", "execution_count": 17, "id": "eb3a26b6-d09e-422f-991b-b08c48f55b75", "metadata": {}, "outputs": [], "source": [ "# Make A Prediction\n", "ex1 = \"This book was so interesting it made me happy\"" ] }, { "cell_type": "code", "execution_count": 18, "id": "b08597d9-6f59-45cb-a648-95b0da1ce313", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array(['joy'], dtype=object)" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pipe_lr.predict([ex1])" ] }, { "cell_type": "code", "execution_count": 19, "id": "5b3822ac-17fc-43dd-9bb7-8dad07a4d32c", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[1.60505334e-03, 7.06448086e-03, 6.95652453e-03, 9.43810868e-01,\n", " 1.00440585e-04, 2.63232385e-02, 6.63277122e-05, 1.40730665e-02]])" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Prediction Prob\n", "pipe_lr.predict_proba([ex1])" ] }, { "cell_type": "code", "execution_count": 20, "id": "5b7c4596-d643-48e5-a777-79a6f55c49da", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array(['anger', 'disgust', 'fear', 'joy', 'neutral', 'sadness', 'shame',\n", " 'surprise'], dtype=object)" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# To Know the classes\n", "pipe_lr.classes_" ] }, { "cell_type": "code", "execution_count": 21, "id": "c0d40f62-b1fd-4748-a279-c8f50c748f26", "metadata": {}, "outputs": [], "source": [ "# Save Model & Pipeline\n", "import joblib\n", "pipeline_file = open(\"../models/emotion_classifier_pipe_lr.pkl\",\"wb\")\n", "joblib.dump(pipe_lr,pipeline_file)\n", "pipeline_file.close()" ] }, { "cell_type": "code", "execution_count": null, "id": "377c4e98-67f0-45e5-8dd5-0417585754f0", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.9" } }, "nbformat": 4, "nbformat_minor": 5 }