diff --git "a/G_project.ipynb" "b/G_project.ipynb"
new file mode 100644--- /dev/null
+++ "b/G_project.ipynb"
@@ -0,0 +1,7149 @@
+{
+  "cells": [
+    {
+      "cell_type": "code",
+      "execution_count": 2,
+      "metadata": {
+        "id": "xJ3YIFEAxbfz",
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "outputId": "7348ee14-40c9-4afd-a51a-8b48147924c0"
+      },
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "--2024-08-18 15:03:31--  https://alt.qcri.org/resources/OSACT2022/OSACT2022-sharedTask-train.txt\n",
+            "Resolving alt.qcri.org (alt.qcri.org)... 37.186.61.205\n",
+            "Connecting to alt.qcri.org (alt.qcri.org)|37.186.61.205|:443... connected.\n",
+            "HTTP request sent, awaiting response... 302 Found\n",
+            "Location: http://alt.qcri.org/resources1/OSACT2022/OSACT2022-sharedTask-train.txt [following]\n",
+            "--2024-08-18 15:03:32--  http://alt.qcri.org/resources1/OSACT2022/OSACT2022-sharedTask-train.txt\n",
+            "Connecting to alt.qcri.org (alt.qcri.org)|37.186.61.205|:80... connected.\n",
+            "HTTP request sent, awaiting response... 302 Moved Temporarily\n",
+            "Location: https://alt.qcri.org/resources1/OSACT2022/OSACT2022-sharedTask-train.txt [following]\n",
+            "--2024-08-18 15:03:32--  https://alt.qcri.org/resources1/OSACT2022/OSACT2022-sharedTask-train.txt\n",
+            "Connecting to alt.qcri.org (alt.qcri.org)|37.186.61.205|:443... connected.\n",
+            "HTTP request sent, awaiting response... 200 OK\n",
+            "Length: 1502506 (1.4M) [text/plain]\n",
+            "Saving to: ‘OSACT2022-sharedTask-train.txt.2’\n",
+            "\n",
+            "OSACT2022-sharedTas 100%[===================>]   1.43M  1.19MB/s    in 1.2s    \n",
+            "\n",
+            "2024-08-18 15:03:34 (1.19 MB/s) - ‘OSACT2022-sharedTask-train.txt.2’ saved [1502506/1502506]\n",
+            "\n",
+            "--2024-08-18 15:03:34--  https://alt.qcri.org/resources/OSACT2022/OSACT2022-sharedTask-dev.txt\n",
+            "Resolving alt.qcri.org (alt.qcri.org)... 37.186.61.205\n",
+            "Connecting to alt.qcri.org (alt.qcri.org)|37.186.61.205|:443... connected.\n",
+            "HTTP request sent, awaiting response... 302 Found\n",
+            "Location: http://alt.qcri.org/resources1/OSACT2022/OSACT2022-sharedTask-dev.txt [following]\n",
+            "--2024-08-18 15:03:34--  http://alt.qcri.org/resources1/OSACT2022/OSACT2022-sharedTask-dev.txt\n",
+            "Connecting to alt.qcri.org (alt.qcri.org)|37.186.61.205|:80... connected.\n",
+            "HTTP request sent, awaiting response... 302 Moved Temporarily\n",
+            "Location: https://alt.qcri.org/resources1/OSACT2022/OSACT2022-sharedTask-dev.txt [following]\n",
+            "--2024-08-18 15:03:34--  https://alt.qcri.org/resources1/OSACT2022/OSACT2022-sharedTask-dev.txt\n",
+            "Connecting to alt.qcri.org (alt.qcri.org)|37.186.61.205|:443... connected.\n",
+            "HTTP request sent, awaiting response... 200 OK\n",
+            "Length: 211355 (206K) [text/plain]\n",
+            "Saving to: ‘OSACT2022-sharedTask-dev.txt.2’\n",
+            "\n",
+            "OSACT2022-sharedTas 100%[===================>] 206.40K   626KB/s    in 0.3s    \n",
+            "\n",
+            "2024-08-18 15:03:35 (626 KB/s) - ‘OSACT2022-sharedTask-dev.txt.2’ saved [211355/211355]\n",
+            "\n",
+            "--2024-08-18 15:03:36--  https://alt.qcri.org/resources/OSACT2022/OSACT2022-sharedTask-test-tweets.txt\n",
+            "Resolving alt.qcri.org (alt.qcri.org)... 37.186.61.205\n",
+            "Connecting to alt.qcri.org (alt.qcri.org)|37.186.61.205|:443... connected.\n",
+            "HTTP request sent, awaiting response... 302 Found\n",
+            "Location: http://alt.qcri.org/resources1/OSACT2022/OSACT2022-sharedTask-test-tweets.txt [following]\n",
+            "--2024-08-18 15:03:36--  http://alt.qcri.org/resources1/OSACT2022/OSACT2022-sharedTask-test-tweets.txt\n",
+            "Connecting to alt.qcri.org (alt.qcri.org)|37.186.61.205|:80... connected.\n",
+            "HTTP request sent, awaiting response... 302 Moved Temporarily\n",
+            "Location: https://alt.qcri.org/resources1/OSACT2022/OSACT2022-sharedTask-test-tweets.txt [following]\n",
+            "--2024-08-18 15:03:36--  https://alt.qcri.org/resources1/OSACT2022/OSACT2022-sharedTask-test-tweets.txt\n",
+            "Connecting to alt.qcri.org (alt.qcri.org)|37.186.61.205|:443... connected.\n",
+            "HTTP request sent, awaiting response... 200 OK\n",
+            "Length: 348638 (340K) [text/plain]\n",
+            "Saving to: ‘OSACT2022-sharedTask-test-tweets.txt.2’\n",
+            "\n",
+            "OSACT2022-sharedTas 100%[===================>] 340.47K   705KB/s    in 0.5s    \n",
+            "\n",
+            "2024-08-18 15:03:37 (705 KB/s) - ‘OSACT2022-sharedTask-test-tweets.txt.2’ saved [348638/348638]\n",
+            "\n",
+            "--2024-08-18 15:03:37--  https://alt.qcri.org/resources1/OSACT2022/OSACT2022-sharedTask-test-taskA-gold-labels.txt\n",
+            "Resolving alt.qcri.org (alt.qcri.org)... 37.186.61.205\n",
+            "Connecting to alt.qcri.org (alt.qcri.org)|37.186.61.205|:443... connected.\n",
+            "HTTP request sent, awaiting response... 200 OK\n",
+            "Length: 16780 (16K) [text/plain]\n",
+            "Saving to: ‘OSACT2022-sharedTask-test-taskA-gold-labels.txt.2’\n",
+            "\n",
+            "OSACT2022-sharedTas 100%[===================>]  16.39K  --.-KB/s    in 0s      \n",
+            "\n",
+            "2024-08-18 15:03:38 (174 MB/s) - ‘OSACT2022-sharedTask-test-taskA-gold-labels.txt.2’ saved [16780/16780]\n",
+            "\n"
+          ]
+        }
+      ],
+      "source": [
+        "# Download the OSACT2022 shared-task files (train, dev, test tweets, task-A gold labels)\n",
+        "# into the working directory.\n",
+        "# -nc (--no-clobber): skip a file that already exists. Without it, re-running this cell\n",
+        "# saves numbered duplicates such as 'OSACT2022-sharedTask-train.txt.2', which code that\n",
+        "# opens the plain file names never reads.\n",
+        "!wget -nc \"https://alt.qcri.org/resources/OSACT2022/OSACT2022-sharedTask-train.txt\"\n",
+        "!wget -nc \"https://alt.qcri.org/resources/OSACT2022/OSACT2022-sharedTask-dev.txt\"\n",
+        "!wget -nc \"https://alt.qcri.org/resources/OSACT2022/OSACT2022-sharedTask-test-tweets.txt\"\n",
+        "!wget -nc \"https://alt.qcri.org/resources1/OSACT2022/OSACT2022-sharedTask-test-taskA-gold-labels.txt\""
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 3,
+      "metadata": {
+        "id": "7ltyuVxxzGRT",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 424
+        },
+        "outputId": "a8d27b74-d188-4bb6-bf8b-070f4ab2d7b8"
+      },
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "         1                           @USER ردينا ع التطنز 😏👊🏻      OFF  \\\n",
+              "0        2               وصارت فطاير البقالات غذاء صحي 👎🏻 URL  NOT_OFF   \n",
+              "1        3  @USER روحي لبريده تلقين اشباه كثير بس ماحد زيك...      OFF   \n",
+              "2        4         @USER مش باين حاجه خالص 😣<LF>مش عارف بقى 😔  NOT_OFF   \n",
+              "3        5  #اليوم_الاثنين<LF><LF>👏 يقولك :%90  من المسلمي...  NOT_OFF   \n",
+              "4        6                   حمدلله ماحطها في فمي اساسا😷🤢 URL  NOT_OFF   \n",
+              "...    ...                                                ...      ...   \n",
+              "8881  8883                                @USER الله يلعنهم 🤢      OFF   \n",
+              "8882  8884  واحد سال زوجته بعد كم سنة زواج:<LF>- حبيبتي كم...  NOT_OFF   \n",
+              "8883  8885  @USER يالله روح زي الشاطر واحذف الشو الي سويته...      OFF   \n",
+              "8884  8886  لـمـا الـكـلاب تـهـوهـو عـلـيـك🐶🐶 وأنـت_اللـى_...  NOT_OFF   \n",
+              "8885  8887  #بايع_الكليجا<LF><LF>الله ياخذكم ي بنات خلوني ...  NOT_OFF   \n",
+              "\n",
+              "      NOT_HS  NOT_VLG  NOT_VIO  \n",
+              "0     NOT_HS  NOT_VLG  NOT_VIO  \n",
+              "1     NOT_HS  NOT_VLG  NOT_VIO  \n",
+              "2     NOT_HS  NOT_VLG  NOT_VIO  \n",
+              "3     NOT_HS  NOT_VLG  NOT_VIO  \n",
+              "4     NOT_HS  NOT_VLG  NOT_VIO  \n",
+              "...      ...      ...      ...  \n",
+              "8881  NOT_HS  NOT_VLG  NOT_VIO  \n",
+              "8882  NOT_HS  NOT_VLG  NOT_VIO  \n",
+              "8883  NOT_HS  NOT_VLG  NOT_VIO  \n",
+              "8884  NOT_HS  NOT_VLG  NOT_VIO  \n",
+              "8885  NOT_HS  NOT_VLG  NOT_VIO  \n",
+              "\n",
+              "[8886 rows x 6 columns]"
+            ],
+            "text/html": [
+              "\n",
+              "  <div id=\"df-c1ab181e-0f3f-4424-b0dc-2174d1be8d78\" class=\"colab-df-container\">\n",
+              "    <div>\n",
+              "<style scoped>\n",
+              "    .dataframe tbody tr th:only-of-type {\n",
+              "        vertical-align: middle;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe tbody tr th {\n",
+              "        vertical-align: top;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe thead th {\n",
+              "        text-align: right;\n",
+              "    }\n",
+              "</style>\n",
+              "<table border=\"1\" class=\"dataframe\">\n",
+              "  <thead>\n",
+              "    <tr style=\"text-align: right;\">\n",
+              "      <th></th>\n",
+              "      <th>1</th>\n",
+              "      <th>@USER ردينا ع التطنز 😏👊🏻</th>\n",
+              "      <th>OFF</th>\n",
+              "      <th>NOT_HS</th>\n",
+              "      <th>NOT_VLG</th>\n",
+              "      <th>NOT_VIO</th>\n",
+              "    </tr>\n",
+              "  </thead>\n",
+              "  <tbody>\n",
+              "    <tr>\n",
+              "      <th>0</th>\n",
+              "      <td>2</td>\n",
+              "      <td>وصارت فطاير البقالات غذاء صحي 👎🏻 URL</td>\n",
+              "      <td>NOT_OFF</td>\n",
+              "      <td>NOT_HS</td>\n",
+              "      <td>NOT_VLG</td>\n",
+              "      <td>NOT_VIO</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>1</th>\n",
+              "      <td>3</td>\n",
+              "      <td>@USER روحي لبريده تلقين اشباه كثير بس ماحد زيك...</td>\n",
+              "      <td>OFF</td>\n",
+              "      <td>NOT_HS</td>\n",
+              "      <td>NOT_VLG</td>\n",
+              "      <td>NOT_VIO</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>2</th>\n",
+              "      <td>4</td>\n",
+              "      <td>@USER مش باين حاجه خالص 😣&lt;LF&gt;مش عارف بقى 😔</td>\n",
+              "      <td>NOT_OFF</td>\n",
+              "      <td>NOT_HS</td>\n",
+              "      <td>NOT_VLG</td>\n",
+              "      <td>NOT_VIO</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>3</th>\n",
+              "      <td>5</td>\n",
+              "      <td>#اليوم_الاثنين&lt;LF&gt;&lt;LF&gt;👏 يقولك :%90  من المسلمي...</td>\n",
+              "      <td>NOT_OFF</td>\n",
+              "      <td>NOT_HS</td>\n",
+              "      <td>NOT_VLG</td>\n",
+              "      <td>NOT_VIO</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>4</th>\n",
+              "      <td>6</td>\n",
+              "      <td>حمدلله ماحطها في فمي اساسا😷🤢 URL</td>\n",
+              "      <td>NOT_OFF</td>\n",
+              "      <td>NOT_HS</td>\n",
+              "      <td>NOT_VLG</td>\n",
+              "      <td>NOT_VIO</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>...</th>\n",
+              "      <td>...</td>\n",
+              "      <td>...</td>\n",
+              "      <td>...</td>\n",
+              "      <td>...</td>\n",
+              "      <td>...</td>\n",
+              "      <td>...</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>8881</th>\n",
+              "      <td>8883</td>\n",
+              "      <td>@USER الله يلعنهم 🤢</td>\n",
+              "      <td>OFF</td>\n",
+              "      <td>NOT_HS</td>\n",
+              "      <td>NOT_VLG</td>\n",
+              "      <td>NOT_VIO</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>8882</th>\n",
+              "      <td>8884</td>\n",
+              "      <td>واحد سال زوجته بعد كم سنة زواج:&lt;LF&gt;- حبيبتي كم...</td>\n",
+              "      <td>NOT_OFF</td>\n",
+              "      <td>NOT_HS</td>\n",
+              "      <td>NOT_VLG</td>\n",
+              "      <td>NOT_VIO</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>8883</th>\n",
+              "      <td>8885</td>\n",
+              "      <td>@USER يالله روح زي الشاطر واحذف الشو الي سويته...</td>\n",
+              "      <td>OFF</td>\n",
+              "      <td>NOT_HS</td>\n",
+              "      <td>NOT_VLG</td>\n",
+              "      <td>NOT_VIO</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>8884</th>\n",
+              "      <td>8886</td>\n",
+              "      <td>لـمـا الـكـلاب تـهـوهـو عـلـيـك🐶🐶 وأنـت_اللـى_...</td>\n",
+              "      <td>NOT_OFF</td>\n",
+              "      <td>NOT_HS</td>\n",
+              "      <td>NOT_VLG</td>\n",
+              "      <td>NOT_VIO</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>8885</th>\n",
+              "      <td>8887</td>\n",
+              "      <td>#بايع_الكليجا&lt;LF&gt;&lt;LF&gt;الله ياخذكم ي بنات خلوني ...</td>\n",
+              "      <td>NOT_OFF</td>\n",
+              "      <td>NOT_HS</td>\n",
+              "      <td>NOT_VLG</td>\n",
+              "      <td>NOT_VIO</td>\n",
+              "    </tr>\n",
+              "  </tbody>\n",
+              "</table>\n",
+              "<p>8886 rows × 6 columns</p>\n",
+              "</div>\n",
+              "    <div class=\"colab-df-buttons\">\n",
+              "\n",
+              "  <div class=\"colab-df-container\">\n",
+              "    <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-c1ab181e-0f3f-4424-b0dc-2174d1be8d78')\"\n",
+              "            title=\"Convert this dataframe to an interactive table.\"\n",
+              "            style=\"display:none;\">\n",
+              "\n",
+              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
+              "    <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
+              "  </svg>\n",
+              "    </button>\n",
+              "\n",
+              "  <style>\n",
+              "    .colab-df-container {\n",
+              "      display:flex;\n",
+              "      gap: 12px;\n",
+              "    }\n",
+              "\n",
+              "    .colab-df-convert {\n",
+              "      background-color: #E8F0FE;\n",
+              "      border: none;\n",
+              "      border-radius: 50%;\n",
+              "      cursor: pointer;\n",
+              "      display: none;\n",
+              "      fill: #1967D2;\n",
+              "      height: 32px;\n",
+              "      padding: 0 0 0 0;\n",
+              "      width: 32px;\n",
+              "    }\n",
+              "\n",
+              "    .colab-df-convert:hover {\n",
+              "      background-color: #E2EBFA;\n",
+              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
+              "      fill: #174EA6;\n",
+              "    }\n",
+              "\n",
+              "    .colab-df-buttons div {\n",
+              "      margin-bottom: 4px;\n",
+              "    }\n",
+              "\n",
+              "    [theme=dark] .colab-df-convert {\n",
+              "      background-color: #3B4455;\n",
+              "      fill: #D2E3FC;\n",
+              "    }\n",
+              "\n",
+              "    [theme=dark] .colab-df-convert:hover {\n",
+              "      background-color: #434B5C;\n",
+              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
+              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
+              "      fill: #FFFFFF;\n",
+              "    }\n",
+              "  </style>\n",
+              "\n",
+              "    <script>\n",
+              "      const buttonEl =\n",
+              "        document.querySelector('#df-c1ab181e-0f3f-4424-b0dc-2174d1be8d78 button.colab-df-convert');\n",
+              "      buttonEl.style.display =\n",
+              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
+              "\n",
+              "      async function convertToInteractive(key) {\n",
+              "        const element = document.querySelector('#df-c1ab181e-0f3f-4424-b0dc-2174d1be8d78');\n",
+              "        const dataTable =\n",
+              "          await google.colab.kernel.invokeFunction('convertToInteractive',\n",
+              "                                                    [key], {});\n",
+              "        if (!dataTable) return;\n",
+              "\n",
+              "        const docLinkHtml = 'Like what you see? Visit the ' +\n",
+              "          '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
+              "          + ' to learn more about interactive tables.';\n",
+              "        element.innerHTML = '';\n",
+              "        dataTable['output_type'] = 'display_data';\n",
+              "        await google.colab.output.renderOutput(dataTable, element);\n",
+              "        const docLink = document.createElement('div');\n",
+              "        docLink.innerHTML = docLinkHtml;\n",
+              "        element.appendChild(docLink);\n",
+              "      }\n",
+              "    </script>\n",
+              "  </div>\n",
+              "\n",
+              "\n",
+              "<div id=\"df-0179cb86-5621-4941-a1ff-637aa1f6f4eb\">\n",
+              "  <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-0179cb86-5621-4941-a1ff-637aa1f6f4eb')\"\n",
+              "            title=\"Suggest charts\"\n",
+              "            style=\"display:none;\">\n",
+              "\n",
+              "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
+              "     width=\"24px\">\n",
+              "    <g>\n",
+              "        <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
+              "    </g>\n",
+              "</svg>\n",
+              "  </button>\n",
+              "\n",
+              "<style>\n",
+              "  .colab-df-quickchart {\n",
+              "      --bg-color: #E8F0FE;\n",
+              "      --fill-color: #1967D2;\n",
+              "      --hover-bg-color: #E2EBFA;\n",
+              "      --hover-fill-color: #174EA6;\n",
+              "      --disabled-fill-color: #AAA;\n",
+              "      --disabled-bg-color: #DDD;\n",
+              "  }\n",
+              "\n",
+              "  [theme=dark] .colab-df-quickchart {\n",
+              "      --bg-color: #3B4455;\n",
+              "      --fill-color: #D2E3FC;\n",
+              "      --hover-bg-color: #434B5C;\n",
+              "      --hover-fill-color: #FFFFFF;\n",
+              "      --disabled-bg-color: #3B4455;\n",
+              "      --disabled-fill-color: #666;\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-quickchart {\n",
+              "    background-color: var(--bg-color);\n",
+              "    border: none;\n",
+              "    border-radius: 50%;\n",
+              "    cursor: pointer;\n",
+              "    display: none;\n",
+              "    fill: var(--fill-color);\n",
+              "    height: 32px;\n",
+              "    padding: 0;\n",
+              "    width: 32px;\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-quickchart:hover {\n",
+              "    background-color: var(--hover-bg-color);\n",
+              "    box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
+              "    fill: var(--button-hover-fill-color);\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-quickchart-complete:disabled,\n",
+              "  .colab-df-quickchart-complete:disabled:hover {\n",
+              "    background-color: var(--disabled-bg-color);\n",
+              "    fill: var(--disabled-fill-color);\n",
+              "    box-shadow: none;\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-spinner {\n",
+              "    border: 2px solid var(--fill-color);\n",
+              "    border-color: transparent;\n",
+              "    border-bottom-color: var(--fill-color);\n",
+              "    animation:\n",
+              "      spin 1s steps(1) infinite;\n",
+              "  }\n",
+              "\n",
+              "  @keyframes spin {\n",
+              "    0% {\n",
+              "      border-color: transparent;\n",
+              "      border-bottom-color: var(--fill-color);\n",
+              "      border-left-color: var(--fill-color);\n",
+              "    }\n",
+              "    20% {\n",
+              "      border-color: transparent;\n",
+              "      border-left-color: var(--fill-color);\n",
+              "      border-top-color: var(--fill-color);\n",
+              "    }\n",
+              "    30% {\n",
+              "      border-color: transparent;\n",
+              "      border-left-color: var(--fill-color);\n",
+              "      border-top-color: var(--fill-color);\n",
+              "      border-right-color: var(--fill-color);\n",
+              "    }\n",
+              "    40% {\n",
+              "      border-color: transparent;\n",
+              "      border-right-color: var(--fill-color);\n",
+              "      border-top-color: var(--fill-color);\n",
+              "    }\n",
+              "    60% {\n",
+              "      border-color: transparent;\n",
+              "      border-right-color: var(--fill-color);\n",
+              "    }\n",
+              "    80% {\n",
+              "      border-color: transparent;\n",
+              "      border-right-color: var(--fill-color);\n",
+              "      border-bottom-color: var(--fill-color);\n",
+              "    }\n",
+              "    90% {\n",
+              "      border-color: transparent;\n",
+              "      border-bottom-color: var(--fill-color);\n",
+              "    }\n",
+              "  }\n",
+              "</style>\n",
+              "\n",
+              "  <script>\n",
+              "    async function quickchart(key) {\n",
+              "      const quickchartButtonEl =\n",
+              "        document.querySelector('#' + key + ' button');\n",
+              "      quickchartButtonEl.disabled = true;  // To prevent multiple clicks.\n",
+              "      quickchartButtonEl.classList.add('colab-df-spinner');\n",
+              "      try {\n",
+              "        const charts = await google.colab.kernel.invokeFunction(\n",
+              "            'suggestCharts', [key], {});\n",
+              "      } catch (error) {\n",
+              "        console.error('Error during call to suggestCharts:', error);\n",
+              "      }\n",
+              "      quickchartButtonEl.classList.remove('colab-df-spinner');\n",
+              "      quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
+              "    }\n",
+              "    (() => {\n",
+              "      let quickchartButtonEl =\n",
+              "        document.querySelector('#df-0179cb86-5621-4941-a1ff-637aa1f6f4eb button');\n",
+              "      quickchartButtonEl.style.display =\n",
+              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
+              "    })();\n",
+              "  </script>\n",
+              "</div>\n",
+              "\n",
+              "  <div id=\"id_7f8c56fa-6da5-4fc0-8e9f-3f736b1dd080\">\n",
+              "    <style>\n",
+              "      .colab-df-generate {\n",
+              "        background-color: #E8F0FE;\n",
+              "        border: none;\n",
+              "        border-radius: 50%;\n",
+              "        cursor: pointer;\n",
+              "        display: none;\n",
+              "        fill: #1967D2;\n",
+              "        height: 32px;\n",
+              "        padding: 0 0 0 0;\n",
+              "        width: 32px;\n",
+              "      }\n",
+              "\n",
+              "      .colab-df-generate:hover {\n",
+              "        background-color: #E2EBFA;\n",
+              "        box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
+              "        fill: #174EA6;\n",
+              "      }\n",
+              "\n",
+              "      [theme=dark] .colab-df-generate {\n",
+              "        background-color: #3B4455;\n",
+              "        fill: #D2E3FC;\n",
+              "      }\n",
+              "\n",
+              "      [theme=dark] .colab-df-generate:hover {\n",
+              "        background-color: #434B5C;\n",
+              "        box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
+              "        filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
+              "        fill: #FFFFFF;\n",
+              "      }\n",
+              "    </style>\n",
+              "    <button class=\"colab-df-generate\" onclick=\"generateWithVariable('train_data')\"\n",
+              "            title=\"Generate code using this dataframe.\"\n",
+              "            style=\"display:none;\">\n",
+              "\n",
+              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
+              "       width=\"24px\">\n",
+              "    <path d=\"M7,19H8.4L18.45,9,17,7.55,7,17.6ZM5,21V16.75L18.45,3.32a2,2,0,0,1,2.83,0l1.4,1.43a1.91,1.91,0,0,1,.58,1.4,1.91,1.91,0,0,1-.58,1.4L9.25,21ZM18.45,9,17,7.55Zm-12,3A5.31,5.31,0,0,0,4.9,8.1,5.31,5.31,0,0,0,1,6.5,5.31,5.31,0,0,0,4.9,4.9,5.31,5.31,0,0,0,6.5,1,5.31,5.31,0,0,0,8.1,4.9,5.31,5.31,0,0,0,12,6.5,5.46,5.46,0,0,0,6.5,12Z\"/>\n",
+              "  </svg>\n",
+              "    </button>\n",
+              "    <script>\n",
+              "      (() => {\n",
+              "      const buttonEl =\n",
+              "        document.querySelector('#id_7f8c56fa-6da5-4fc0-8e9f-3f736b1dd080 button.colab-df-generate');\n",
+              "      buttonEl.style.display =\n",
+              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
+              "\n",
+              "      buttonEl.onclick = () => {\n",
+              "        google.colab.notebook.generateWithVariable('train_data');\n",
+              "      }\n",
+              "      })();\n",
+              "    </script>\n",
+              "  </div>\n",
+              "\n",
+              "    </div>\n",
+              "  </div>\n"
+            ],
+            "application/vnd.google.colaboratory.intrinsic+json": {
+              "type": "dataframe",
+              "variable_name": "train_data",
+              "summary": "{\n  \"name\": \"train_data\",\n  \"rows\": 8886,\n  \"fields\": [\n    {\n      \"column\": \"1\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 2565,\n        \"min\": 2,\n        \"max\": 8887,\n        \"num_unique_values\": 8886,\n        \"samples\": [\n          8738,\n          1890,\n          4986\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"@USER \\u0631\\u062f\\u064a\\u0646\\u0627 \\u0639 \\u0627\\u0644\\u062a\\u0637\\u0646\\u0632 \\ud83d\\ude0f\\ud83d\\udc4a\\ud83c\\udffb\",\n      \"properties\": {\n        \"dtype\": \"string\",\n        \"num_unique_values\": 8886,\n        \"samples\": [\n          \"@USER \\u0644\\u0627 \\u0648\\u0627\\u0644\\u0644\\u0647 \\u0645\\u0627 \\u0623\\u0631\\u0641\\u0636 \\u060c \\u0628\\u0633 \\u0644\\u0627\\u0632\\u0645 \\u062a\\u0643\\u0648\\u0646 \\u0648\\u064a\\u0627\\u064a \\u0634\\u0631\\u064a\\u0643\\u0629 \\u0627\\u0644\\u062d\\u064a\\u0627\\u0629 \\ud83d\\ude0c\\u270b\\ud83c\\udffb \\u0639\\u0634\\u0627\\u0646 \\u0646\\u0635\\u064a\\u0631 \\u0637\\u0631\\u0632\\u0627\\u0646 \\u0648\\u0637\\u0631\\u0632\\u0627\\u0646\\u0647 \\ud83d\\udc35\",\n          \"\\u0639\\u0646 \\u0634\\u0639\\u0648\\u0631 \\u0623\\u062e\\u062f \\u0645\\u063a\\u0627\\u062f\\u0631\\u0629 \\u0639\\u0627\\u0644\\u0669 \\u0639\\u0634\\u0627\\u0646 \\u0645\\u0648\\u0639\\u062f \\u0627\\u0644\\u062f\\u0643\\u062a\\u0648\\u0631 \\u0639\\u0627\\u0644\\u0669:\\u0663\\u0660 \\u0648 \\u0623\\u0648\\u0635\\u0644 \\u0669:\\u0661\\u0665 \\u0648\\u062a\\u0635\\u064a\\u0631 \\u0661\\u0660:\\u0663\\u0660 \\u0648\\u0644\\u0633\\u0647 \\u0645\\u0627 \\u0627\\u062c\\u0649 \\u0645\\u0648\\u0639\\u062f\\u064a!!!!!!!! 
\\ud83d\\ude21 URL\",\n          \"@USER \\u0644\\u0627 \\u0644\\u0627 \\u0627\\u0644\\u0648\\u0636\\u0639 \\u0632\\u0627\\u062f \\u0639\\u0646 \\u062d\\u062f\\u0647 \\ud83d\\udc4a\\ud83c\\udffc\\ud83c\\udfc3\\ud83c\\udffb\\u200d\\u2640\\ufe0f\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"OFF\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 2,\n        \"samples\": [\n          \"OFF\",\n          \"NOT_OFF\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"NOT_HS\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 6,\n        \"samples\": [\n          \"NOT_HS\",\n          \"HS2\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"NOT_VLG\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 2,\n        \"samples\": [\n          \"VLG\",\n          \"NOT_VLG\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"NOT_VIO\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 2,\n        \"samples\": [\n          \"VIO\",\n          \"NOT_VIO\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    }\n  ]\n}"
+            }
+          },
+          "metadata": {},
+          "execution_count": 3
+        }
+      ],
+      "source": [
+        "import csv\n",
+        "import pandas as pd\n",
+        "\n",
+        "# NOTE(review): the saved output of this cell shows the first record (id 1) used as the\n",
+        "# column labels and 8886 rows with ids 2..8887, i.e. read_csv's default header inference\n",
+        "# consumed a data line. If these files have no header line, header=None is probably\n",
+        "# wanted -- TODO confirm how later cells address the columns before changing it.\n",
+        "def _read_tsv(path):\n",
+        "    \"\"\"Read one tab-separated file with pandas; QUOTE_NONE keeps quote characters as plain text.\"\"\"\n",
+        "    return pd.read_csv(path, sep=\"\\t\", quoting=csv.QUOTE_NONE)\n",
+        "\n",
+        "train_data = _read_tsv(\"OSACT2022-sharedTask-train.txt\")\n",
+        "dev_data = _read_tsv(\"OSACT2022-sharedTask-dev.txt\")\n",
+        "test_data = _read_tsv(\"OSACT2022-sharedTask-test-tweets.txt\")\n",
+        "train_data"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 4,
+      "metadata": {
+        "id": "F7Jy__azzGVC",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 424
+        },
+        "outputId": "1f05a317-dd92-4c3b-c69f-68ff9ce85479"
+      },
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "                               @USER ردينا ع التطنز 😏👊🏻      OFF\n",
+              "0                  وصارت فطاير البقالات غذاء صحي 👎🏻 URL  NOT_OFF\n",
+              "1     @USER روحي لبريده تلقين اشباه كثير بس ماحد زيك...      OFF\n",
+              "2            @USER مش باين حاجه خالص 😣<LF>مش عارف بقى 😔  NOT_OFF\n",
+              "3     #اليوم_الاثنين<LF><LF>👏 يقولك :%90  من المسلمي...  NOT_OFF\n",
+              "4                      حمدلله ماحطها في فمي اساسا😷🤢 URL  NOT_OFF\n",
+              "...                                                 ...      ...\n",
+              "8881                                @USER الله يلعنهم 🤢      OFF\n",
+              "8882  واحد سال زوجته بعد كم سنة زواج:<LF>- حبيبتي كم...  NOT_OFF\n",
+              "8883  @USER يالله روح زي الشاطر واحذف الشو الي سويته...      OFF\n",
+              "8884  لـمـا الـكـلاب تـهـوهـو عـلـيـك🐶🐶 وأنـت_اللـى_...  NOT_OFF\n",
+              "8885  #بايع_الكليجا<LF><LF>الله ياخذكم ي بنات خلوني ...  NOT_OFF\n",
+              "\n",
+              "[8886 rows x 2 columns]"
+            ],
+            "text/html": [
+              "\n",
+              "  <div id=\"df-7461b779-b016-44d9-bac9-aa6655876590\" class=\"colab-df-container\">\n",
+              "    <div>\n",
+              "<style scoped>\n",
+              "    .dataframe tbody tr th:only-of-type {\n",
+              "        vertical-align: middle;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe tbody tr th {\n",
+              "        vertical-align: top;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe thead th {\n",
+              "        text-align: right;\n",
+              "    }\n",
+              "</style>\n",
+              "<table border=\"1\" class=\"dataframe\">\n",
+              "  <thead>\n",
+              "    <tr style=\"text-align: right;\">\n",
+              "      <th></th>\n",
+              "      <th>@USER ردينا ع التطنز 😏👊🏻</th>\n",
+              "      <th>OFF</th>\n",
+              "    </tr>\n",
+              "  </thead>\n",
+              "  <tbody>\n",
+              "    <tr>\n",
+              "      <th>0</th>\n",
+              "      <td>وصارت فطاير البقالات غذاء صحي 👎🏻 URL</td>\n",
+              "      <td>NOT_OFF</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>1</th>\n",
+              "      <td>@USER روحي لبريده تلقين اشباه كثير بس ماحد زيك...</td>\n",
+              "      <td>OFF</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>2</th>\n",
+              "      <td>@USER مش باين حاجه خالص 😣&lt;LF&gt;مش عارف بقى 😔</td>\n",
+              "      <td>NOT_OFF</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>3</th>\n",
+              "      <td>#اليوم_الاثنين&lt;LF&gt;&lt;LF&gt;👏 يقولك :%90  من المسلمي...</td>\n",
+              "      <td>NOT_OFF</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>4</th>\n",
+              "      <td>حمدلله ماحطها في فمي اساسا😷🤢 URL</td>\n",
+              "      <td>NOT_OFF</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>...</th>\n",
+              "      <td>...</td>\n",
+              "      <td>...</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>8881</th>\n",
+              "      <td>@USER الله يلعنهم 🤢</td>\n",
+              "      <td>OFF</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>8882</th>\n",
+              "      <td>واحد سال زوجته بعد كم سنة زواج:&lt;LF&gt;- حبيبتي كم...</td>\n",
+              "      <td>NOT_OFF</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>8883</th>\n",
+              "      <td>@USER يالله روح زي الشاطر واحذف الشو الي سويته...</td>\n",
+              "      <td>OFF</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>8884</th>\n",
+              "      <td>لـمـا الـكـلاب تـهـوهـو عـلـيـك🐶🐶 وأنـت_اللـى_...</td>\n",
+              "      <td>NOT_OFF</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>8885</th>\n",
+              "      <td>#بايع_الكليجا&lt;LF&gt;&lt;LF&gt;الله ياخذكم ي بنات خلوني ...</td>\n",
+              "      <td>NOT_OFF</td>\n",
+              "    </tr>\n",
+              "  </tbody>\n",
+              "</table>\n",
+              "<p>8886 rows × 2 columns</p>\n",
+              "</div>\n",
+              "    <div class=\"colab-df-buttons\">\n",
+              "\n",
+              "  <div class=\"colab-df-container\">\n",
+              "    <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-7461b779-b016-44d9-bac9-aa6655876590')\"\n",
+              "            title=\"Convert this dataframe to an interactive table.\"\n",
+              "            style=\"display:none;\">\n",
+              "\n",
+              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
+              "    <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
+              "  </svg>\n",
+              "    </button>\n",
+              "\n",
+              "  <style>\n",
+              "    .colab-df-container {\n",
+              "      display:flex;\n",
+              "      gap: 12px;\n",
+              "    }\n",
+              "\n",
+              "    .colab-df-convert {\n",
+              "      background-color: #E8F0FE;\n",
+              "      border: none;\n",
+              "      border-radius: 50%;\n",
+              "      cursor: pointer;\n",
+              "      display: none;\n",
+              "      fill: #1967D2;\n",
+              "      height: 32px;\n",
+              "      padding: 0 0 0 0;\n",
+              "      width: 32px;\n",
+              "    }\n",
+              "\n",
+              "    .colab-df-convert:hover {\n",
+              "      background-color: #E2EBFA;\n",
+              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
+              "      fill: #174EA6;\n",
+              "    }\n",
+              "\n",
+              "    .colab-df-buttons div {\n",
+              "      margin-bottom: 4px;\n",
+              "    }\n",
+              "\n",
+              "    [theme=dark] .colab-df-convert {\n",
+              "      background-color: #3B4455;\n",
+              "      fill: #D2E3FC;\n",
+              "    }\n",
+              "\n",
+              "    [theme=dark] .colab-df-convert:hover {\n",
+              "      background-color: #434B5C;\n",
+              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
+              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
+              "      fill: #FFFFFF;\n",
+              "    }\n",
+              "  </style>\n",
+              "\n",
+              "    <script>\n",
+              "      const buttonEl =\n",
+              "        document.querySelector('#df-7461b779-b016-44d9-bac9-aa6655876590 button.colab-df-convert');\n",
+              "      buttonEl.style.display =\n",
+              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
+              "\n",
+              "      async function convertToInteractive(key) {\n",
+              "        const element = document.querySelector('#df-7461b779-b016-44d9-bac9-aa6655876590');\n",
+              "        const dataTable =\n",
+              "          await google.colab.kernel.invokeFunction('convertToInteractive',\n",
+              "                                                    [key], {});\n",
+              "        if (!dataTable) return;\n",
+              "\n",
+              "        const docLinkHtml = 'Like what you see? Visit the ' +\n",
+              "          '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
+              "          + ' to learn more about interactive tables.';\n",
+              "        element.innerHTML = '';\n",
+              "        dataTable['output_type'] = 'display_data';\n",
+              "        await google.colab.output.renderOutput(dataTable, element);\n",
+              "        const docLink = document.createElement('div');\n",
+              "        docLink.innerHTML = docLinkHtml;\n",
+              "        element.appendChild(docLink);\n",
+              "      }\n",
+              "    </script>\n",
+              "  </div>\n",
+              "\n",
+              "\n",
+              "<div id=\"df-52b63757-8fa9-49f6-85cf-0ccc2206291f\">\n",
+              "  <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-52b63757-8fa9-49f6-85cf-0ccc2206291f')\"\n",
+              "            title=\"Suggest charts\"\n",
+              "            style=\"display:none;\">\n",
+              "\n",
+              "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
+              "     width=\"24px\">\n",
+              "    <g>\n",
+              "        <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
+              "    </g>\n",
+              "</svg>\n",
+              "  </button>\n",
+              "\n",
+              "<style>\n",
+              "  .colab-df-quickchart {\n",
+              "      --bg-color: #E8F0FE;\n",
+              "      --fill-color: #1967D2;\n",
+              "      --hover-bg-color: #E2EBFA;\n",
+              "      --hover-fill-color: #174EA6;\n",
+              "      --disabled-fill-color: #AAA;\n",
+              "      --disabled-bg-color: #DDD;\n",
+              "  }\n",
+              "\n",
+              "  [theme=dark] .colab-df-quickchart {\n",
+              "      --bg-color: #3B4455;\n",
+              "      --fill-color: #D2E3FC;\n",
+              "      --hover-bg-color: #434B5C;\n",
+              "      --hover-fill-color: #FFFFFF;\n",
+              "      --disabled-bg-color: #3B4455;\n",
+              "      --disabled-fill-color: #666;\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-quickchart {\n",
+              "    background-color: var(--bg-color);\n",
+              "    border: none;\n",
+              "    border-radius: 50%;\n",
+              "    cursor: pointer;\n",
+              "    display: none;\n",
+              "    fill: var(--fill-color);\n",
+              "    height: 32px;\n",
+              "    padding: 0;\n",
+              "    width: 32px;\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-quickchart:hover {\n",
+              "    background-color: var(--hover-bg-color);\n",
+              "    box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
+              "    fill: var(--button-hover-fill-color);\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-quickchart-complete:disabled,\n",
+              "  .colab-df-quickchart-complete:disabled:hover {\n",
+              "    background-color: var(--disabled-bg-color);\n",
+              "    fill: var(--disabled-fill-color);\n",
+              "    box-shadow: none;\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-spinner {\n",
+              "    border: 2px solid var(--fill-color);\n",
+              "    border-color: transparent;\n",
+              "    border-bottom-color: var(--fill-color);\n",
+              "    animation:\n",
+              "      spin 1s steps(1) infinite;\n",
+              "  }\n",
+              "\n",
+              "  @keyframes spin {\n",
+              "    0% {\n",
+              "      border-color: transparent;\n",
+              "      border-bottom-color: var(--fill-color);\n",
+              "      border-left-color: var(--fill-color);\n",
+              "    }\n",
+              "    20% {\n",
+              "      border-color: transparent;\n",
+              "      border-left-color: var(--fill-color);\n",
+              "      border-top-color: var(--fill-color);\n",
+              "    }\n",
+              "    30% {\n",
+              "      border-color: transparent;\n",
+              "      border-left-color: var(--fill-color);\n",
+              "      border-top-color: var(--fill-color);\n",
+              "      border-right-color: var(--fill-color);\n",
+              "    }\n",
+              "    40% {\n",
+              "      border-color: transparent;\n",
+              "      border-right-color: var(--fill-color);\n",
+              "      border-top-color: var(--fill-color);\n",
+              "    }\n",
+              "    60% {\n",
+              "      border-color: transparent;\n",
+              "      border-right-color: var(--fill-color);\n",
+              "    }\n",
+              "    80% {\n",
+              "      border-color: transparent;\n",
+              "      border-right-color: var(--fill-color);\n",
+              "      border-bottom-color: var(--fill-color);\n",
+              "    }\n",
+              "    90% {\n",
+              "      border-color: transparent;\n",
+              "      border-bottom-color: var(--fill-color);\n",
+              "    }\n",
+              "  }\n",
+              "</style>\n",
+              "\n",
+              "  <script>\n",
+              "    async function quickchart(key) {\n",
+              "      const quickchartButtonEl =\n",
+              "        document.querySelector('#' + key + ' button');\n",
+              "      quickchartButtonEl.disabled = true;  // To prevent multiple clicks.\n",
+              "      quickchartButtonEl.classList.add('colab-df-spinner');\n",
+              "      try {\n",
+              "        const charts = await google.colab.kernel.invokeFunction(\n",
+              "            'suggestCharts', [key], {});\n",
+              "      } catch (error) {\n",
+              "        console.error('Error during call to suggestCharts:', error);\n",
+              "      }\n",
+              "      quickchartButtonEl.classList.remove('colab-df-spinner');\n",
+              "      quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
+              "    }\n",
+              "    (() => {\n",
+              "      let quickchartButtonEl =\n",
+              "        document.querySelector('#df-52b63757-8fa9-49f6-85cf-0ccc2206291f button');\n",
+              "      quickchartButtonEl.style.display =\n",
+              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
+              "    })();\n",
+              "  </script>\n",
+              "</div>\n",
+              "\n",
+              "  <div id=\"id_b52e722d-8596-4768-a033-77060444e548\">\n",
+              "    <style>\n",
+              "      .colab-df-generate {\n",
+              "        background-color: #E8F0FE;\n",
+              "        border: none;\n",
+              "        border-radius: 50%;\n",
+              "        cursor: pointer;\n",
+              "        display: none;\n",
+              "        fill: #1967D2;\n",
+              "        height: 32px;\n",
+              "        padding: 0 0 0 0;\n",
+              "        width: 32px;\n",
+              "      }\n",
+              "\n",
+              "      .colab-df-generate:hover {\n",
+              "        background-color: #E2EBFA;\n",
+              "        box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
+              "        fill: #174EA6;\n",
+              "      }\n",
+              "\n",
+              "      [theme=dark] .colab-df-generate {\n",
+              "        background-color: #3B4455;\n",
+              "        fill: #D2E3FC;\n",
+              "      }\n",
+              "\n",
+              "      [theme=dark] .colab-df-generate:hover {\n",
+              "        background-color: #434B5C;\n",
+              "        box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
+              "        filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
+              "        fill: #FFFFFF;\n",
+              "      }\n",
+              "    </style>\n",
+              "    <button class=\"colab-df-generate\" onclick=\"generateWithVariable('train_data')\"\n",
+              "            title=\"Generate code using this dataframe.\"\n",
+              "            style=\"display:none;\">\n",
+              "\n",
+              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
+              "       width=\"24px\">\n",
+              "    <path d=\"M7,19H8.4L18.45,9,17,7.55,7,17.6ZM5,21V16.75L18.45,3.32a2,2,0,0,1,2.83,0l1.4,1.43a1.91,1.91,0,0,1,.58,1.4,1.91,1.91,0,0,1-.58,1.4L9.25,21ZM18.45,9,17,7.55Zm-12,3A5.31,5.31,0,0,0,4.9,8.1,5.31,5.31,0,0,0,1,6.5,5.31,5.31,0,0,0,4.9,4.9,5.31,5.31,0,0,0,6.5,1,5.31,5.31,0,0,0,8.1,4.9,5.31,5.31,0,0,0,12,6.5,5.46,5.46,0,0,0,6.5,12Z\"/>\n",
+              "  </svg>\n",
+              "    </button>\n",
+              "    <script>\n",
+              "      (() => {\n",
+              "      const buttonEl =\n",
+              "        document.querySelector('#id_b52e722d-8596-4768-a033-77060444e548 button.colab-df-generate');\n",
+              "      buttonEl.style.display =\n",
+              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
+              "\n",
+              "      buttonEl.onclick = () => {\n",
+              "        google.colab.notebook.generateWithVariable('train_data');\n",
+              "      }\n",
+              "      })();\n",
+              "    </script>\n",
+              "  </div>\n",
+              "\n",
+              "    </div>\n",
+              "  </div>\n"
+            ],
+            "application/vnd.google.colaboratory.intrinsic+json": {
+              "type": "dataframe",
+              "variable_name": "train_data",
+              "summary": "{\n  \"name\": \"train_data\",\n  \"rows\": 8886,\n  \"fields\": [\n    {\n      \"column\": \"@USER \\u0631\\u062f\\u064a\\u0646\\u0627 \\u0639 \\u0627\\u0644\\u062a\\u0637\\u0646\\u0632 \\ud83d\\ude0f\\ud83d\\udc4a\\ud83c\\udffb\",\n      \"properties\": {\n        \"dtype\": \"string\",\n        \"num_unique_values\": 8886,\n        \"samples\": [\n          \"@USER \\u0644\\u0627 \\u0648\\u0627\\u0644\\u0644\\u0647 \\u0645\\u0627 \\u0623\\u0631\\u0641\\u0636 \\u060c \\u0628\\u0633 \\u0644\\u0627\\u0632\\u0645 \\u062a\\u0643\\u0648\\u0646 \\u0648\\u064a\\u0627\\u064a \\u0634\\u0631\\u064a\\u0643\\u0629 \\u0627\\u0644\\u062d\\u064a\\u0627\\u0629 \\ud83d\\ude0c\\u270b\\ud83c\\udffb \\u0639\\u0634\\u0627\\u0646 \\u0646\\u0635\\u064a\\u0631 \\u0637\\u0631\\u0632\\u0627\\u0646 \\u0648\\u0637\\u0631\\u0632\\u0627\\u0646\\u0647 \\ud83d\\udc35\",\n          \"\\u0639\\u0646 \\u0634\\u0639\\u0648\\u0631 \\u0623\\u062e\\u062f \\u0645\\u063a\\u0627\\u062f\\u0631\\u0629 \\u0639\\u0627\\u0644\\u0669 \\u0639\\u0634\\u0627\\u0646 \\u0645\\u0648\\u0639\\u062f \\u0627\\u0644\\u062f\\u0643\\u062a\\u0648\\u0631 \\u0639\\u0627\\u0644\\u0669:\\u0663\\u0660 \\u0648 \\u0623\\u0648\\u0635\\u0644 \\u0669:\\u0661\\u0665 \\u0648\\u062a\\u0635\\u064a\\u0631 \\u0661\\u0660:\\u0663\\u0660 \\u0648\\u0644\\u0633\\u0647 \\u0645\\u0627 \\u0627\\u062c\\u0649 \\u0645\\u0648\\u0639\\u062f\\u064a!!!!!!!! 
\\ud83d\\ude21 URL\",\n          \"@USER \\u0644\\u0627 \\u0644\\u0627 \\u0627\\u0644\\u0648\\u0636\\u0639 \\u0632\\u0627\\u062f \\u0639\\u0646 \\u062d\\u062f\\u0647 \\ud83d\\udc4a\\ud83c\\udffc\\ud83c\\udfc3\\ud83c\\udffb\\u200d\\u2640\\ufe0f\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"OFF\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 2,\n        \"samples\": [\n          \"OFF\",\n          \"NOT_OFF\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    }\n  ]\n}"
+            }
+          },
+          "metadata": {},
+          "execution_count": 4
+        }
+      ],
+      "source": [
+        "# Keep only the tweet column and the OFF/NOT_OFF column; drop the other four columns.\n",
+        "# The labels listed here are values from the first line of the training file, because\n",
+        "# pandas used that line as the header when the file was read.\n",
+        "# errors='ignore' keeps the cell re-runnable: train_data is overwritten in place, so a\n",
+        "# second execution would otherwise raise KeyError for columns that are already gone.\n",
+        "train_data = train_data.drop(columns=['1', 'NOT_HS', 'NOT_VLG', 'NOT_VIO'], errors='ignore')\n",
+        "# Guard against errors='ignore' silently hiding a mistyped or changed column label.\n",
+        "assert train_data.shape[1] == 2, f\"expected 2 columns after drop, got {list(train_data.columns)}\"\n",
+        "train_data"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 5,
+      "metadata": {
+        "id": "crd119tuzGZx",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 424
+        },
+        "outputId": "9ed78838-a108-497d-c635-5745a7b6e087"
+      },
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "                                                   Text    label\n",
+              "0                  وصارت فطاير البقالات غذاء صحي 👎🏻 URL  NOT_OFF\n",
+              "1     @USER روحي لبريده تلقين اشباه كثير بس ماحد زيك...      OFF\n",
+              "2            @USER مش باين حاجه خالص 😣<LF>مش عارف بقى 😔  NOT_OFF\n",
+              "3     #اليوم_الاثنين<LF><LF>👏 يقولك :%90  من المسلمي...  NOT_OFF\n",
+              "4                      حمدلله ماحطها في فمي اساسا😷🤢 URL  NOT_OFF\n",
+              "...                                                 ...      ...\n",
+              "8881                                @USER الله يلعنهم 🤢      OFF\n",
+              "8882  واحد سال زوجته بعد كم سنة زواج:<LF>- حبيبتي كم...  NOT_OFF\n",
+              "8883  @USER يالله روح زي الشاطر واحذف الشو الي سويته...      OFF\n",
+              "8884  لـمـا الـكـلاب تـهـوهـو عـلـيـك🐶🐶 وأنـت_اللـى_...  NOT_OFF\n",
+              "8885  #بايع_الكليجا<LF><LF>الله ياخذكم ي بنات خلوني ...  NOT_OFF\n",
+              "\n",
+              "[8886 rows x 2 columns]"
+            ],
+            "text/html": [
+              "\n",
+              "  <div id=\"df-22b41a67-9bec-4cbb-9573-bc7c5ca0c6d8\" class=\"colab-df-container\">\n",
+              "    <div>\n",
+              "<style scoped>\n",
+              "    .dataframe tbody tr th:only-of-type {\n",
+              "        vertical-align: middle;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe tbody tr th {\n",
+              "        vertical-align: top;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe thead th {\n",
+              "        text-align: right;\n",
+              "    }\n",
+              "</style>\n",
+              "<table border=\"1\" class=\"dataframe\">\n",
+              "  <thead>\n",
+              "    <tr style=\"text-align: right;\">\n",
+              "      <th></th>\n",
+              "      <th>Text</th>\n",
+              "      <th>label</th>\n",
+              "    </tr>\n",
+              "  </thead>\n",
+              "  <tbody>\n",
+              "    <tr>\n",
+              "      <th>0</th>\n",
+              "      <td>وصارت فطاير البقالات غذاء صحي 👎🏻 URL</td>\n",
+              "      <td>NOT_OFF</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>1</th>\n",
+              "      <td>@USER روحي لبريده تلقين اشباه كثير بس ماحد زيك...</td>\n",
+              "      <td>OFF</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>2</th>\n",
+              "      <td>@USER مش باين حاجه خالص 😣&lt;LF&gt;مش عارف بقى 😔</td>\n",
+              "      <td>NOT_OFF</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>3</th>\n",
+              "      <td>#اليوم_الاثنين&lt;LF&gt;&lt;LF&gt;👏 يقولك :%90  من المسلمي...</td>\n",
+              "      <td>NOT_OFF</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>4</th>\n",
+              "      <td>حمدلله ماحطها في فمي اساسا😷🤢 URL</td>\n",
+              "      <td>NOT_OFF</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>...</th>\n",
+              "      <td>...</td>\n",
+              "      <td>...</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>8881</th>\n",
+              "      <td>@USER الله يلعنهم 🤢</td>\n",
+              "      <td>OFF</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>8882</th>\n",
+              "      <td>واحد سال زوجته بعد كم سنة زواج:&lt;LF&gt;- حبيبتي كم...</td>\n",
+              "      <td>NOT_OFF</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>8883</th>\n",
+              "      <td>@USER يالله روح زي الشاطر واحذف الشو الي سويته...</td>\n",
+              "      <td>OFF</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>8884</th>\n",
+              "      <td>لـمـا الـكـلاب تـهـوهـو عـلـيـك🐶🐶 وأنـت_اللـى_...</td>\n",
+              "      <td>NOT_OFF</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>8885</th>\n",
+              "      <td>#بايع_الكليجا&lt;LF&gt;&lt;LF&gt;الله ياخذكم ي بنات خلوني ...</td>\n",
+              "      <td>NOT_OFF</td>\n",
+              "    </tr>\n",
+              "  </tbody>\n",
+              "</table>\n",
+              "<p>8886 rows × 2 columns</p>\n",
+              "</div>\n",
+              "    <div class=\"colab-df-buttons\">\n",
+              "\n",
+              "  <div class=\"colab-df-container\">\n",
+              "    <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-22b41a67-9bec-4cbb-9573-bc7c5ca0c6d8')\"\n",
+              "            title=\"Convert this dataframe to an interactive table.\"\n",
+              "            style=\"display:none;\">\n",
+              "\n",
+              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
+              "    <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
+              "  </svg>\n",
+              "    </button>\n",
+              "\n",
+              "  <style>\n",
+              "    .colab-df-container {\n",
+              "      display:flex;\n",
+              "      gap: 12px;\n",
+              "    }\n",
+              "\n",
+              "    .colab-df-convert {\n",
+              "      background-color: #E8F0FE;\n",
+              "      border: none;\n",
+              "      border-radius: 50%;\n",
+              "      cursor: pointer;\n",
+              "      display: none;\n",
+              "      fill: #1967D2;\n",
+              "      height: 32px;\n",
+              "      padding: 0 0 0 0;\n",
+              "      width: 32px;\n",
+              "    }\n",
+              "\n",
+              "    .colab-df-convert:hover {\n",
+              "      background-color: #E2EBFA;\n",
+              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
+              "      fill: #174EA6;\n",
+              "    }\n",
+              "\n",
+              "    .colab-df-buttons div {\n",
+              "      margin-bottom: 4px;\n",
+              "    }\n",
+              "\n",
+              "    [theme=dark] .colab-df-convert {\n",
+              "      background-color: #3B4455;\n",
+              "      fill: #D2E3FC;\n",
+              "    }\n",
+              "\n",
+              "    [theme=dark] .colab-df-convert:hover {\n",
+              "      background-color: #434B5C;\n",
+              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
+              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
+              "      fill: #FFFFFF;\n",
+              "    }\n",
+              "  </style>\n",
+              "\n",
+              "    <script>\n",
+              "      const buttonEl =\n",
+              "        document.querySelector('#df-22b41a67-9bec-4cbb-9573-bc7c5ca0c6d8 button.colab-df-convert');\n",
+              "      buttonEl.style.display =\n",
+              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
+              "\n",
+              "      async function convertToInteractive(key) {\n",
+              "        const element = document.querySelector('#df-22b41a67-9bec-4cbb-9573-bc7c5ca0c6d8');\n",
+              "        const dataTable =\n",
+              "          await google.colab.kernel.invokeFunction('convertToInteractive',\n",
+              "                                                    [key], {});\n",
+              "        if (!dataTable) return;\n",
+              "\n",
+              "        const docLinkHtml = 'Like what you see? Visit the ' +\n",
+              "          '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
+              "          + ' to learn more about interactive tables.';\n",
+              "        element.innerHTML = '';\n",
+              "        dataTable['output_type'] = 'display_data';\n",
+              "        await google.colab.output.renderOutput(dataTable, element);\n",
+              "        const docLink = document.createElement('div');\n",
+              "        docLink.innerHTML = docLinkHtml;\n",
+              "        element.appendChild(docLink);\n",
+              "      }\n",
+              "    </script>\n",
+              "  </div>\n",
+              "\n",
+              "\n",
+              "<div id=\"df-9895cbcc-7eaa-49d7-b02a-464f8f19e5f4\">\n",
+              "  <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-9895cbcc-7eaa-49d7-b02a-464f8f19e5f4')\"\n",
+              "            title=\"Suggest charts\"\n",
+              "            style=\"display:none;\">\n",
+              "\n",
+              "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
+              "     width=\"24px\">\n",
+              "    <g>\n",
+              "        <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
+              "    </g>\n",
+              "</svg>\n",
+              "  </button>\n",
+              "\n",
+              "<style>\n",
+              "  .colab-df-quickchart {\n",
+              "      --bg-color: #E8F0FE;\n",
+              "      --fill-color: #1967D2;\n",
+              "      --hover-bg-color: #E2EBFA;\n",
+              "      --hover-fill-color: #174EA6;\n",
+              "      --disabled-fill-color: #AAA;\n",
+              "      --disabled-bg-color: #DDD;\n",
+              "  }\n",
+              "\n",
+              "  [theme=dark] .colab-df-quickchart {\n",
+              "      --bg-color: #3B4455;\n",
+              "      --fill-color: #D2E3FC;\n",
+              "      --hover-bg-color: #434B5C;\n",
+              "      --hover-fill-color: #FFFFFF;\n",
+              "      --disabled-bg-color: #3B4455;\n",
+              "      --disabled-fill-color: #666;\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-quickchart {\n",
+              "    background-color: var(--bg-color);\n",
+              "    border: none;\n",
+              "    border-radius: 50%;\n",
+              "    cursor: pointer;\n",
+              "    display: none;\n",
+              "    fill: var(--fill-color);\n",
+              "    height: 32px;\n",
+              "    padding: 0;\n",
+              "    width: 32px;\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-quickchart:hover {\n",
+              "    background-color: var(--hover-bg-color);\n",
+              "    box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
+              "    fill: var(--button-hover-fill-color);\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-quickchart-complete:disabled,\n",
+              "  .colab-df-quickchart-complete:disabled:hover {\n",
+              "    background-color: var(--disabled-bg-color);\n",
+              "    fill: var(--disabled-fill-color);\n",
+              "    box-shadow: none;\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-spinner {\n",
+              "    border: 2px solid var(--fill-color);\n",
+              "    border-color: transparent;\n",
+              "    border-bottom-color: var(--fill-color);\n",
+              "    animation:\n",
+              "      spin 1s steps(1) infinite;\n",
+              "  }\n",
+              "\n",
+              "  @keyframes spin {\n",
+              "    0% {\n",
+              "      border-color: transparent;\n",
+              "      border-bottom-color: var(--fill-color);\n",
+              "      border-left-color: var(--fill-color);\n",
+              "    }\n",
+              "    20% {\n",
+              "      border-color: transparent;\n",
+              "      border-left-color: var(--fill-color);\n",
+              "      border-top-color: var(--fill-color);\n",
+              "    }\n",
+              "    30% {\n",
+              "      border-color: transparent;\n",
+              "      border-left-color: var(--fill-color);\n",
+              "      border-top-color: var(--fill-color);\n",
+              "      border-right-color: var(--fill-color);\n",
+              "    }\n",
+              "    40% {\n",
+              "      border-color: transparent;\n",
+              "      border-right-color: var(--fill-color);\n",
+              "      border-top-color: var(--fill-color);\n",
+              "    }\n",
+              "    60% {\n",
+              "      border-color: transparent;\n",
+              "      border-right-color: var(--fill-color);\n",
+              "    }\n",
+              "    80% {\n",
+              "      border-color: transparent;\n",
+              "      border-right-color: var(--fill-color);\n",
+              "      border-bottom-color: var(--fill-color);\n",
+              "    }\n",
+              "    90% {\n",
+              "      border-color: transparent;\n",
+              "      border-bottom-color: var(--fill-color);\n",
+              "    }\n",
+              "  }\n",
+              "</style>\n",
+              "\n",
+              "  <script>\n",
+              "    async function quickchart(key) {\n",
+              "      const quickchartButtonEl =\n",
+              "        document.querySelector('#' + key + ' button');\n",
+              "      quickchartButtonEl.disabled = true;  // To prevent multiple clicks.\n",
+              "      quickchartButtonEl.classList.add('colab-df-spinner');\n",
+              "      try {\n",
+              "        const charts = await google.colab.kernel.invokeFunction(\n",
+              "            'suggestCharts', [key], {});\n",
+              "      } catch (error) {\n",
+              "        console.error('Error during call to suggestCharts:', error);\n",
+              "      }\n",
+              "      quickchartButtonEl.classList.remove('colab-df-spinner');\n",
+              "      quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
+              "    }\n",
+              "    (() => {\n",
+              "      let quickchartButtonEl =\n",
+              "        document.querySelector('#df-9895cbcc-7eaa-49d7-b02a-464f8f19e5f4 button');\n",
+              "      quickchartButtonEl.style.display =\n",
+              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
+              "    })();\n",
+              "  </script>\n",
+              "</div>\n",
+              "\n",
+              "  <div id=\"id_6191d468-e512-43d1-8fd1-145c73e5a7dc\">\n",
+              "    <style>\n",
+              "      .colab-df-generate {\n",
+              "        background-color: #E8F0FE;\n",
+              "        border: none;\n",
+              "        border-radius: 50%;\n",
+              "        cursor: pointer;\n",
+              "        display: none;\n",
+              "        fill: #1967D2;\n",
+              "        height: 32px;\n",
+              "        padding: 0 0 0 0;\n",
+              "        width: 32px;\n",
+              "      }\n",
+              "\n",
+              "      .colab-df-generate:hover {\n",
+              "        background-color: #E2EBFA;\n",
+              "        box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
+              "        fill: #174EA6;\n",
+              "      }\n",
+              "\n",
+              "      [theme=dark] .colab-df-generate {\n",
+              "        background-color: #3B4455;\n",
+              "        fill: #D2E3FC;\n",
+              "      }\n",
+              "\n",
+              "      [theme=dark] .colab-df-generate:hover {\n",
+              "        background-color: #434B5C;\n",
+              "        box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
+              "        filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
+              "        fill: #FFFFFF;\n",
+              "      }\n",
+              "    </style>\n",
+              "    <button class=\"colab-df-generate\" onclick=\"generateWithVariable('train_data')\"\n",
+              "            title=\"Generate code using this dataframe.\"\n",
+              "            style=\"display:none;\">\n",
+              "\n",
+              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
+              "       width=\"24px\">\n",
+              "    <path d=\"M7,19H8.4L18.45,9,17,7.55,7,17.6ZM5,21V16.75L18.45,3.32a2,2,0,0,1,2.83,0l1.4,1.43a1.91,1.91,0,0,1,.58,1.4,1.91,1.91,0,0,1-.58,1.4L9.25,21ZM18.45,9,17,7.55Zm-12,3A5.31,5.31,0,0,0,4.9,8.1,5.31,5.31,0,0,0,1,6.5,5.31,5.31,0,0,0,4.9,4.9,5.31,5.31,0,0,0,6.5,1,5.31,5.31,0,0,0,8.1,4.9,5.31,5.31,0,0,0,12,6.5,5.46,5.46,0,0,0,6.5,12Z\"/>\n",
+              "  </svg>\n",
+              "    </button>\n",
+              "    <script>\n",
+              "      (() => {\n",
+              "      const buttonEl =\n",
+              "        document.querySelector('#id_6191d468-e512-43d1-8fd1-145c73e5a7dc button.colab-df-generate');\n",
+              "      buttonEl.style.display =\n",
+              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
+              "\n",
+              "      buttonEl.onclick = () => {\n",
+              "        google.colab.notebook.generateWithVariable('train_data');\n",
+              "      }\n",
+              "      })();\n",
+              "    </script>\n",
+              "  </div>\n",
+              "\n",
+              "    </div>\n",
+              "  </div>\n"
+            ],
+            "application/vnd.google.colaboratory.intrinsic+json": {
+              "type": "dataframe",
+              "variable_name": "train_data",
+              "summary": "{\n  \"name\": \"train_data\",\n  \"rows\": 8886,\n  \"fields\": [\n    {\n      \"column\": \"Text\",\n      \"properties\": {\n        \"dtype\": \"string\",\n        \"num_unique_values\": 8886,\n        \"samples\": [\n          \"@USER \\u0644\\u0627 \\u0648\\u0627\\u0644\\u0644\\u0647 \\u0645\\u0627 \\u0623\\u0631\\u0641\\u0636 \\u060c \\u0628\\u0633 \\u0644\\u0627\\u0632\\u0645 \\u062a\\u0643\\u0648\\u0646 \\u0648\\u064a\\u0627\\u064a \\u0634\\u0631\\u064a\\u0643\\u0629 \\u0627\\u0644\\u062d\\u064a\\u0627\\u0629 \\ud83d\\ude0c\\u270b\\ud83c\\udffb \\u0639\\u0634\\u0627\\u0646 \\u0646\\u0635\\u064a\\u0631 \\u0637\\u0631\\u0632\\u0627\\u0646 \\u0648\\u0637\\u0631\\u0632\\u0627\\u0646\\u0647 \\ud83d\\udc35\",\n          \"\\u0639\\u0646 \\u0634\\u0639\\u0648\\u0631 \\u0623\\u062e\\u062f \\u0645\\u063a\\u0627\\u062f\\u0631\\u0629 \\u0639\\u0627\\u0644\\u0669 \\u0639\\u0634\\u0627\\u0646 \\u0645\\u0648\\u0639\\u062f \\u0627\\u0644\\u062f\\u0643\\u062a\\u0648\\u0631 \\u0639\\u0627\\u0644\\u0669:\\u0663\\u0660 \\u0648 \\u0623\\u0648\\u0635\\u0644 \\u0669:\\u0661\\u0665 \\u0648\\u062a\\u0635\\u064a\\u0631 \\u0661\\u0660:\\u0663\\u0660 \\u0648\\u0644\\u0633\\u0647 \\u0645\\u0627 \\u0627\\u062c\\u0649 \\u0645\\u0648\\u0639\\u062f\\u064a!!!!!!!! \\ud83d\\ude21 URL\",\n          \"@USER \\u0644\\u0627 \\u0644\\u0627 \\u0627\\u0644\\u0648\\u0636\\u0639 \\u0632\\u0627\\u062f \\u0639\\u0646 \\u062d\\u062f\\u0647 \\ud83d\\udc4a\\ud83c\\udffc\\ud83c\\udfc3\\ud83c\\udffb\\u200d\\u2640\\ufe0f\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"label\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 2,\n        \"samples\": [\n          \"OFF\",\n          \"NOT_OFF\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    }\n  ]\n}"
+            }
+          },
+          "metadata": {},
+          "execution_count": 5
+        }
+      ],
+      "source": [
+        "# Give the tweet-text and label columns stable names in one rename call.\n",
+        "# Keys that are not present are ignored, so re-running this cell is a no-op.\n",
+        "# NOTE(review): the current column names look like a data row (a tweet and its\n",
+        "# label) that was parsed as the header -- confirm against the earlier read step.\n",
+        "train_data = train_data.rename(\n",
+        "    columns={\n",
+        "        \"@USER ردينا ع التطنز 😏👊🏻\": \"Text\",\n",
+        "        \"OFF\": \"label\",\n",
+        "    }\n",
+        ")\n",
+        "train_data"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 6,
+      "metadata": {
+        "id": "SakRj7VGzc6t",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 424
+        },
+        "outputId": "05f6082b-f0cc-4879-df68-32e1c70e0902"
+      },
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "       8888    @USER افطرت عليك بعقاء واثنين من فروخها الجن 🔪😂  NOT_OFF  \\\n",
+              "0      8889                   #داليا_مبارك<LF>مادري ليش تقرفت😷  NOT_OFF   \n",
+              "1      8890  RT @USER ابديت السناب  الجديد ❌<LF>حاس الناس ح...  NOT_OFF   \n",
+              "2      8891                     @USER هييه والله وااايدد 🔪🔪🔪💔💔  NOT_OFF   \n",
+              "3      8892                                  اكيد اخس شي 😤 URL  NOT_OFF   \n",
+              "4      8893                           مابي شي الحين غير فراشي😣  NOT_OFF   \n",
+              "...     ...                                                ...      ...   \n",
+              "1264  10153  @USER روما محظوظين بذا المدرب بيروتي يسحب في ر...  NOT_OFF   \n",
+              "1265  10154  @USER هلا والله بالحب<LF>هلا لولو❤️😙<LF>...<LF...      OFF   \n",
+              "1266  10155  رينز فاز 😡😡😡😡😡😡😡 يعنني اوه شوفو العرض الأسطوري...      OFF   \n",
+              "1267  10156  @USER ييييع والله شيء يلوع الكبد مريضات الله ي...      OFF   \n",
+              "1268  10157   @USER تحسينها ع كليجه م اكلت شي واضح من الصوت 😷😷  NOT_OFF   \n",
+              "\n",
+              "      NOT_HS  NOT_VLG  NOT_VIO  \n",
+              "0     NOT_HS  NOT_VLG  NOT_VIO  \n",
+              "1     NOT_HS  NOT_VLG  NOT_VIO  \n",
+              "2     NOT_HS  NOT_VLG  NOT_VIO  \n",
+              "3     NOT_HS  NOT_VLG  NOT_VIO  \n",
+              "4     NOT_HS  NOT_VLG  NOT_VIO  \n",
+              "...      ...      ...      ...  \n",
+              "1264  NOT_HS  NOT_VLG  NOT_VIO  \n",
+              "1265  NOT_HS  NOT_VLG  NOT_VIO  \n",
+              "1266  NOT_HS  NOT_VLG  NOT_VIO  \n",
+              "1267  NOT_HS  NOT_VLG  NOT_VIO  \n",
+              "1268  NOT_HS  NOT_VLG  NOT_VIO  \n",
+              "\n",
+              "[1269 rows x 6 columns]"
+            ],
+            "text/html": [
+              "\n",
+              "  <div id=\"df-7efabaa7-bf4f-49af-853b-62bc6b927636\" class=\"colab-df-container\">\n",
+              "    <div>\n",
+              "<style scoped>\n",
+              "    .dataframe tbody tr th:only-of-type {\n",
+              "        vertical-align: middle;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe tbody tr th {\n",
+              "        vertical-align: top;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe thead th {\n",
+              "        text-align: right;\n",
+              "    }\n",
+              "</style>\n",
+              "<table border=\"1\" class=\"dataframe\">\n",
+              "  <thead>\n",
+              "    <tr style=\"text-align: right;\">\n",
+              "      <th></th>\n",
+              "      <th>8888</th>\n",
+              "      <th>@USER افطرت عليك بعقاء واثنين من فروخها الجن 🔪😂</th>\n",
+              "      <th>NOT_OFF</th>\n",
+              "      <th>NOT_HS</th>\n",
+              "      <th>NOT_VLG</th>\n",
+              "      <th>NOT_VIO</th>\n",
+              "    </tr>\n",
+              "  </thead>\n",
+              "  <tbody>\n",
+              "    <tr>\n",
+              "      <th>0</th>\n",
+              "      <td>8889</td>\n",
+              "      <td>#داليا_مبارك&lt;LF&gt;مادري ليش تقرفت😷</td>\n",
+              "      <td>NOT_OFF</td>\n",
+              "      <td>NOT_HS</td>\n",
+              "      <td>NOT_VLG</td>\n",
+              "      <td>NOT_VIO</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>1</th>\n",
+              "      <td>8890</td>\n",
+              "      <td>RT @USER ابديت السناب  الجديد ❌&lt;LF&gt;حاس الناس ح...</td>\n",
+              "      <td>NOT_OFF</td>\n",
+              "      <td>NOT_HS</td>\n",
+              "      <td>NOT_VLG</td>\n",
+              "      <td>NOT_VIO</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>2</th>\n",
+              "      <td>8891</td>\n",
+              "      <td>@USER هييه والله وااايدد 🔪🔪🔪💔💔</td>\n",
+              "      <td>NOT_OFF</td>\n",
+              "      <td>NOT_HS</td>\n",
+              "      <td>NOT_VLG</td>\n",
+              "      <td>NOT_VIO</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>3</th>\n",
+              "      <td>8892</td>\n",
+              "      <td>اكيد اخس شي 😤 URL</td>\n",
+              "      <td>NOT_OFF</td>\n",
+              "      <td>NOT_HS</td>\n",
+              "      <td>NOT_VLG</td>\n",
+              "      <td>NOT_VIO</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>4</th>\n",
+              "      <td>8893</td>\n",
+              "      <td>مابي شي الحين غير فراشي😣</td>\n",
+              "      <td>NOT_OFF</td>\n",
+              "      <td>NOT_HS</td>\n",
+              "      <td>NOT_VLG</td>\n",
+              "      <td>NOT_VIO</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>...</th>\n",
+              "      <td>...</td>\n",
+              "      <td>...</td>\n",
+              "      <td>...</td>\n",
+              "      <td>...</td>\n",
+              "      <td>...</td>\n",
+              "      <td>...</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>1264</th>\n",
+              "      <td>10153</td>\n",
+              "      <td>@USER روما محظوظين بذا المدرب بيروتي يسحب في ر...</td>\n",
+              "      <td>NOT_OFF</td>\n",
+              "      <td>NOT_HS</td>\n",
+              "      <td>NOT_VLG</td>\n",
+              "      <td>NOT_VIO</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>1265</th>\n",
+              "      <td>10154</td>\n",
+              "      <td>@USER هلا والله بالحب&lt;LF&gt;هلا لولو❤️😙&lt;LF&gt;...&lt;LF...</td>\n",
+              "      <td>OFF</td>\n",
+              "      <td>NOT_HS</td>\n",
+              "      <td>NOT_VLG</td>\n",
+              "      <td>NOT_VIO</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>1266</th>\n",
+              "      <td>10155</td>\n",
+              "      <td>رينز فاز 😡😡😡😡😡😡😡 يعنني اوه شوفو العرض الأسطوري...</td>\n",
+              "      <td>OFF</td>\n",
+              "      <td>NOT_HS</td>\n",
+              "      <td>NOT_VLG</td>\n",
+              "      <td>NOT_VIO</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>1267</th>\n",
+              "      <td>10156</td>\n",
+              "      <td>@USER ييييع والله شيء يلوع الكبد مريضات الله ي...</td>\n",
+              "      <td>OFF</td>\n",
+              "      <td>NOT_HS</td>\n",
+              "      <td>NOT_VLG</td>\n",
+              "      <td>NOT_VIO</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>1268</th>\n",
+              "      <td>10157</td>\n",
+              "      <td>@USER تحسينها ع كليجه م اكلت شي واضح من الصوت 😷😷</td>\n",
+              "      <td>NOT_OFF</td>\n",
+              "      <td>NOT_HS</td>\n",
+              "      <td>NOT_VLG</td>\n",
+              "      <td>NOT_VIO</td>\n",
+              "    </tr>\n",
+              "  </tbody>\n",
+              "</table>\n",
+              "<p>1269 rows × 6 columns</p>\n",
+              "</div>\n",
+              "    <div class=\"colab-df-buttons\">\n",
+              "\n",
+              "  <div class=\"colab-df-container\">\n",
+              "    <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-7efabaa7-bf4f-49af-853b-62bc6b927636')\"\n",
+              "            title=\"Convert this dataframe to an interactive table.\"\n",
+              "            style=\"display:none;\">\n",
+              "\n",
+              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
+              "    <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
+              "  </svg>\n",
+              "    </button>\n",
+              "\n",
+              "  <style>\n",
+              "    .colab-df-container {\n",
+              "      display:flex;\n",
+              "      gap: 12px;\n",
+              "    }\n",
+              "\n",
+              "    .colab-df-convert {\n",
+              "      background-color: #E8F0FE;\n",
+              "      border: none;\n",
+              "      border-radius: 50%;\n",
+              "      cursor: pointer;\n",
+              "      display: none;\n",
+              "      fill: #1967D2;\n",
+              "      height: 32px;\n",
+              "      padding: 0 0 0 0;\n",
+              "      width: 32px;\n",
+              "    }\n",
+              "\n",
+              "    .colab-df-convert:hover {\n",
+              "      background-color: #E2EBFA;\n",
+              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
+              "      fill: #174EA6;\n",
+              "    }\n",
+              "\n",
+              "    .colab-df-buttons div {\n",
+              "      margin-bottom: 4px;\n",
+              "    }\n",
+              "\n",
+              "    [theme=dark] .colab-df-convert {\n",
+              "      background-color: #3B4455;\n",
+              "      fill: #D2E3FC;\n",
+              "    }\n",
+              "\n",
+              "    [theme=dark] .colab-df-convert:hover {\n",
+              "      background-color: #434B5C;\n",
+              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
+              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
+              "      fill: #FFFFFF;\n",
+              "    }\n",
+              "  </style>\n",
+              "\n",
+              "    <script>\n",
+              "      const buttonEl =\n",
+              "        document.querySelector('#df-7efabaa7-bf4f-49af-853b-62bc6b927636 button.colab-df-convert');\n",
+              "      buttonEl.style.display =\n",
+              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
+              "\n",
+              "      async function convertToInteractive(key) {\n",
+              "        const element = document.querySelector('#df-7efabaa7-bf4f-49af-853b-62bc6b927636');\n",
+              "        const dataTable =\n",
+              "          await google.colab.kernel.invokeFunction('convertToInteractive',\n",
+              "                                                    [key], {});\n",
+              "        if (!dataTable) return;\n",
+              "\n",
+              "        const docLinkHtml = 'Like what you see? Visit the ' +\n",
+              "          '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
+              "          + ' to learn more about interactive tables.';\n",
+              "        element.innerHTML = '';\n",
+              "        dataTable['output_type'] = 'display_data';\n",
+              "        await google.colab.output.renderOutput(dataTable, element);\n",
+              "        const docLink = document.createElement('div');\n",
+              "        docLink.innerHTML = docLinkHtml;\n",
+              "        element.appendChild(docLink);\n",
+              "      }\n",
+              "    </script>\n",
+              "  </div>\n",
+              "\n",
+              "\n",
+              "<div id=\"df-1c8c75be-5b0d-4040-9833-dce93925b8bf\">\n",
+              "  <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-1c8c75be-5b0d-4040-9833-dce93925b8bf')\"\n",
+              "            title=\"Suggest charts\"\n",
+              "            style=\"display:none;\">\n",
+              "\n",
+              "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
+              "     width=\"24px\">\n",
+              "    <g>\n",
+              "        <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
+              "    </g>\n",
+              "</svg>\n",
+              "  </button>\n",
+              "\n",
+              "<style>\n",
+              "  .colab-df-quickchart {\n",
+              "      --bg-color: #E8F0FE;\n",
+              "      --fill-color: #1967D2;\n",
+              "      --hover-bg-color: #E2EBFA;\n",
+              "      --hover-fill-color: #174EA6;\n",
+              "      --disabled-fill-color: #AAA;\n",
+              "      --disabled-bg-color: #DDD;\n",
+              "  }\n",
+              "\n",
+              "  [theme=dark] .colab-df-quickchart {\n",
+              "      --bg-color: #3B4455;\n",
+              "      --fill-color: #D2E3FC;\n",
+              "      --hover-bg-color: #434B5C;\n",
+              "      --hover-fill-color: #FFFFFF;\n",
+              "      --disabled-bg-color: #3B4455;\n",
+              "      --disabled-fill-color: #666;\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-quickchart {\n",
+              "    background-color: var(--bg-color);\n",
+              "    border: none;\n",
+              "    border-radius: 50%;\n",
+              "    cursor: pointer;\n",
+              "    display: none;\n",
+              "    fill: var(--fill-color);\n",
+              "    height: 32px;\n",
+              "    padding: 0;\n",
+              "    width: 32px;\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-quickchart:hover {\n",
+              "    background-color: var(--hover-bg-color);\n",
+              "    box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
+              "    fill: var(--button-hover-fill-color);\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-quickchart-complete:disabled,\n",
+              "  .colab-df-quickchart-complete:disabled:hover {\n",
+              "    background-color: var(--disabled-bg-color);\n",
+              "    fill: var(--disabled-fill-color);\n",
+              "    box-shadow: none;\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-spinner {\n",
+              "    border: 2px solid var(--fill-color);\n",
+              "    border-color: transparent;\n",
+              "    border-bottom-color: var(--fill-color);\n",
+              "    animation:\n",
+              "      spin 1s steps(1) infinite;\n",
+              "  }\n",
+              "\n",
+              "  @keyframes spin {\n",
+              "    0% {\n",
+              "      border-color: transparent;\n",
+              "      border-bottom-color: var(--fill-color);\n",
+              "      border-left-color: var(--fill-color);\n",
+              "    }\n",
+              "    20% {\n",
+              "      border-color: transparent;\n",
+              "      border-left-color: var(--fill-color);\n",
+              "      border-top-color: var(--fill-color);\n",
+              "    }\n",
+              "    30% {\n",
+              "      border-color: transparent;\n",
+              "      border-left-color: var(--fill-color);\n",
+              "      border-top-color: var(--fill-color);\n",
+              "      border-right-color: var(--fill-color);\n",
+              "    }\n",
+              "    40% {\n",
+              "      border-color: transparent;\n",
+              "      border-right-color: var(--fill-color);\n",
+              "      border-top-color: var(--fill-color);\n",
+              "    }\n",
+              "    60% {\n",
+              "      border-color: transparent;\n",
+              "      border-right-color: var(--fill-color);\n",
+              "    }\n",
+              "    80% {\n",
+              "      border-color: transparent;\n",
+              "      border-right-color: var(--fill-color);\n",
+              "      border-bottom-color: var(--fill-color);\n",
+              "    }\n",
+              "    90% {\n",
+              "      border-color: transparent;\n",
+              "      border-bottom-color: var(--fill-color);\n",
+              "    }\n",
+              "  }\n",
+              "</style>\n",
+              "\n",
+              "  <script>\n",
+              "    async function quickchart(key) {\n",
+              "      const quickchartButtonEl =\n",
+              "        document.querySelector('#' + key + ' button');\n",
+              "      quickchartButtonEl.disabled = true;  // To prevent multiple clicks.\n",
+              "      quickchartButtonEl.classList.add('colab-df-spinner');\n",
+              "      try {\n",
+              "        const charts = await google.colab.kernel.invokeFunction(\n",
+              "            'suggestCharts', [key], {});\n",
+              "      } catch (error) {\n",
+              "        console.error('Error during call to suggestCharts:', error);\n",
+              "      }\n",
+              "      quickchartButtonEl.classList.remove('colab-df-spinner');\n",
+              "      quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
+              "    }\n",
+              "    (() => {\n",
+              "      let quickchartButtonEl =\n",
+              "        document.querySelector('#df-1c8c75be-5b0d-4040-9833-dce93925b8bf button');\n",
+              "      quickchartButtonEl.style.display =\n",
+              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
+              "    })();\n",
+              "  </script>\n",
+              "</div>\n",
+              "\n",
+              "  <div id=\"id_ed01c886-ac90-4cc9-9844-714e021f774c\">\n",
+              "    <style>\n",
+              "      .colab-df-generate {\n",
+              "        background-color: #E8F0FE;\n",
+              "        border: none;\n",
+              "        border-radius: 50%;\n",
+              "        cursor: pointer;\n",
+              "        display: none;\n",
+              "        fill: #1967D2;\n",
+              "        height: 32px;\n",
+              "        padding: 0 0 0 0;\n",
+              "        width: 32px;\n",
+              "      }\n",
+              "\n",
+              "      .colab-df-generate:hover {\n",
+              "        background-color: #E2EBFA;\n",
+              "        box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
+              "        fill: #174EA6;\n",
+              "      }\n",
+              "\n",
+              "      [theme=dark] .colab-df-generate {\n",
+              "        background-color: #3B4455;\n",
+              "        fill: #D2E3FC;\n",
+              "      }\n",
+              "\n",
+              "      [theme=dark] .colab-df-generate:hover {\n",
+              "        background-color: #434B5C;\n",
+              "        box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
+              "        filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
+              "        fill: #FFFFFF;\n",
+              "      }\n",
+              "    </style>\n",
+              "    <button class=\"colab-df-generate\" onclick=\"generateWithVariable('dev_data')\"\n",
+              "            title=\"Generate code using this dataframe.\"\n",
+              "            style=\"display:none;\">\n",
+              "\n",
+              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
+              "       width=\"24px\">\n",
+              "    <path d=\"M7,19H8.4L18.45,9,17,7.55,7,17.6ZM5,21V16.75L18.45,3.32a2,2,0,0,1,2.83,0l1.4,1.43a1.91,1.91,0,0,1,.58,1.4,1.91,1.91,0,0,1-.58,1.4L9.25,21ZM18.45,9,17,7.55Zm-12,3A5.31,5.31,0,0,0,4.9,8.1,5.31,5.31,0,0,0,1,6.5,5.31,5.31,0,0,0,4.9,4.9,5.31,5.31,0,0,0,6.5,1,5.31,5.31,0,0,0,8.1,4.9,5.31,5.31,0,0,0,12,6.5,5.46,5.46,0,0,0,6.5,12Z\"/>\n",
+              "  </svg>\n",
+              "    </button>\n",
+              "    <script>\n",
+              "      (() => {\n",
+              "      const buttonEl =\n",
+              "        document.querySelector('#id_ed01c886-ac90-4cc9-9844-714e021f774c button.colab-df-generate');\n",
+              "      buttonEl.style.display =\n",
+              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
+              "\n",
+              "      buttonEl.onclick = () => {\n",
+              "        google.colab.notebook.generateWithVariable('dev_data');\n",
+              "      }\n",
+              "      })();\n",
+              "    </script>\n",
+              "  </div>\n",
+              "\n",
+              "    </div>\n",
+              "  </div>\n"
+            ],
+            "application/vnd.google.colaboratory.intrinsic+json": {
+              "type": "dataframe",
+              "variable_name": "dev_data",
+              "summary": "{\n  \"name\": \"dev_data\",\n  \"rows\": 1269,\n  \"fields\": [\n    {\n      \"column\": \"8888\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 366,\n        \"min\": 8889,\n        \"max\": 10157,\n        \"num_unique_values\": 1269,\n        \"samples\": [\n          10102,\n          9692,\n          9311\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"@USER \\u0627\\u0641\\u0637\\u0631\\u062a \\u0639\\u0644\\u064a\\u0643 \\u0628\\u0639\\u0642\\u0627\\u0621 \\u0648\\u0627\\u062b\\u0646\\u064a\\u0646 \\u0645\\u0646 \\u0641\\u0631\\u0648\\u062e\\u0647\\u0627 \\u0627\\u0644\\u062c\\u0646 \\ud83d\\udd2a\\ud83d\\ude02\",\n      \"properties\": {\n        \"dtype\": \"string\",\n        \"num_unique_values\": 1269,\n        \"samples\": [\n          \"\\u0644\\u0648 \\u0643\\u0627\\u0646 \\u0627\\u0644\\u0627\\u0646\\u062a\\u0638\\u0627\\u0631 \\u0631\\u062c\\u0644\\u0627\\u064b \\u0644\\u0642\\u062a\\u0644\\u062a\\u0647 \\ud83d\\ude21\",\n          \"@USER \\u0639\\u0646\\u062f\\u064a \\u0637\\u0641\\u0644\\u0647 \\u0635\\u063a\\u064a\\u0631\\u0629 \\u0643\\u0644 \\u0645\\u0627 \\u0633\\u0648\\u062a \\u0645\\u0635\\u064a\\u0628\\u0647 \\u0642\\u0627\\u0644\\u062a \\u0627\\u0645\\u0632\\u062d<LF>\\u0631\\u0645\\u062a \\u0627\\u0644\\u0645\\u0641\\u062a\\u0627\\u062d \\u0645\\u0639 \\u0627\\u0644\\u0634\\u0628\\u0627\\u0643\\u060c \\u0644\\u064a\\u0634 \\u064a\\u0627\\u0628\\u0627\\u0628\\u0627 \\u0642\\u0627\\u0644\\u062a \\u0627\\u0645\\u0632\\u062d<LF>\\u0631\\u0645\\u062a \\u0645\\u0639\\u062c\\u0648\\u0646 \\u0627\\u0644\\u0627\\u0633\\u0646\\u0627\\u0646 \\u0644\\u064a\\u0634 \\u064a\\u0627 \\u0628\\u0627\\u0628\\u0627 \\u0642\\u0627\\u0644\\u062a \\u0627\\u0645\\u0632\\u062d<LF>\\u0632\\u0639\\u0644\\u062a \\u0639\\u0644\\u064a \\u0642\\u0627\\u0644\\u062a \\u0644\\u064a \\u0643\\u0644 
\\u062a\\u0631\\u0627\\u0628<LF>\\u0627\\u0646\\u0635\\u062f\\u0645\\u062a \\u0645\\u062f\\u0631\\u064a \\u0645\\u0646 \\u0641\\u064a\\u0646 \\u062c\\u0627\\u064a\\u0628\\u0629 \\u0647\\u0627\\u0644\\u0643\\u0644\\u0645\\u0629<LF>\\u0642\\u0644\\u062a \\u0627\\u064a\\u0634 \\u0627\\u064a\\u0634 \\ud83d\\ude20 \\u0642\\u0627\\u0644\\u062a \\u0627\\u0645\\u0632\\u062d \\u0647\\u0647\\u0647\\u0647\\u0647\\u0647\\u0647\\u0647\\u0647\\u0647\\u0647\\u0647\",\n          \"#\\u0628\\u0627\\u064a\\u0639_\\u0627\\u0644\\u0643\\u0644\\u064a\\u062c\\u0627<LF><LF>\\u0627\\u0646\\u0627 \\u0627\\u0644\\u0628\\u0646\\u062a \\u0627\\u0644\\u0648\\u062d\\u064a\\u062f\\u0647 \\u0627\\u0644\\u064a \\u0627\\u0634\\u0648\\u0641 \\u0628\\u0627\\u064a\\u0639 \\u0627\\u0644\\u0643\\u0644\\u064a\\u062c\\u0627 <LF><LF>( \\u062c\\u064a\\u0643\\u0631 ) \\ud83e\\udd2e\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"NOT_OFF\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 2,\n        \"samples\": [\n          \"OFF\",\n          \"NOT_OFF\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"NOT_HS\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 7,\n        \"samples\": [\n          \"NOT_HS\",\n          \"HS3\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"NOT_VLG\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 2,\n        \"samples\": [\n          \"VLG\",\n          \"NOT_VLG\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"NOT_VIO\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 2,\n        \"samples\": [\n          
\"VIO\",\n          \"NOT_VIO\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    }\n  ]\n}"
+            }
+          },
+          "metadata": {},
+          "execution_count": 6
+        }
+      ],
+      "source": [
+        "dev_data"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 7,
+      "metadata": {
+        "id": "JtryssNFzfr1"
+      },
+      "outputs": [],
+      "source": [
+        "# Keep only the tweet text and its OFF/NOT_OFF column; drop the id-like column\n",
+        "# and the HS / VLG / VIO columns.\n",
+        "# NOTE(review): these labels look like values of the file's first record (the\n",
+        "# numeric column is labelled '8888' while its values start at 8889), so the loader\n",
+        "# probably consumed row 0 as the header - confirm, and consider header=None upstream.\n",
+        "dev_data = dev_data.drop(labels=['8888', 'NOT_HS', 'NOT_VLG', 'NOT_VIO'], axis='columns')"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 8,
+      "metadata": {
+        "id": "xL3h20zRzgW6",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 424
+        },
+        "outputId": "60a29bf1-5bb1-4ad9-de44-94abea53f377"
+      },
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "                                                   Text    label\n",
+              "0                      #داليا_مبارك<LF>مادري ليش تقرفت😷  NOT_OFF\n",
+              "1     RT @USER ابديت السناب  الجديد ❌<LF>حاس الناس ح...  NOT_OFF\n",
+              "2                        @USER هييه والله وااايدد 🔪🔪🔪💔💔  NOT_OFF\n",
+              "3                                     اكيد اخس شي 😤 URL  NOT_OFF\n",
+              "4                              مابي شي الحين غير فراشي😣  NOT_OFF\n",
+              "...                                                 ...      ...\n",
+              "1264  @USER روما محظوظين بذا المدرب بيروتي يسحب في ر...  NOT_OFF\n",
+              "1265  @USER هلا والله بالحب<LF>هلا لولو❤️😙<LF>...<LF...      OFF\n",
+              "1266  رينز فاز 😡😡😡😡😡😡😡 يعنني اوه شوفو العرض الأسطوري...      OFF\n",
+              "1267  @USER ييييع والله شيء يلوع الكبد مريضات الله ي...      OFF\n",
+              "1268   @USER تحسينها ع كليجه م اكلت شي واضح من الصوت 😷😷  NOT_OFF\n",
+              "\n",
+              "[1269 rows x 2 columns]"
+            ],
+            "text/html": [
+              "\n",
+              "  <div id=\"df-b1851ea7-9be6-4832-b91c-cea2748d38db\" class=\"colab-df-container\">\n",
+              "    <div>\n",
+              "<style scoped>\n",
+              "    .dataframe tbody tr th:only-of-type {\n",
+              "        vertical-align: middle;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe tbody tr th {\n",
+              "        vertical-align: top;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe thead th {\n",
+              "        text-align: right;\n",
+              "    }\n",
+              "</style>\n",
+              "<table border=\"1\" class=\"dataframe\">\n",
+              "  <thead>\n",
+              "    <tr style=\"text-align: right;\">\n",
+              "      <th></th>\n",
+              "      <th>Text</th>\n",
+              "      <th>label</th>\n",
+              "    </tr>\n",
+              "  </thead>\n",
+              "  <tbody>\n",
+              "    <tr>\n",
+              "      <th>0</th>\n",
+              "      <td>#داليا_مبارك&lt;LF&gt;مادري ليش تقرفت😷</td>\n",
+              "      <td>NOT_OFF</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>1</th>\n",
+              "      <td>RT @USER ابديت السناب  الجديد ❌&lt;LF&gt;حاس الناس ح...</td>\n",
+              "      <td>NOT_OFF</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>2</th>\n",
+              "      <td>@USER هييه والله وااايدد 🔪🔪🔪💔💔</td>\n",
+              "      <td>NOT_OFF</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>3</th>\n",
+              "      <td>اكيد اخس شي 😤 URL</td>\n",
+              "      <td>NOT_OFF</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>4</th>\n",
+              "      <td>مابي شي الحين غير فراشي😣</td>\n",
+              "      <td>NOT_OFF</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>...</th>\n",
+              "      <td>...</td>\n",
+              "      <td>...</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>1264</th>\n",
+              "      <td>@USER روما محظوظين بذا المدرب بيروتي يسحب في ر...</td>\n",
+              "      <td>NOT_OFF</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>1265</th>\n",
+              "      <td>@USER هلا والله بالحب&lt;LF&gt;هلا لولو❤️😙&lt;LF&gt;...&lt;LF...</td>\n",
+              "      <td>OFF</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>1266</th>\n",
+              "      <td>رينز فاز 😡😡😡😡😡😡😡 يعنني اوه شوفو العرض الأسطوري...</td>\n",
+              "      <td>OFF</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>1267</th>\n",
+              "      <td>@USER ييييع والله شيء يلوع الكبد مريضات الله ي...</td>\n",
+              "      <td>OFF</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>1268</th>\n",
+              "      <td>@USER تحسينها ع كليجه م اكلت شي واضح من الصوت 😷😷</td>\n",
+              "      <td>NOT_OFF</td>\n",
+              "    </tr>\n",
+              "  </tbody>\n",
+              "</table>\n",
+              "<p>1269 rows × 2 columns</p>\n",
+              "</div>\n",
+              "    <div class=\"colab-df-buttons\">\n",
+              "\n",
+              "  <div class=\"colab-df-container\">\n",
+              "    <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-b1851ea7-9be6-4832-b91c-cea2748d38db')\"\n",
+              "            title=\"Convert this dataframe to an interactive table.\"\n",
+              "            style=\"display:none;\">\n",
+              "\n",
+              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
+              "    <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
+              "  </svg>\n",
+              "    </button>\n",
+              "\n",
+              "  <style>\n",
+              "    .colab-df-container {\n",
+              "      display:flex;\n",
+              "      gap: 12px;\n",
+              "    }\n",
+              "\n",
+              "    .colab-df-convert {\n",
+              "      background-color: #E8F0FE;\n",
+              "      border: none;\n",
+              "      border-radius: 50%;\n",
+              "      cursor: pointer;\n",
+              "      display: none;\n",
+              "      fill: #1967D2;\n",
+              "      height: 32px;\n",
+              "      padding: 0 0 0 0;\n",
+              "      width: 32px;\n",
+              "    }\n",
+              "\n",
+              "    .colab-df-convert:hover {\n",
+              "      background-color: #E2EBFA;\n",
+              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
+              "      fill: #174EA6;\n",
+              "    }\n",
+              "\n",
+              "    .colab-df-buttons div {\n",
+              "      margin-bottom: 4px;\n",
+              "    }\n",
+              "\n",
+              "    [theme=dark] .colab-df-convert {\n",
+              "      background-color: #3B4455;\n",
+              "      fill: #D2E3FC;\n",
+              "    }\n",
+              "\n",
+              "    [theme=dark] .colab-df-convert:hover {\n",
+              "      background-color: #434B5C;\n",
+              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
+              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
+              "      fill: #FFFFFF;\n",
+              "    }\n",
+              "  </style>\n",
+              "\n",
+              "    <script>\n",
+              "      const buttonEl =\n",
+              "        document.querySelector('#df-b1851ea7-9be6-4832-b91c-cea2748d38db button.colab-df-convert');\n",
+              "      buttonEl.style.display =\n",
+              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
+              "\n",
+              "      async function convertToInteractive(key) {\n",
+              "        const element = document.querySelector('#df-b1851ea7-9be6-4832-b91c-cea2748d38db');\n",
+              "        const dataTable =\n",
+              "          await google.colab.kernel.invokeFunction('convertToInteractive',\n",
+              "                                                    [key], {});\n",
+              "        if (!dataTable) return;\n",
+              "\n",
+              "        const docLinkHtml = 'Like what you see? Visit the ' +\n",
+              "          '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
+              "          + ' to learn more about interactive tables.';\n",
+              "        element.innerHTML = '';\n",
+              "        dataTable['output_type'] = 'display_data';\n",
+              "        await google.colab.output.renderOutput(dataTable, element);\n",
+              "        const docLink = document.createElement('div');\n",
+              "        docLink.innerHTML = docLinkHtml;\n",
+              "        element.appendChild(docLink);\n",
+              "      }\n",
+              "    </script>\n",
+              "  </div>\n",
+              "\n",
+              "\n",
+              "<div id=\"df-73719a37-3d08-4f77-a517-c0d106742c6d\">\n",
+              "  <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-73719a37-3d08-4f77-a517-c0d106742c6d')\"\n",
+              "            title=\"Suggest charts\"\n",
+              "            style=\"display:none;\">\n",
+              "\n",
+              "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
+              "     width=\"24px\">\n",
+              "    <g>\n",
+              "        <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
+              "    </g>\n",
+              "</svg>\n",
+              "  </button>\n",
+              "\n",
+              "<style>\n",
+              "  .colab-df-quickchart {\n",
+              "      --bg-color: #E8F0FE;\n",
+              "      --fill-color: #1967D2;\n",
+              "      --hover-bg-color: #E2EBFA;\n",
+              "      --hover-fill-color: #174EA6;\n",
+              "      --disabled-fill-color: #AAA;\n",
+              "      --disabled-bg-color: #DDD;\n",
+              "  }\n",
+              "\n",
+              "  [theme=dark] .colab-df-quickchart {\n",
+              "      --bg-color: #3B4455;\n",
+              "      --fill-color: #D2E3FC;\n",
+              "      --hover-bg-color: #434B5C;\n",
+              "      --hover-fill-color: #FFFFFF;\n",
+              "      --disabled-bg-color: #3B4455;\n",
+              "      --disabled-fill-color: #666;\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-quickchart {\n",
+              "    background-color: var(--bg-color);\n",
+              "    border: none;\n",
+              "    border-radius: 50%;\n",
+              "    cursor: pointer;\n",
+              "    display: none;\n",
+              "    fill: var(--fill-color);\n",
+              "    height: 32px;\n",
+              "    padding: 0;\n",
+              "    width: 32px;\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-quickchart:hover {\n",
+              "    background-color: var(--hover-bg-color);\n",
+              "    box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
+              "    fill: var(--button-hover-fill-color);\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-quickchart-complete:disabled,\n",
+              "  .colab-df-quickchart-complete:disabled:hover {\n",
+              "    background-color: var(--disabled-bg-color);\n",
+              "    fill: var(--disabled-fill-color);\n",
+              "    box-shadow: none;\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-spinner {\n",
+              "    border: 2px solid var(--fill-color);\n",
+              "    border-color: transparent;\n",
+              "    border-bottom-color: var(--fill-color);\n",
+              "    animation:\n",
+              "      spin 1s steps(1) infinite;\n",
+              "  }\n",
+              "\n",
+              "  @keyframes spin {\n",
+              "    0% {\n",
+              "      border-color: transparent;\n",
+              "      border-bottom-color: var(--fill-color);\n",
+              "      border-left-color: var(--fill-color);\n",
+              "    }\n",
+              "    20% {\n",
+              "      border-color: transparent;\n",
+              "      border-left-color: var(--fill-color);\n",
+              "      border-top-color: var(--fill-color);\n",
+              "    }\n",
+              "    30% {\n",
+              "      border-color: transparent;\n",
+              "      border-left-color: var(--fill-color);\n",
+              "      border-top-color: var(--fill-color);\n",
+              "      border-right-color: var(--fill-color);\n",
+              "    }\n",
+              "    40% {\n",
+              "      border-color: transparent;\n",
+              "      border-right-color: var(--fill-color);\n",
+              "      border-top-color: var(--fill-color);\n",
+              "    }\n",
+              "    60% {\n",
+              "      border-color: transparent;\n",
+              "      border-right-color: var(--fill-color);\n",
+              "    }\n",
+              "    80% {\n",
+              "      border-color: transparent;\n",
+              "      border-right-color: var(--fill-color);\n",
+              "      border-bottom-color: var(--fill-color);\n",
+              "    }\n",
+              "    90% {\n",
+              "      border-color: transparent;\n",
+              "      border-bottom-color: var(--fill-color);\n",
+              "    }\n",
+              "  }\n",
+              "</style>\n",
+              "\n",
+              "  <script>\n",
+              "    async function quickchart(key) {\n",
+              "      const quickchartButtonEl =\n",
+              "        document.querySelector('#' + key + ' button');\n",
+              "      quickchartButtonEl.disabled = true;  // To prevent multiple clicks.\n",
+              "      quickchartButtonEl.classList.add('colab-df-spinner');\n",
+              "      try {\n",
+              "        const charts = await google.colab.kernel.invokeFunction(\n",
+              "            'suggestCharts', [key], {});\n",
+              "      } catch (error) {\n",
+              "        console.error('Error during call to suggestCharts:', error);\n",
+              "      }\n",
+              "      quickchartButtonEl.classList.remove('colab-df-spinner');\n",
+              "      quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
+              "    }\n",
+              "    (() => {\n",
+              "      let quickchartButtonEl =\n",
+              "        document.querySelector('#df-73719a37-3d08-4f77-a517-c0d106742c6d button');\n",
+              "      quickchartButtonEl.style.display =\n",
+              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
+              "    })();\n",
+              "  </script>\n",
+              "</div>\n",
+              "\n",
+              "  <div id=\"id_5a07cc87-1712-4757-8e3d-d500d3550d17\">\n",
+              "    <style>\n",
+              "      .colab-df-generate {\n",
+              "        background-color: #E8F0FE;\n",
+              "        border: none;\n",
+              "        border-radius: 50%;\n",
+              "        cursor: pointer;\n",
+              "        display: none;\n",
+              "        fill: #1967D2;\n",
+              "        height: 32px;\n",
+              "        padding: 0 0 0 0;\n",
+              "        width: 32px;\n",
+              "      }\n",
+              "\n",
+              "      .colab-df-generate:hover {\n",
+              "        background-color: #E2EBFA;\n",
+              "        box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
+              "        fill: #174EA6;\n",
+              "      }\n",
+              "\n",
+              "      [theme=dark] .colab-df-generate {\n",
+              "        background-color: #3B4455;\n",
+              "        fill: #D2E3FC;\n",
+              "      }\n",
+              "\n",
+              "      [theme=dark] .colab-df-generate:hover {\n",
+              "        background-color: #434B5C;\n",
+              "        box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
+              "        filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
+              "        fill: #FFFFFF;\n",
+              "      }\n",
+              "    </style>\n",
+              "    <button class=\"colab-df-generate\" onclick=\"generateWithVariable('dev_data')\"\n",
+              "            title=\"Generate code using this dataframe.\"\n",
+              "            style=\"display:none;\">\n",
+              "\n",
+              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
+              "       width=\"24px\">\n",
+              "    <path d=\"M7,19H8.4L18.45,9,17,7.55,7,17.6ZM5,21V16.75L18.45,3.32a2,2,0,0,1,2.83,0l1.4,1.43a1.91,1.91,0,0,1,.58,1.4,1.91,1.91,0,0,1-.58,1.4L9.25,21ZM18.45,9,17,7.55Zm-12,3A5.31,5.31,0,0,0,4.9,8.1,5.31,5.31,0,0,0,1,6.5,5.31,5.31,0,0,0,4.9,4.9,5.31,5.31,0,0,0,6.5,1,5.31,5.31,0,0,0,8.1,4.9,5.31,5.31,0,0,0,12,6.5,5.46,5.46,0,0,0,6.5,12Z\"/>\n",
+              "  </svg>\n",
+              "    </button>\n",
+              "    <script>\n",
+              "      (() => {\n",
+              "      const buttonEl =\n",
+              "        document.querySelector('#id_5a07cc87-1712-4757-8e3d-d500d3550d17 button.colab-df-generate');\n",
+              "      buttonEl.style.display =\n",
+              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
+              "\n",
+              "      buttonEl.onclick = () => {\n",
+              "        google.colab.notebook.generateWithVariable('dev_data');\n",
+              "      }\n",
+              "      })();\n",
+              "    </script>\n",
+              "  </div>\n",
+              "\n",
+              "    </div>\n",
+              "  </div>\n"
+            ],
+            "application/vnd.google.colaboratory.intrinsic+json": {
+              "type": "dataframe",
+              "variable_name": "dev_data",
+              "summary": "{\n  \"name\": \"dev_data\",\n  \"rows\": 1269,\n  \"fields\": [\n    {\n      \"column\": \"Text\",\n      \"properties\": {\n        \"dtype\": \"string\",\n        \"num_unique_values\": 1269,\n        \"samples\": [\n          \"\\u0644\\u0648 \\u0643\\u0627\\u0646 \\u0627\\u0644\\u0627\\u0646\\u062a\\u0638\\u0627\\u0631 \\u0631\\u062c\\u0644\\u0627\\u064b \\u0644\\u0642\\u062a\\u0644\\u062a\\u0647 \\ud83d\\ude21\",\n          \"@USER \\u0639\\u0646\\u062f\\u064a \\u0637\\u0641\\u0644\\u0647 \\u0635\\u063a\\u064a\\u0631\\u0629 \\u0643\\u0644 \\u0645\\u0627 \\u0633\\u0648\\u062a \\u0645\\u0635\\u064a\\u0628\\u0647 \\u0642\\u0627\\u0644\\u062a \\u0627\\u0645\\u0632\\u062d<LF>\\u0631\\u0645\\u062a \\u0627\\u0644\\u0645\\u0641\\u062a\\u0627\\u062d \\u0645\\u0639 \\u0627\\u0644\\u0634\\u0628\\u0627\\u0643\\u060c \\u0644\\u064a\\u0634 \\u064a\\u0627\\u0628\\u0627\\u0628\\u0627 \\u0642\\u0627\\u0644\\u062a \\u0627\\u0645\\u0632\\u062d<LF>\\u0631\\u0645\\u062a \\u0645\\u0639\\u062c\\u0648\\u0646 \\u0627\\u0644\\u0627\\u0633\\u0646\\u0627\\u0646 \\u0644\\u064a\\u0634 \\u064a\\u0627 \\u0628\\u0627\\u0628\\u0627 \\u0642\\u0627\\u0644\\u062a \\u0627\\u0645\\u0632\\u062d<LF>\\u0632\\u0639\\u0644\\u062a \\u0639\\u0644\\u064a \\u0642\\u0627\\u0644\\u062a \\u0644\\u064a \\u0643\\u0644 \\u062a\\u0631\\u0627\\u0628<LF>\\u0627\\u0646\\u0635\\u062f\\u0645\\u062a \\u0645\\u062f\\u0631\\u064a \\u0645\\u0646 \\u0641\\u064a\\u0646 \\u062c\\u0627\\u064a\\u0628\\u0629 \\u0647\\u0627\\u0644\\u0643\\u0644\\u0645\\u0629<LF>\\u0642\\u0644\\u062a \\u0627\\u064a\\u0634 \\u0627\\u064a\\u0634 \\ud83d\\ude20 \\u0642\\u0627\\u0644\\u062a \\u0627\\u0645\\u0632\\u062d \\u0647\\u0647\\u0647\\u0647\\u0647\\u0647\\u0647\\u0647\\u0647\\u0647\\u0647\\u0647\",\n          \"#\\u0628\\u0627\\u064a\\u0639_\\u0627\\u0644\\u0643\\u0644\\u064a\\u062c\\u0627<LF><LF>\\u0627\\u0646\\u0627 \\u0627\\u0644\\u0628\\u0646\\u062a \\u0627\\u0644\\u0648\\u062d\\u064a\\u062f\\u0647 
\\u0627\\u0644\\u064a \\u0627\\u0634\\u0648\\u0641 \\u0628\\u0627\\u064a\\u0639 \\u0627\\u0644\\u0643\\u0644\\u064a\\u062c\\u0627 <LF><LF>( \\u062c\\u064a\\u0643\\u0631 ) \\ud83e\\udd2e\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"label\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 2,\n        \"samples\": [\n          \"OFF\",\n          \"NOT_OFF\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    }\n  ]\n}"
+            }
+          },
+          "metadata": {},
+          "execution_count": 8
+        }
+      ],
+      "source": [
+        "# Name the two remaining columns by position rather than by matching the literal\n",
+        "# text of one tweet: rename() silently ignores a key that does not match, which\n",
+        "# would leave the frame without 'Text'/'label' and only fail in a later cell.\n",
+        "# set_axis() raises if the frame does not have exactly two columns, and re-running\n",
+        "# this cell leaves the frame unchanged.\n",
+        "# Assumes column 0 is the tweet text and column 1 the OFF/NOT_OFF label, as left by\n",
+        "# the preceding drop cell.\n",
+        "dev_data = dev_data.set_axis([\"Text\", \"label\"], axis=\"columns\")\n",
+        "dev_data"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 9,
+      "metadata": {
+        "id": "NjrUivmwzgb6",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 424
+        },
+        "outputId": "86b4f0eb-25b6-432a-8c03-af7560e772bd"
+      },
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "      10158                     @USER هتهزر معايا ولا ايه 😡😡😡😡\n",
+              "0     10159           مشفتش العرض بتاعهم لا مش مهتمة لا😩🐸😂 URL\n",
+              "1     10160  RT @USER عندما تكون لوحدك تحس انك لحالك صح 😊<L...\n",
+              "2     10161  RT @USER ماشاء الله الرجال باين عليه محترم <LF...\n",
+              "3     10162                          @USER شسالفة احد يفهمني 😤\n",
+              "4     10163  @USER اقووووول استريح عاد احتفالاتنا تحط اغاني...\n",
+              "...     ...                                                ...\n",
+              "2535  12694  قله حيا وين اهلهم ذولي الله لايبلانا لهالدرجه ...\n",
+              "2536  12695                         RT @USER ثم الطحلبه🐸🐸🐸 URL\n",
+              "2537  12696     يا وجه الله 😷 من اليوم ورايح شاورما انسى 🔪 URL\n",
+              "2538  12697                    @USER متخلف حتى الحلال حرمتوه 😷\n",
+              "2539  12698  @USER حنا خقينا على بنت رئيس مو على بياع كليجا...\n",
+              "\n",
+              "[2540 rows x 2 columns]"
+            ],
+            "text/html": [
+              "\n",
+              "  <div id=\"df-dfe9107e-41aa-4f63-82a2-8e6e51955823\" class=\"colab-df-container\">\n",
+              "    <div>\n",
+              "<style scoped>\n",
+              "    .dataframe tbody tr th:only-of-type {\n",
+              "        vertical-align: middle;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe tbody tr th {\n",
+              "        vertical-align: top;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe thead th {\n",
+              "        text-align: right;\n",
+              "    }\n",
+              "</style>\n",
+              "<table border=\"1\" class=\"dataframe\">\n",
+              "  <thead>\n",
+              "    <tr style=\"text-align: right;\">\n",
+              "      <th></th>\n",
+              "      <th>10158</th>\n",
+              "      <th>@USER هتهزر معايا ولا ايه 😡😡😡😡</th>\n",
+              "    </tr>\n",
+              "  </thead>\n",
+              "  <tbody>\n",
+              "    <tr>\n",
+              "      <th>0</th>\n",
+              "      <td>10159</td>\n",
+              "      <td>مشفتش العرض بتاعهم لا مش مهتمة لا😩🐸😂 URL</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>1</th>\n",
+              "      <td>10160</td>\n",
+              "      <td>RT @USER عندما تكون لوحدك تحس انك لحالك صح 😊&lt;L...</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>2</th>\n",
+              "      <td>10161</td>\n",
+              "      <td>RT @USER ماشاء الله الرجال باين عليه محترم &lt;LF...</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>3</th>\n",
+              "      <td>10162</td>\n",
+              "      <td>@USER شسالفة احد يفهمني 😤</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>4</th>\n",
+              "      <td>10163</td>\n",
+              "      <td>@USER اقووووول استريح عاد احتفالاتنا تحط اغاني...</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>...</th>\n",
+              "      <td>...</td>\n",
+              "      <td>...</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>2535</th>\n",
+              "      <td>12694</td>\n",
+              "      <td>قله حيا وين اهلهم ذولي الله لايبلانا لهالدرجه ...</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>2536</th>\n",
+              "      <td>12695</td>\n",
+              "      <td>RT @USER ثم الطحلبه🐸🐸🐸 URL</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>2537</th>\n",
+              "      <td>12696</td>\n",
+              "      <td>يا وجه الله 😷 من اليوم ورايح شاورما انسى 🔪 URL</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>2538</th>\n",
+              "      <td>12697</td>\n",
+              "      <td>@USER متخلف حتى الحلال حرمتوه 😷</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>2539</th>\n",
+              "      <td>12698</td>\n",
+              "      <td>@USER حنا خقينا على بنت رئيس مو على بياع كليجا...</td>\n",
+              "    </tr>\n",
+              "  </tbody>\n",
+              "</table>\n",
+              "<p>2540 rows × 2 columns</p>\n",
+              "</div>\n",
+              "    <div class=\"colab-df-buttons\">\n",
+              "\n",
+              "  <div class=\"colab-df-container\">\n",
+              "    <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-dfe9107e-41aa-4f63-82a2-8e6e51955823')\"\n",
+              "            title=\"Convert this dataframe to an interactive table.\"\n",
+              "            style=\"display:none;\">\n",
+              "\n",
+              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
+              "    <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
+              "  </svg>\n",
+              "    </button>\n",
+              "\n",
+              "  <style>\n",
+              "    .colab-df-container {\n",
+              "      display:flex;\n",
+              "      gap: 12px;\n",
+              "    }\n",
+              "\n",
+              "    .colab-df-convert {\n",
+              "      background-color: #E8F0FE;\n",
+              "      border: none;\n",
+              "      border-radius: 50%;\n",
+              "      cursor: pointer;\n",
+              "      display: none;\n",
+              "      fill: #1967D2;\n",
+              "      height: 32px;\n",
+              "      padding: 0 0 0 0;\n",
+              "      width: 32px;\n",
+              "    }\n",
+              "\n",
+              "    .colab-df-convert:hover {\n",
+              "      background-color: #E2EBFA;\n",
+              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
+              "      fill: #174EA6;\n",
+              "    }\n",
+              "\n",
+              "    .colab-df-buttons div {\n",
+              "      margin-bottom: 4px;\n",
+              "    }\n",
+              "\n",
+              "    [theme=dark] .colab-df-convert {\n",
+              "      background-color: #3B4455;\n",
+              "      fill: #D2E3FC;\n",
+              "    }\n",
+              "\n",
+              "    [theme=dark] .colab-df-convert:hover {\n",
+              "      background-color: #434B5C;\n",
+              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
+              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
+              "      fill: #FFFFFF;\n",
+              "    }\n",
+              "  </style>\n",
+              "\n",
+              "    <script>\n",
+              "      const buttonEl =\n",
+              "        document.querySelector('#df-dfe9107e-41aa-4f63-82a2-8e6e51955823 button.colab-df-convert');\n",
+              "      buttonEl.style.display =\n",
+              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
+              "\n",
+              "      async function convertToInteractive(key) {\n",
+              "        const element = document.querySelector('#df-dfe9107e-41aa-4f63-82a2-8e6e51955823');\n",
+              "        const dataTable =\n",
+              "          await google.colab.kernel.invokeFunction('convertToInteractive',\n",
+              "                                                    [key], {});\n",
+              "        if (!dataTable) return;\n",
+              "\n",
+              "        const docLinkHtml = 'Like what you see? Visit the ' +\n",
+              "          '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
+              "          + ' to learn more about interactive tables.';\n",
+              "        element.innerHTML = '';\n",
+              "        dataTable['output_type'] = 'display_data';\n",
+              "        await google.colab.output.renderOutput(dataTable, element);\n",
+              "        const docLink = document.createElement('div');\n",
+              "        docLink.innerHTML = docLinkHtml;\n",
+              "        element.appendChild(docLink);\n",
+              "      }\n",
+              "    </script>\n",
+              "  </div>\n",
+              "\n",
+              "\n",
+              "<div id=\"df-44aab3af-dc8b-4854-be9f-e33a4ffac693\">\n",
+              "  <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-44aab3af-dc8b-4854-be9f-e33a4ffac693')\"\n",
+              "            title=\"Suggest charts\"\n",
+              "            style=\"display:none;\">\n",
+              "\n",
+              "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
+              "     width=\"24px\">\n",
+              "    <g>\n",
+              "        <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
+              "    </g>\n",
+              "</svg>\n",
+              "  </button>\n",
+              "\n",
+              "<style>\n",
+              "  .colab-df-quickchart {\n",
+              "      --bg-color: #E8F0FE;\n",
+              "      --fill-color: #1967D2;\n",
+              "      --hover-bg-color: #E2EBFA;\n",
+              "      --hover-fill-color: #174EA6;\n",
+              "      --disabled-fill-color: #AAA;\n",
+              "      --disabled-bg-color: #DDD;\n",
+              "  }\n",
+              "\n",
+              "  [theme=dark] .colab-df-quickchart {\n",
+              "      --bg-color: #3B4455;\n",
+              "      --fill-color: #D2E3FC;\n",
+              "      --hover-bg-color: #434B5C;\n",
+              "      --hover-fill-color: #FFFFFF;\n",
+              "      --disabled-bg-color: #3B4455;\n",
+              "      --disabled-fill-color: #666;\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-quickchart {\n",
+              "    background-color: var(--bg-color);\n",
+              "    border: none;\n",
+              "    border-radius: 50%;\n",
+              "    cursor: pointer;\n",
+              "    display: none;\n",
+              "    fill: var(--fill-color);\n",
+              "    height: 32px;\n",
+              "    padding: 0;\n",
+              "    width: 32px;\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-quickchart:hover {\n",
+              "    background-color: var(--hover-bg-color);\n",
+              "    box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
+              "    fill: var(--button-hover-fill-color);\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-quickchart-complete:disabled,\n",
+              "  .colab-df-quickchart-complete:disabled:hover {\n",
+              "    background-color: var(--disabled-bg-color);\n",
+              "    fill: var(--disabled-fill-color);\n",
+              "    box-shadow: none;\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-spinner {\n",
+              "    border: 2px solid var(--fill-color);\n",
+              "    border-color: transparent;\n",
+              "    border-bottom-color: var(--fill-color);\n",
+              "    animation:\n",
+              "      spin 1s steps(1) infinite;\n",
+              "  }\n",
+              "\n",
+              "  @keyframes spin {\n",
+              "    0% {\n",
+              "      border-color: transparent;\n",
+              "      border-bottom-color: var(--fill-color);\n",
+              "      border-left-color: var(--fill-color);\n",
+              "    }\n",
+              "    20% {\n",
+              "      border-color: transparent;\n",
+              "      border-left-color: var(--fill-color);\n",
+              "      border-top-color: var(--fill-color);\n",
+              "    }\n",
+              "    30% {\n",
+              "      border-color: transparent;\n",
+              "      border-left-color: var(--fill-color);\n",
+              "      border-top-color: var(--fill-color);\n",
+              "      border-right-color: var(--fill-color);\n",
+              "    }\n",
+              "    40% {\n",
+              "      border-color: transparent;\n",
+              "      border-right-color: var(--fill-color);\n",
+              "      border-top-color: var(--fill-color);\n",
+              "    }\n",
+              "    60% {\n",
+              "      border-color: transparent;\n",
+              "      border-right-color: var(--fill-color);\n",
+              "    }\n",
+              "    80% {\n",
+              "      border-color: transparent;\n",
+              "      border-right-color: var(--fill-color);\n",
+              "      border-bottom-color: var(--fill-color);\n",
+              "    }\n",
+              "    90% {\n",
+              "      border-color: transparent;\n",
+              "      border-bottom-color: var(--fill-color);\n",
+              "    }\n",
+              "  }\n",
+              "</style>\n",
+              "\n",
+              "  <script>\n",
+              "    async function quickchart(key) {\n",
+              "      const quickchartButtonEl =\n",
+              "        document.querySelector('#' + key + ' button');\n",
+              "      quickchartButtonEl.disabled = true;  // To prevent multiple clicks.\n",
+              "      quickchartButtonEl.classList.add('colab-df-spinner');\n",
+              "      try {\n",
+              "        const charts = await google.colab.kernel.invokeFunction(\n",
+              "            'suggestCharts', [key], {});\n",
+              "      } catch (error) {\n",
+              "        console.error('Error during call to suggestCharts:', error);\n",
+              "      }\n",
+              "      quickchartButtonEl.classList.remove('colab-df-spinner');\n",
+              "      quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
+              "    }\n",
+              "    (() => {\n",
+              "      let quickchartButtonEl =\n",
+              "        document.querySelector('#df-44aab3af-dc8b-4854-be9f-e33a4ffac693 button');\n",
+              "      quickchartButtonEl.style.display =\n",
+              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
+              "    })();\n",
+              "  </script>\n",
+              "</div>\n",
+              "\n",
+              "  <div id=\"id_eea2c891-31b0-41c1-9b65-4cd94ba3fb7f\">\n",
+              "    <style>\n",
+              "      .colab-df-generate {\n",
+              "        background-color: #E8F0FE;\n",
+              "        border: none;\n",
+              "        border-radius: 50%;\n",
+              "        cursor: pointer;\n",
+              "        display: none;\n",
+              "        fill: #1967D2;\n",
+              "        height: 32px;\n",
+              "        padding: 0 0 0 0;\n",
+              "        width: 32px;\n",
+              "      }\n",
+              "\n",
+              "      .colab-df-generate:hover {\n",
+              "        background-color: #E2EBFA;\n",
+              "        box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
+              "        fill: #174EA6;\n",
+              "      }\n",
+              "\n",
+              "      [theme=dark] .colab-df-generate {\n",
+              "        background-color: #3B4455;\n",
+              "        fill: #D2E3FC;\n",
+              "      }\n",
+              "\n",
+              "      [theme=dark] .colab-df-generate:hover {\n",
+              "        background-color: #434B5C;\n",
+              "        box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
+              "        filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
+              "        fill: #FFFFFF;\n",
+              "      }\n",
+              "    </style>\n",
+              "    <button class=\"colab-df-generate\" onclick=\"generateWithVariable('test_data')\"\n",
+              "            title=\"Generate code using this dataframe.\"\n",
+              "            style=\"display:none;\">\n",
+              "\n",
+              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
+              "       width=\"24px\">\n",
+              "    <path d=\"M7,19H8.4L18.45,9,17,7.55,7,17.6ZM5,21V16.75L18.45,3.32a2,2,0,0,1,2.83,0l1.4,1.43a1.91,1.91,0,0,1,.58,1.4,1.91,1.91,0,0,1-.58,1.4L9.25,21ZM18.45,9,17,7.55Zm-12,3A5.31,5.31,0,0,0,4.9,8.1,5.31,5.31,0,0,0,1,6.5,5.31,5.31,0,0,0,4.9,4.9,5.31,5.31,0,0,0,6.5,1,5.31,5.31,0,0,0,8.1,4.9,5.31,5.31,0,0,0,12,6.5,5.46,5.46,0,0,0,6.5,12Z\"/>\n",
+              "  </svg>\n",
+              "    </button>\n",
+              "    <script>\n",
+              "      (() => {\n",
+              "      const buttonEl =\n",
+              "        document.querySelector('#id_eea2c891-31b0-41c1-9b65-4cd94ba3fb7f button.colab-df-generate');\n",
+              "      buttonEl.style.display =\n",
+              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
+              "\n",
+              "      buttonEl.onclick = () => {\n",
+              "        google.colab.notebook.generateWithVariable('test_data');\n",
+              "      }\n",
+              "      })();\n",
+              "    </script>\n",
+              "  </div>\n",
+              "\n",
+              "    </div>\n",
+              "  </div>\n"
+            ],
+            "application/vnd.google.colaboratory.intrinsic+json": {
+              "type": "dataframe",
+              "variable_name": "test_data",
+              "summary": "{\n  \"name\": \"test_data\",\n  \"rows\": 2540,\n  \"fields\": [\n    {\n      \"column\": \"10158\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 733,\n        \"min\": 10159,\n        \"max\": 12698,\n        \"num_unique_values\": 2540,\n        \"samples\": [\n          11716,\n          10293,\n          11798\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"@USER \\u0647\\u062a\\u0647\\u0632\\u0631 \\u0645\\u0639\\u0627\\u064a\\u0627 \\u0648\\u0644\\u0627 \\u0627\\u064a\\u0647 \\ud83d\\ude21\\ud83d\\ude21\\ud83d\\ude21\\ud83d\\ude21\",\n      \"properties\": {\n        \"dtype\": \"string\",\n        \"num_unique_values\": 2540,\n        \"samples\": [\n          \"\\u064a\\u0627\\u0643\\u0631\\u0647\\u064a \\u0644\\u0647 \\u0627\\u0644\\u063a\\u064a\\u0627\\u0645\\u0647 \\ud83d\\ude37\\ud83d\\udc94\\ud83d\\udc94\\ud83d\\udc94\\ud83d\\udc94 URL\",\n          \"RT @USER \\u0633\\u0639\\u0648\\u062f\\u064a\\u0647 \\u0631\\u0627\\u0643\\u0628\\u0647 \\u0632\\u0628 \\u0633\\u0648\\u0627\\u0642\\u0647\\u0627\\ud83d\\udc60 URL\",\n          \"\\u0634\\u0648\\u0636\\u0639 \\u0627\\u0644\\u062c\\u0648 \\u0645\\u0639\\u064a \\u0643\\u0644 \\u0645\\u0627 \\u0627\\u063a\\u064a\\u0628 \\u064a\\u0632\\u064a\\u0646 \\u0644\\u0647\\u062f\\u0631\\u062c\\u0629 \\u0634\\u0624\\u0645 \\u0635\\u0631\\u062a \\u0639\\u0644\\u0649 \\u0627\\u0644\\u062d\\u064a\\u0627\\u0629\\ud83d\\ude21 URL\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    }\n  ]\n}"
+            }
+          },
+          "metadata": {},
+          "execution_count": 9
+        }
+      ],
+      "source": [
+        "test_data"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 10,
+      "metadata": {
+        "id": "LJsmEXA9zggW"
+      },
+      "outputs": [],
+      "source": [
+        "# The id column is literally named '10158': the file appears to have no header row, so the first\n",
+        "# record's id became the column name (NOTE(review): confirm in the cell that loads test_data).\n",
+        "# errors='ignore' keeps this cell re-runnable after the column has already been removed.\n",
+        "test_data = test_data.drop(columns=['10158'], errors='ignore')"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 11,
+      "metadata": {
+        "id": "jT_O7vSbzgio",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 424
+        },
+        "outputId": "38e99acb-dcf4-48dd-ea7d-67e4637e2f4f"
+      },
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "                                                   Text\n",
+              "0              مشفتش العرض بتاعهم لا مش مهتمة لا😩🐸😂 URL\n",
+              "1     RT @USER عندما تكون لوحدك تحس انك لحالك صح 😊<L...\n",
+              "2     RT @USER ماشاء الله الرجال باين عليه محترم <LF...\n",
+              "3                             @USER شسالفة احد يفهمني 😤\n",
+              "4     @USER اقووووول استريح عاد احتفالاتنا تحط اغاني...\n",
+              "...                                                 ...\n",
+              "2535  قله حيا وين اهلهم ذولي الله لايبلانا لهالدرجه ...\n",
+              "2536                         RT @USER ثم الطحلبه🐸🐸🐸 URL\n",
+              "2537     يا وجه الله 😷 من اليوم ورايح شاورما انسى 🔪 URL\n",
+              "2538                    @USER متخلف حتى الحلال حرمتوه 😷\n",
+              "2539  @USER حنا خقينا على بنت رئيس مو على بياع كليجا...\n",
+              "\n",
+              "[2540 rows x 1 columns]"
+            ],
+            "text/html": [
+              "\n",
+              "  <div id=\"df-b74564b5-3ae3-424f-b484-ab747278aff0\" class=\"colab-df-container\">\n",
+              "    <div>\n",
+              "<style scoped>\n",
+              "    .dataframe tbody tr th:only-of-type {\n",
+              "        vertical-align: middle;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe tbody tr th {\n",
+              "        vertical-align: top;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe thead th {\n",
+              "        text-align: right;\n",
+              "    }\n",
+              "</style>\n",
+              "<table border=\"1\" class=\"dataframe\">\n",
+              "  <thead>\n",
+              "    <tr style=\"text-align: right;\">\n",
+              "      <th></th>\n",
+              "      <th>Text</th>\n",
+              "    </tr>\n",
+              "  </thead>\n",
+              "  <tbody>\n",
+              "    <tr>\n",
+              "      <th>0</th>\n",
+              "      <td>مشفتش العرض بتاعهم لا مش مهتمة لا😩🐸😂 URL</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>1</th>\n",
+              "      <td>RT @USER عندما تكون لوحدك تحس انك لحالك صح 😊&lt;L...</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>2</th>\n",
+              "      <td>RT @USER ماشاء الله الرجال باين عليه محترم &lt;LF...</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>3</th>\n",
+              "      <td>@USER شسالفة احد يفهمني 😤</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>4</th>\n",
+              "      <td>@USER اقووووول استريح عاد احتفالاتنا تحط اغاني...</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>...</th>\n",
+              "      <td>...</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>2535</th>\n",
+              "      <td>قله حيا وين اهلهم ذولي الله لايبلانا لهالدرجه ...</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>2536</th>\n",
+              "      <td>RT @USER ثم الطحلبه🐸🐸🐸 URL</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>2537</th>\n",
+              "      <td>يا وجه الله 😷 من اليوم ورايح شاورما انسى 🔪 URL</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>2538</th>\n",
+              "      <td>@USER متخلف حتى الحلال حرمتوه 😷</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>2539</th>\n",
+              "      <td>@USER حنا خقينا على بنت رئيس مو على بياع كليجا...</td>\n",
+              "    </tr>\n",
+              "  </tbody>\n",
+              "</table>\n",
+              "<p>2540 rows × 1 columns</p>\n",
+              "</div>\n",
+              "    <div class=\"colab-df-buttons\">\n",
+              "\n",
+              "  <div class=\"colab-df-container\">\n",
+              "    <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-b74564b5-3ae3-424f-b484-ab747278aff0')\"\n",
+              "            title=\"Convert this dataframe to an interactive table.\"\n",
+              "            style=\"display:none;\">\n",
+              "\n",
+              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
+              "    <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
+              "  </svg>\n",
+              "    </button>\n",
+              "\n",
+              "  <style>\n",
+              "    .colab-df-container {\n",
+              "      display:flex;\n",
+              "      gap: 12px;\n",
+              "    }\n",
+              "\n",
+              "    .colab-df-convert {\n",
+              "      background-color: #E8F0FE;\n",
+              "      border: none;\n",
+              "      border-radius: 50%;\n",
+              "      cursor: pointer;\n",
+              "      display: none;\n",
+              "      fill: #1967D2;\n",
+              "      height: 32px;\n",
+              "      padding: 0 0 0 0;\n",
+              "      width: 32px;\n",
+              "    }\n",
+              "\n",
+              "    .colab-df-convert:hover {\n",
+              "      background-color: #E2EBFA;\n",
+              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
+              "      fill: #174EA6;\n",
+              "    }\n",
+              "\n",
+              "    .colab-df-buttons div {\n",
+              "      margin-bottom: 4px;\n",
+              "    }\n",
+              "\n",
+              "    [theme=dark] .colab-df-convert {\n",
+              "      background-color: #3B4455;\n",
+              "      fill: #D2E3FC;\n",
+              "    }\n",
+              "\n",
+              "    [theme=dark] .colab-df-convert:hover {\n",
+              "      background-color: #434B5C;\n",
+              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
+              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
+              "      fill: #FFFFFF;\n",
+              "    }\n",
+              "  </style>\n",
+              "\n",
+              "    <script>\n",
+              "      const buttonEl =\n",
+              "        document.querySelector('#df-b74564b5-3ae3-424f-b484-ab747278aff0 button.colab-df-convert');\n",
+              "      buttonEl.style.display =\n",
+              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
+              "\n",
+              "      async function convertToInteractive(key) {\n",
+              "        const element = document.querySelector('#df-b74564b5-3ae3-424f-b484-ab747278aff0');\n",
+              "        const dataTable =\n",
+              "          await google.colab.kernel.invokeFunction('convertToInteractive',\n",
+              "                                                    [key], {});\n",
+              "        if (!dataTable) return;\n",
+              "\n",
+              "        const docLinkHtml = 'Like what you see? Visit the ' +\n",
+              "          '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
+              "          + ' to learn more about interactive tables.';\n",
+              "        element.innerHTML = '';\n",
+              "        dataTable['output_type'] = 'display_data';\n",
+              "        await google.colab.output.renderOutput(dataTable, element);\n",
+              "        const docLink = document.createElement('div');\n",
+              "        docLink.innerHTML = docLinkHtml;\n",
+              "        element.appendChild(docLink);\n",
+              "      }\n",
+              "    </script>\n",
+              "  </div>\n",
+              "\n",
+              "\n",
+              "<div id=\"df-39161215-d71d-437e-81ff-23939c22fc4f\">\n",
+              "  <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-39161215-d71d-437e-81ff-23939c22fc4f')\"\n",
+              "            title=\"Suggest charts\"\n",
+              "            style=\"display:none;\">\n",
+              "\n",
+              "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
+              "     width=\"24px\">\n",
+              "    <g>\n",
+              "        <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
+              "    </g>\n",
+              "</svg>\n",
+              "  </button>\n",
+              "\n",
+              "<style>\n",
+              "  .colab-df-quickchart {\n",
+              "      --bg-color: #E8F0FE;\n",
+              "      --fill-color: #1967D2;\n",
+              "      --hover-bg-color: #E2EBFA;\n",
+              "      --hover-fill-color: #174EA6;\n",
+              "      --disabled-fill-color: #AAA;\n",
+              "      --disabled-bg-color: #DDD;\n",
+              "  }\n",
+              "\n",
+              "  [theme=dark] .colab-df-quickchart {\n",
+              "      --bg-color: #3B4455;\n",
+              "      --fill-color: #D2E3FC;\n",
+              "      --hover-bg-color: #434B5C;\n",
+              "      --hover-fill-color: #FFFFFF;\n",
+              "      --disabled-bg-color: #3B4455;\n",
+              "      --disabled-fill-color: #666;\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-quickchart {\n",
+              "    background-color: var(--bg-color);\n",
+              "    border: none;\n",
+              "    border-radius: 50%;\n",
+              "    cursor: pointer;\n",
+              "    display: none;\n",
+              "    fill: var(--fill-color);\n",
+              "    height: 32px;\n",
+              "    padding: 0;\n",
+              "    width: 32px;\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-quickchart:hover {\n",
+              "    background-color: var(--hover-bg-color);\n",
+              "    box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
+              "    fill: var(--button-hover-fill-color);\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-quickchart-complete:disabled,\n",
+              "  .colab-df-quickchart-complete:disabled:hover {\n",
+              "    background-color: var(--disabled-bg-color);\n",
+              "    fill: var(--disabled-fill-color);\n",
+              "    box-shadow: none;\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-spinner {\n",
+              "    border: 2px solid var(--fill-color);\n",
+              "    border-color: transparent;\n",
+              "    border-bottom-color: var(--fill-color);\n",
+              "    animation:\n",
+              "      spin 1s steps(1) infinite;\n",
+              "  }\n",
+              "\n",
+              "  @keyframes spin {\n",
+              "    0% {\n",
+              "      border-color: transparent;\n",
+              "      border-bottom-color: var(--fill-color);\n",
+              "      border-left-color: var(--fill-color);\n",
+              "    }\n",
+              "    20% {\n",
+              "      border-color: transparent;\n",
+              "      border-left-color: var(--fill-color);\n",
+              "      border-top-color: var(--fill-color);\n",
+              "    }\n",
+              "    30% {\n",
+              "      border-color: transparent;\n",
+              "      border-left-color: var(--fill-color);\n",
+              "      border-top-color: var(--fill-color);\n",
+              "      border-right-color: var(--fill-color);\n",
+              "    }\n",
+              "    40% {\n",
+              "      border-color: transparent;\n",
+              "      border-right-color: var(--fill-color);\n",
+              "      border-top-color: var(--fill-color);\n",
+              "    }\n",
+              "    60% {\n",
+              "      border-color: transparent;\n",
+              "      border-right-color: var(--fill-color);\n",
+              "    }\n",
+              "    80% {\n",
+              "      border-color: transparent;\n",
+              "      border-right-color: var(--fill-color);\n",
+              "      border-bottom-color: var(--fill-color);\n",
+              "    }\n",
+              "    90% {\n",
+              "      border-color: transparent;\n",
+              "      border-bottom-color: var(--fill-color);\n",
+              "    }\n",
+              "  }\n",
+              "</style>\n",
+              "\n",
+              "  <script>\n",
+              "    async function quickchart(key) {\n",
+              "      const quickchartButtonEl =\n",
+              "        document.querySelector('#' + key + ' button');\n",
+              "      quickchartButtonEl.disabled = true;  // To prevent multiple clicks.\n",
+              "      quickchartButtonEl.classList.add('colab-df-spinner');\n",
+              "      try {\n",
+              "        const charts = await google.colab.kernel.invokeFunction(\n",
+              "            'suggestCharts', [key], {});\n",
+              "      } catch (error) {\n",
+              "        console.error('Error during call to suggestCharts:', error);\n",
+              "      }\n",
+              "      quickchartButtonEl.classList.remove('colab-df-spinner');\n",
+              "      quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
+              "    }\n",
+              "    (() => {\n",
+              "      let quickchartButtonEl =\n",
+              "        document.querySelector('#df-39161215-d71d-437e-81ff-23939c22fc4f button');\n",
+              "      quickchartButtonEl.style.display =\n",
+              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
+              "    })();\n",
+              "  </script>\n",
+              "</div>\n",
+              "\n",
+              "  <div id=\"id_7298f31c-36d9-43ba-a423-5268a0acdee4\">\n",
+              "    <style>\n",
+              "      .colab-df-generate {\n",
+              "        background-color: #E8F0FE;\n",
+              "        border: none;\n",
+              "        border-radius: 50%;\n",
+              "        cursor: pointer;\n",
+              "        display: none;\n",
+              "        fill: #1967D2;\n",
+              "        height: 32px;\n",
+              "        padding: 0 0 0 0;\n",
+              "        width: 32px;\n",
+              "      }\n",
+              "\n",
+              "      .colab-df-generate:hover {\n",
+              "        background-color: #E2EBFA;\n",
+              "        box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
+              "        fill: #174EA6;\n",
+              "      }\n",
+              "\n",
+              "      [theme=dark] .colab-df-generate {\n",
+              "        background-color: #3B4455;\n",
+              "        fill: #D2E3FC;\n",
+              "      }\n",
+              "\n",
+              "      [theme=dark] .colab-df-generate:hover {\n",
+              "        background-color: #434B5C;\n",
+              "        box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
+              "        filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
+              "        fill: #FFFFFF;\n",
+              "      }\n",
+              "    </style>\n",
+              "    <button class=\"colab-df-generate\" onclick=\"generateWithVariable('test_data')\"\n",
+              "            title=\"Generate code using this dataframe.\"\n",
+              "            style=\"display:none;\">\n",
+              "\n",
+              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
+              "       width=\"24px\">\n",
+              "    <path d=\"M7,19H8.4L18.45,9,17,7.55,7,17.6ZM5,21V16.75L18.45,3.32a2,2,0,0,1,2.83,0l1.4,1.43a1.91,1.91,0,0,1,.58,1.4,1.91,1.91,0,0,1-.58,1.4L9.25,21ZM18.45,9,17,7.55Zm-12,3A5.31,5.31,0,0,0,4.9,8.1,5.31,5.31,0,0,0,1,6.5,5.31,5.31,0,0,0,4.9,4.9,5.31,5.31,0,0,0,6.5,1,5.31,5.31,0,0,0,8.1,4.9,5.31,5.31,0,0,0,12,6.5,5.46,5.46,0,0,0,6.5,12Z\"/>\n",
+              "  </svg>\n",
+              "    </button>\n",
+              "    <script>\n",
+              "      (() => {\n",
+              "      const buttonEl =\n",
+              "        document.querySelector('#id_7298f31c-36d9-43ba-a423-5268a0acdee4 button.colab-df-generate');\n",
+              "      buttonEl.style.display =\n",
+              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
+              "\n",
+              "      buttonEl.onclick = () => {\n",
+              "        google.colab.notebook.generateWithVariable('test_data');\n",
+              "      }\n",
+              "      })();\n",
+              "    </script>\n",
+              "  </div>\n",
+              "\n",
+              "    </div>\n",
+              "  </div>\n"
+            ],
+            "application/vnd.google.colaboratory.intrinsic+json": {
+              "type": "dataframe",
+              "variable_name": "test_data",
+              "summary": "{\n  \"name\": \"test_data\",\n  \"rows\": 2540,\n  \"fields\": [\n    {\n      \"column\": \"Text\",\n      \"properties\": {\n        \"dtype\": \"string\",\n        \"num_unique_values\": 2540,\n        \"samples\": [\n          \"\\u064a\\u0627\\u0643\\u0631\\u0647\\u064a \\u0644\\u0647 \\u0627\\u0644\\u063a\\u064a\\u0627\\u0645\\u0647 \\ud83d\\ude37\\ud83d\\udc94\\ud83d\\udc94\\ud83d\\udc94\\ud83d\\udc94 URL\",\n          \"RT @USER \\u0633\\u0639\\u0648\\u062f\\u064a\\u0647 \\u0631\\u0627\\u0643\\u0628\\u0647 \\u0632\\u0628 \\u0633\\u0648\\u0627\\u0642\\u0647\\u0627\\ud83d\\udc60 URL\",\n          \"\\u0634\\u0648\\u0636\\u0639 \\u0627\\u0644\\u062c\\u0648 \\u0645\\u0639\\u064a \\u0643\\u0644 \\u0645\\u0627 \\u0627\\u063a\\u064a\\u0628 \\u064a\\u0632\\u064a\\u0646 \\u0644\\u0647\\u062f\\u0631\\u062c\\u0629 \\u0634\\u0624\\u0645 \\u0635\\u0631\\u062a \\u0639\\u0644\\u0649 \\u0627\\u0644\\u062d\\u064a\\u0627\\u0629\\ud83d\\ude21 URL\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    }\n  ]\n}"
+            }
+          },
+          "metadata": {},
+          "execution_count": 11
+        }
+      ],
+      "source": [
+        "# Give the tweet column a stable name.\n",
+        "# NOTE(review): the current header is itself a tweet, which suggests the file was loaded\n",
+        "# with its first sample taken as the column name - confirm against the read_csv call\n",
+        "# that creates test_data.\n",
+        "test_data = test_data.rename({\"@USER هتهزر معايا ولا ايه 😡😡😡😡\": \"Text\"}, axis=\"columns\")\n",
+        "test_data"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Load the task A gold labels (tab-separated, quoting disabled) and name the column.\n",
+        "# NOTE(review): read_csv takes the first line as the header here, so the first gold\n",
+        "# label (\"NOT_OFF\") becomes the column name instead of a data row. The join below is\n",
+        "# index-based, so only change this together with the cell that loads test_data.\n",
+        "test_labels = pd.read_csv(\n",
+        "    \"OSACT2022-sharedTask-test-taskA-gold-labels.txt\",\n",
+        "    sep=\"\\t\",\n",
+        "    quoting=csv.QUOTE_NONE,\n",
+        ").rename(columns={\"NOT_OFF\": \"label\"})\n",
+        "\n",
+        "# Attach each label to its tweet by row index.\n",
+        "test_data = test_data.join(test_labels)\n",
+        "test_data"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 424
+        },
+        "id": "UY1EuBsegOr3",
+        "outputId": "67459fb8-db65-42f3-cefe-9265da8916c3"
+      },
+      "execution_count": 12,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "                                                   Text    label\n",
+              "0              مشفتش العرض بتاعهم لا مش مهتمة لا😩🐸😂 URL  NOT_OFF\n",
+              "1     RT @USER عندما تكون لوحدك تحس انك لحالك صح 😊<L...  NOT_OFF\n",
+              "2     RT @USER ماشاء الله الرجال باين عليه محترم <LF...  NOT_OFF\n",
+              "3                             @USER شسالفة احد يفهمني 😤  NOT_OFF\n",
+              "4     @USER اقووووول استريح عاد احتفالاتنا تحط اغاني...  NOT_OFF\n",
+              "...                                                 ...      ...\n",
+              "2535  قله حيا وين اهلهم ذولي الله لايبلانا لهالدرجه ...      OFF\n",
+              "2536                         RT @USER ثم الطحلبه🐸🐸🐸 URL      OFF\n",
+              "2537     يا وجه الله 😷 من اليوم ورايح شاورما انسى 🔪 URL  NOT_OFF\n",
+              "2538                    @USER متخلف حتى الحلال حرمتوه 😷      OFF\n",
+              "2539  @USER حنا خقينا على بنت رئيس مو على بياع كليجا...  NOT_OFF\n",
+              "\n",
+              "[2540 rows x 2 columns]"
+            ],
+            "text/html": [
+              "\n",
+              "  <div id=\"df-c8bd2b8a-17ab-4fce-937c-2831ce40c261\" class=\"colab-df-container\">\n",
+              "    <div>\n",
+              "<style scoped>\n",
+              "    .dataframe tbody tr th:only-of-type {\n",
+              "        vertical-align: middle;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe tbody tr th {\n",
+              "        vertical-align: top;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe thead th {\n",
+              "        text-align: right;\n",
+              "    }\n",
+              "</style>\n",
+              "<table border=\"1\" class=\"dataframe\">\n",
+              "  <thead>\n",
+              "    <tr style=\"text-align: right;\">\n",
+              "      <th></th>\n",
+              "      <th>Text</th>\n",
+              "      <th>label</th>\n",
+              "    </tr>\n",
+              "  </thead>\n",
+              "  <tbody>\n",
+              "    <tr>\n",
+              "      <th>0</th>\n",
+              "      <td>مشفتش العرض بتاعهم لا مش مهتمة لا😩🐸😂 URL</td>\n",
+              "      <td>NOT_OFF</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>1</th>\n",
+              "      <td>RT @USER عندما تكون لوحدك تحس انك لحالك صح 😊&lt;L...</td>\n",
+              "      <td>NOT_OFF</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>2</th>\n",
+              "      <td>RT @USER ماشاء الله الرجال باين عليه محترم &lt;LF...</td>\n",
+              "      <td>NOT_OFF</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>3</th>\n",
+              "      <td>@USER شسالفة احد يفهمني 😤</td>\n",
+              "      <td>NOT_OFF</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>4</th>\n",
+              "      <td>@USER اقووووول استريح عاد احتفالاتنا تحط اغاني...</td>\n",
+              "      <td>NOT_OFF</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>...</th>\n",
+              "      <td>...</td>\n",
+              "      <td>...</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>2535</th>\n",
+              "      <td>قله حيا وين اهلهم ذولي الله لايبلانا لهالدرجه ...</td>\n",
+              "      <td>OFF</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>2536</th>\n",
+              "      <td>RT @USER ثم الطحلبه🐸🐸🐸 URL</td>\n",
+              "      <td>OFF</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>2537</th>\n",
+              "      <td>يا وجه الله 😷 من اليوم ورايح شاورما انسى 🔪 URL</td>\n",
+              "      <td>NOT_OFF</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>2538</th>\n",
+              "      <td>@USER متخلف حتى الحلال حرمتوه 😷</td>\n",
+              "      <td>OFF</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>2539</th>\n",
+              "      <td>@USER حنا خقينا على بنت رئيس مو على بياع كليجا...</td>\n",
+              "      <td>NOT_OFF</td>\n",
+              "    </tr>\n",
+              "  </tbody>\n",
+              "</table>\n",
+              "<p>2540 rows × 2 columns</p>\n",
+              "</div>\n",
+              "    <div class=\"colab-df-buttons\">\n",
+              "\n",
+              "  <div class=\"colab-df-container\">\n",
+              "    <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-c8bd2b8a-17ab-4fce-937c-2831ce40c261')\"\n",
+              "            title=\"Convert this dataframe to an interactive table.\"\n",
+              "            style=\"display:none;\">\n",
+              "\n",
+              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
+              "    <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
+              "  </svg>\n",
+              "    </button>\n",
+              "\n",
+              "  <style>\n",
+              "    .colab-df-container {\n",
+              "      display:flex;\n",
+              "      gap: 12px;\n",
+              "    }\n",
+              "\n",
+              "    .colab-df-convert {\n",
+              "      background-color: #E8F0FE;\n",
+              "      border: none;\n",
+              "      border-radius: 50%;\n",
+              "      cursor: pointer;\n",
+              "      display: none;\n",
+              "      fill: #1967D2;\n",
+              "      height: 32px;\n",
+              "      padding: 0 0 0 0;\n",
+              "      width: 32px;\n",
+              "    }\n",
+              "\n",
+              "    .colab-df-convert:hover {\n",
+              "      background-color: #E2EBFA;\n",
+              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
+              "      fill: #174EA6;\n",
+              "    }\n",
+              "\n",
+              "    .colab-df-buttons div {\n",
+              "      margin-bottom: 4px;\n",
+              "    }\n",
+              "\n",
+              "    [theme=dark] .colab-df-convert {\n",
+              "      background-color: #3B4455;\n",
+              "      fill: #D2E3FC;\n",
+              "    }\n",
+              "\n",
+              "    [theme=dark] .colab-df-convert:hover {\n",
+              "      background-color: #434B5C;\n",
+              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
+              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
+              "      fill: #FFFFFF;\n",
+              "    }\n",
+              "  </style>\n",
+              "\n",
+              "    <script>\n",
+              "      const buttonEl =\n",
+              "        document.querySelector('#df-c8bd2b8a-17ab-4fce-937c-2831ce40c261 button.colab-df-convert');\n",
+              "      buttonEl.style.display =\n",
+              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
+              "\n",
+              "      async function convertToInteractive(key) {\n",
+              "        const element = document.querySelector('#df-c8bd2b8a-17ab-4fce-937c-2831ce40c261');\n",
+              "        const dataTable =\n",
+              "          await google.colab.kernel.invokeFunction('convertToInteractive',\n",
+              "                                                    [key], {});\n",
+              "        if (!dataTable) return;\n",
+              "\n",
+              "        const docLinkHtml = 'Like what you see? Visit the ' +\n",
+              "          '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
+              "          + ' to learn more about interactive tables.';\n",
+              "        element.innerHTML = '';\n",
+              "        dataTable['output_type'] = 'display_data';\n",
+              "        await google.colab.output.renderOutput(dataTable, element);\n",
+              "        const docLink = document.createElement('div');\n",
+              "        docLink.innerHTML = docLinkHtml;\n",
+              "        element.appendChild(docLink);\n",
+              "      }\n",
+              "    </script>\n",
+              "  </div>\n",
+              "\n",
+              "\n",
+              "<div id=\"df-577f3042-28c5-4e47-a80a-0c4e6d7ffa1e\">\n",
+              "  <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-577f3042-28c5-4e47-a80a-0c4e6d7ffa1e')\"\n",
+              "            title=\"Suggest charts\"\n",
+              "            style=\"display:none;\">\n",
+              "\n",
+              "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
+              "     width=\"24px\">\n",
+              "    <g>\n",
+              "        <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
+              "    </g>\n",
+              "</svg>\n",
+              "  </button>\n",
+              "\n",
+              "<style>\n",
+              "  .colab-df-quickchart {\n",
+              "      --bg-color: #E8F0FE;\n",
+              "      --fill-color: #1967D2;\n",
+              "      --hover-bg-color: #E2EBFA;\n",
+              "      --hover-fill-color: #174EA6;\n",
+              "      --disabled-fill-color: #AAA;\n",
+              "      --disabled-bg-color: #DDD;\n",
+              "  }\n",
+              "\n",
+              "  [theme=dark] .colab-df-quickchart {\n",
+              "      --bg-color: #3B4455;\n",
+              "      --fill-color: #D2E3FC;\n",
+              "      --hover-bg-color: #434B5C;\n",
+              "      --hover-fill-color: #FFFFFF;\n",
+              "      --disabled-bg-color: #3B4455;\n",
+              "      --disabled-fill-color: #666;\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-quickchart {\n",
+              "    background-color: var(--bg-color);\n",
+              "    border: none;\n",
+              "    border-radius: 50%;\n",
+              "    cursor: pointer;\n",
+              "    display: none;\n",
+              "    fill: var(--fill-color);\n",
+              "    height: 32px;\n",
+              "    padding: 0;\n",
+              "    width: 32px;\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-quickchart:hover {\n",
+              "    background-color: var(--hover-bg-color);\n",
+              "    box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
+              "    fill: var(--button-hover-fill-color);\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-quickchart-complete:disabled,\n",
+              "  .colab-df-quickchart-complete:disabled:hover {\n",
+              "    background-color: var(--disabled-bg-color);\n",
+              "    fill: var(--disabled-fill-color);\n",
+              "    box-shadow: none;\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-spinner {\n",
+              "    border: 2px solid var(--fill-color);\n",
+              "    border-color: transparent;\n",
+              "    border-bottom-color: var(--fill-color);\n",
+              "    animation:\n",
+              "      spin 1s steps(1) infinite;\n",
+              "  }\n",
+              "\n",
+              "  @keyframes spin {\n",
+              "    0% {\n",
+              "      border-color: transparent;\n",
+              "      border-bottom-color: var(--fill-color);\n",
+              "      border-left-color: var(--fill-color);\n",
+              "    }\n",
+              "    20% {\n",
+              "      border-color: transparent;\n",
+              "      border-left-color: var(--fill-color);\n",
+              "      border-top-color: var(--fill-color);\n",
+              "    }\n",
+              "    30% {\n",
+              "      border-color: transparent;\n",
+              "      border-left-color: var(--fill-color);\n",
+              "      border-top-color: var(--fill-color);\n",
+              "      border-right-color: var(--fill-color);\n",
+              "    }\n",
+              "    40% {\n",
+              "      border-color: transparent;\n",
+              "      border-right-color: var(--fill-color);\n",
+              "      border-top-color: var(--fill-color);\n",
+              "    }\n",
+              "    60% {\n",
+              "      border-color: transparent;\n",
+              "      border-right-color: var(--fill-color);\n",
+              "    }\n",
+              "    80% {\n",
+              "      border-color: transparent;\n",
+              "      border-right-color: var(--fill-color);\n",
+              "      border-bottom-color: var(--fill-color);\n",
+              "    }\n",
+              "    90% {\n",
+              "      border-color: transparent;\n",
+              "      border-bottom-color: var(--fill-color);\n",
+              "    }\n",
+              "  }\n",
+              "</style>\n",
+              "\n",
+              "  <script>\n",
+              "    async function quickchart(key) {\n",
+              "      const quickchartButtonEl =\n",
+              "        document.querySelector('#' + key + ' button');\n",
+              "      quickchartButtonEl.disabled = true;  // To prevent multiple clicks.\n",
+              "      quickchartButtonEl.classList.add('colab-df-spinner');\n",
+              "      try {\n",
+              "        const charts = await google.colab.kernel.invokeFunction(\n",
+              "            'suggestCharts', [key], {});\n",
+              "      } catch (error) {\n",
+              "        console.error('Error during call to suggestCharts:', error);\n",
+              "      }\n",
+              "      quickchartButtonEl.classList.remove('colab-df-spinner');\n",
+              "      quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
+              "    }\n",
+              "    (() => {\n",
+              "      let quickchartButtonEl =\n",
+              "        document.querySelector('#df-577f3042-28c5-4e47-a80a-0c4e6d7ffa1e button');\n",
+              "      quickchartButtonEl.style.display =\n",
+              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
+              "    })();\n",
+              "  </script>\n",
+              "</div>\n",
+              "\n",
+              "  <div id=\"id_2b057884-4b5c-4431-a163-8d83a998331c\">\n",
+              "    <style>\n",
+              "      .colab-df-generate {\n",
+              "        background-color: #E8F0FE;\n",
+              "        border: none;\n",
+              "        border-radius: 50%;\n",
+              "        cursor: pointer;\n",
+              "        display: none;\n",
+              "        fill: #1967D2;\n",
+              "        height: 32px;\n",
+              "        padding: 0 0 0 0;\n",
+              "        width: 32px;\n",
+              "      }\n",
+              "\n",
+              "      .colab-df-generate:hover {\n",
+              "        background-color: #E2EBFA;\n",
+              "        box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
+              "        fill: #174EA6;\n",
+              "      }\n",
+              "\n",
+              "      [theme=dark] .colab-df-generate {\n",
+              "        background-color: #3B4455;\n",
+              "        fill: #D2E3FC;\n",
+              "      }\n",
+              "\n",
+              "      [theme=dark] .colab-df-generate:hover {\n",
+              "        background-color: #434B5C;\n",
+              "        box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
+              "        filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
+              "        fill: #FFFFFF;\n",
+              "      }\n",
+              "    </style>\n",
+              "    <button class=\"colab-df-generate\" onclick=\"generateWithVariable('test_data')\"\n",
+              "            title=\"Generate code using this dataframe.\"\n",
+              "            style=\"display:none;\">\n",
+              "\n",
+              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
+              "       width=\"24px\">\n",
+              "    <path d=\"M7,19H8.4L18.45,9,17,7.55,7,17.6ZM5,21V16.75L18.45,3.32a2,2,0,0,1,2.83,0l1.4,1.43a1.91,1.91,0,0,1,.58,1.4,1.91,1.91,0,0,1-.58,1.4L9.25,21ZM18.45,9,17,7.55Zm-12,3A5.31,5.31,0,0,0,4.9,8.1,5.31,5.31,0,0,0,1,6.5,5.31,5.31,0,0,0,4.9,4.9,5.31,5.31,0,0,0,6.5,1,5.31,5.31,0,0,0,8.1,4.9,5.31,5.31,0,0,0,12,6.5,5.46,5.46,0,0,0,6.5,12Z\"/>\n",
+              "  </svg>\n",
+              "    </button>\n",
+              "    <script>\n",
+              "      (() => {\n",
+              "      const buttonEl =\n",
+              "        document.querySelector('#id_2b057884-4b5c-4431-a163-8d83a998331c button.colab-df-generate');\n",
+              "      buttonEl.style.display =\n",
+              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
+              "\n",
+              "      buttonEl.onclick = () => {\n",
+              "        google.colab.notebook.generateWithVariable('test_data');\n",
+              "      }\n",
+              "      })();\n",
+              "    </script>\n",
+              "  </div>\n",
+              "\n",
+              "    </div>\n",
+              "  </div>\n"
+            ],
+            "application/vnd.google.colaboratory.intrinsic+json": {
+              "type": "dataframe",
+              "variable_name": "test_data",
+              "summary": "{\n  \"name\": \"test_data\",\n  \"rows\": 2540,\n  \"fields\": [\n    {\n      \"column\": \"Text\",\n      \"properties\": {\n        \"dtype\": \"string\",\n        \"num_unique_values\": 2540,\n        \"samples\": [\n          \"\\u064a\\u0627\\u0643\\u0631\\u0647\\u064a \\u0644\\u0647 \\u0627\\u0644\\u063a\\u064a\\u0627\\u0645\\u0647 \\ud83d\\ude37\\ud83d\\udc94\\ud83d\\udc94\\ud83d\\udc94\\ud83d\\udc94 URL\",\n          \"RT @USER \\u0633\\u0639\\u0648\\u062f\\u064a\\u0647 \\u0631\\u0627\\u0643\\u0628\\u0647 \\u0632\\u0628 \\u0633\\u0648\\u0627\\u0642\\u0647\\u0627\\ud83d\\udc60 URL\",\n          \"\\u0634\\u0648\\u0636\\u0639 \\u0627\\u0644\\u062c\\u0648 \\u0645\\u0639\\u064a \\u0643\\u0644 \\u0645\\u0627 \\u0627\\u063a\\u064a\\u0628 \\u064a\\u0632\\u064a\\u0646 \\u0644\\u0647\\u062f\\u0631\\u062c\\u0629 \\u0634\\u0624\\u0645 \\u0635\\u0631\\u062a \\u0639\\u0644\\u0649 \\u0627\\u0644\\u062d\\u064a\\u0627\\u0629\\ud83d\\ude21 URL\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"label\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 2,\n        \"samples\": [\n          \"OFF\",\n          \"NOT_OFF\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    }\n  ]\n}"
+            }
+          },
+          "metadata": {},
+          "execution_count": 12
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "zo3bQIomz58b"
+      },
+      "source": [
+        "# **DOWNLOADING A LIST OF ARABIC STOPWORDS**"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 13,
+      "metadata": {
+        "id": "GEIXZTykzgkt",
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "outputId": "bef05ca9-90f6-42eb-90ea-ec513181d1c8"
+      },
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "--2024-08-18 15:03:42--  https://raw.githubusercontent.com/alaa-a-a/multi-dialect-arabic-stop-words/main/Stop-words/stop_list_1177.txt\n",
+            "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n",
+            "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n",
+            "HTTP request sent, awaiting response... 200 OK\n",
+            "Length: 11468 (11K) [text/plain]\n",
+            "Saving to: ‘stop_list_1177.txt.2’\n",
+            "\n",
+            "stop_list_1177.txt. 100%[===================>]  11.20K  --.-KB/s    in 0s      \n",
+            "\n",
+            "2024-08-18 15:03:43 (89.4 MB/s) - ‘stop_list_1177.txt.2’ saved [11468/11468]\n",
+            "\n"
+          ]
+        }
+      ],
+      "source": [
+        "# Alharbi, Alaa, and Mark Lee. \"Kawarith: an Arabic Twitter Corpus for Crisis Events.\"\n",
+        "# Proceedings of the Sixth Arabic Natural Language Processing Workshop. 2021\n",
+        "\n",
+        "# -nc (no-clobber): skip the download when the file already exists, so re-running this\n",
+        "# cell does not pile up stop_list_1177.txt.1, .2, ... copies that are never read.\n",
+        "!wget -nc https://raw.githubusercontent.com/alaa-a-a/multi-dialect-arabic-stop-words/main/Stop-words/stop_list_1177.txt\n",
+        "\n",
+        "# The file is read line by line: strip line endings and surrounding whitespace,\n",
+        "# and skip blank lines so no empty string ends up in the list.\n",
+        "with open('./stop_list_1177.txt', encoding='utf-8') as f:\n",
+        "    arabic_stop_words = [line.strip() for line in f if line.strip()]"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 14,
+      "metadata": {
+        "id": "cp8SmC170cqH",
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "outputId": "c3844a28-7993-44e2-b5c0-56d0fa85e4de"
+      },
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "[nltk_data] Downloading package stopwords to /root/nltk_data...\n",
+            "[nltk_data]   Package stopwords is already up-to-date!\n"
+          ]
+        }
+      ],
+      "source": [
+        "import nltk\n",
+        "from nltk.corpus import stopwords\n",
+        "from nltk.tokenize import WordPunctTokenizer\n",
+        "from nltk.stem.isri import ISRIStemmer\n",
+        "import string\n",
+        "import re\n",
+        "from bs4 import BeautifulSoup\n",
+        "# Fetch the NLTK stopwords corpus (NLTK reports it as up to date when already present).\n",
+        "nltk.download('stopwords')\n",
+        "\n",
+        "\n",
+        "# Shared tokenizer instance; clean_str uses it to split the cleaned text into tokens.\n",
+        "tok = WordPunctTokenizer()\n",
+        "\n",
+        "def normalize_arabic(text):\n",
+        "    \"\"\"Collapse common Arabic letter variants into one canonical form.\n",
+        "\n",
+        "    Alef variants become a bare alef, alef maqsura becomes yeh, hamza-on-waw and\n",
+        "    hamza-on-yeh become a standalone hamza, teh marbuta becomes heh, and gaf becomes kaf.\n",
+        "\n",
+        "    Args:\n",
+        "        text: The string to normalize.\n",
+        "\n",
+        "    Returns:\n",
+        "        The string with every substitution applied, in the order listed below.\n",
+        "    \"\"\"\n",
+        "    # (pattern, replacement) pairs, applied one after another.\n",
+        "    substitutions = (\n",
+        "        (\"[إأآا]\", \"ا\"),\n",
+        "        (\"ى\", \"ي\"),\n",
+        "        (\"ؤ\", \"ء\"),\n",
+        "        (\"ئ\", \"ء\"),\n",
+        "        (\"ة\", \"ه\"),\n",
+        "        (\"گ\", \"ك\"),\n",
+        "    )\n",
+        "    for pattern, replacement in substitutions:\n",
+        "        text = re.sub(pattern, replacement, text)\n",
+        "    return text\n",
+        "\n",
+        "\n",
+        "def remove_diacritics(text):\n",
+        "    \"\"\"Delete Arabic diacritic marks and the tatweel (kashida) stretch character.\n",
+        "\n",
+        "    Args:\n",
+        "        text: The string to clean.\n",
+        "\n",
+        "    Returns:\n",
+        "        The string without any of the marks listed in the pattern below.\n",
+        "    \"\"\"\n",
+        "    # re.VERBOSE ignores the whitespace and the trailing '#' comments inside the pattern,\n",
+        "    # so each line contributes exactly one alternative.\n",
+        "    diacritics_re = re.compile(\"\"\"\n",
+        "                             ّ    | # Tashdid\n",
+        "                             َ    | # Fatha\n",
+        "                             ً    | # Tanwin Fath\n",
+        "                             ُ    | # Damma\n",
+        "                             ٌ    | # Tanwin Damm\n",
+        "                             ِ    | # Kasra\n",
+        "                             ٍ    | # Tanwin Kasr\n",
+        "                             ْ    | # Sukun\n",
+        "                             ـ     # Tatwil/Kashida\n",
+        "                         \"\"\", re.VERBOSE)\n",
+        "    return diacritics_re.sub('', text)\n",
+        "\n",
+        "\n",
+        "def remove_punctuations(text):\n",
+        "    \"\"\"Delete every Arabic and ASCII punctuation character from `text`.\n",
+        "\n",
+        "    Args:\n",
+        "        text: The string to clean.\n",
+        "\n",
+        "    Returns:\n",
+        "        The string with all characters from the Arabic punctuation list below and\n",
+        "        from `string.punctuation` removed (nothing is inserted in their place).\n",
+        "    \"\"\"\n",
+        "    arabic_punctuations = '''`÷×؛<>_()*&^%][ـ،/:\"؟.,'{}~¦+|!”…“–ـ'''\n",
+        "    chars_to_drop = arabic_punctuations + string.punctuation\n",
+        "    # Third argument of str.maketrans lists the characters to delete.\n",
+        "    return text.translate(str.maketrans('', '', chars_to_drop))\n",
+        "\n",
+        "\n",
+        "def remove_repeating_char(text):\n",
+        "    \"\"\"Shorten every run of one repeated character to exactly two occurrences.\n",
+        "\n",
+        "    Single characters and runs of two are left as they are; longer runs are cut\n",
+        "    down to two. `.` does not match newlines, so line breaks are never collapsed.\n",
+        "\n",
+        "    Args:\n",
+        "        text: The string to clean.\n",
+        "\n",
+        "    Returns:\n",
+        "        The string with elongated character runs capped at length two.\n",
+        "    \"\"\"\n",
+        "    repeated_run = re.compile(r'(.)\\1+')\n",
+        "    return repeated_run.sub(r'\\1\\1', text)\n",
+        "\n",
+        "def remove_stop_words(text):\n",
+        "    \"\"\"Lower-case and tokenize `text`, then drop tokens found in `arabic_stop_words`.\n",
+        "\n",
+        "    Args:\n",
+        "        text: The string to filter.\n",
+        "\n",
+        "    Returns:\n",
+        "        The remaining tokens joined by single spaces, with outer whitespace stripped.\n",
+        "    \"\"\"\n",
+        "    tokens = nltk.tokenize.wordpunct_tokenize(text.lower())\n",
+        "    kept = (token for token in tokens if token not in arabic_stop_words)\n",
+        "    return \" \".join(kept).strip()\n",
+        "\n",
+        "\n",
+        "\n",
+        "def remove_non_arabic_letters(text):\n",
+        "    \"\"\"Blank out ASCII words, mentions and hash signs, then drop a long tatweel run.\n",
+        "\n",
+        "    Each run of ASCII letters, digits, '@' or '_', each '#', and each token starting\n",
+        "    with 'http' is replaced by one space. After that, the fixed-length run of tatweel\n",
+        "    characters in the second pattern is deleted wherever it occurs.\n",
+        "\n",
+        "    Args:\n",
+        "        text: The string to clean.\n",
+        "\n",
+        "    Returns:\n",
+        "        The cleaned string; replaced spans leave a space behind.\n",
+        "    \"\"\"\n",
+        "    without_ascii = re.sub(r'([@A-Za-z0-9_]+)|#|http\\S+', ' ', text)\n",
+        "    return re.sub(r'ـــــــــــــ', '', without_ascii)\n",
+        "\n",
+        "\n",
+        "\n",
+        "\n",
+        "def clean_str(text):\n",
+        "    \"\"\"Run the cleaning pipeline on one tweet and return its tokens joined by spaces.\n",
+        "\n",
+        "    Steps, in order: blank out ASCII words/mentions/hash signs, delete punctuation,\n",
+        "    delete diacritics, cap character repeats at two, extract text with BeautifulSoup,\n",
+        "    strip any remaining @mentions/URLs, then tokenize with the module-level `tok`.\n",
+        "    Stop-word removal is currently commented out.\n",
+        "\n",
+        "    Args:\n",
+        "        text: The raw tweet text.\n",
+        "\n",
+        "    Returns:\n",
+        "        The cleaned tokens joined by single spaces, with outer whitespace stripped.\n",
+        "    \"\"\"\n",
+        "    text = remove_non_arabic_letters(text)\n",
+        "    text = remove_punctuations(text)\n",
+        "    text = remove_diacritics(text)\n",
+        "    text = remove_repeating_char(text)\n",
+        "    # text = remove_stop_words(text)\n",
+        "\n",
+        "    # Pull the plain text out of any HTML markup.\n",
+        "    # NOTE(review): '<', '>', '@' and ASCII letters are already removed by the steps above,\n",
+        "    # so this step and the mention/URL regex below have little left to match. They are\n",
+        "    # kept so the output stays exactly as before.\n",
+        "    souped = BeautifulSoup(text, 'lxml').get_text()\n",
+        "    pat1 = r'@[A-Za-z0-9]+'\n",
+        "    pat2 = r'https?://[A-Za-z0-9./]+'\n",
+        "    combined_pat = r'|'.join((pat1, pat2))\n",
+        "    stripped = re.sub(combined_pat, '', souped)\n",
+        "\n",
+        "    # The former `stripped.decode(\"utf-8-sig\")` call was Python 2 only: on a Python 3 str\n",
+        "    # it always raised AttributeError, which a bare `except:` swallowed, so the text was\n",
+        "    # passed through unchanged. Tokenize it directly instead of hiding errors.\n",
+        "    words = tok.tokenize(stripped)\n",
+        "    return \" \".join(words).strip()"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "KU_cHcIf2H_V"
+      },
+      "source": [
+        "## **Applying preprocessing to our dataset**"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 15,
+      "metadata": {
+        "id": "ouoJlEWv0c0M",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 241
+        },
+        "outputId": "0ff531cb-be6e-47bc-adaf-2e10989e214f"
+      },
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Cleaning and parsing the training dataset...\n",
+            "\n"
+          ]
+        },
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "                                                Text    label\n",
+              "0                   وصارت فطاير البقالات غذاء صحي 👎🏻  NOT_OFF\n",
+              "1  روحي لبريده تلقين اشباه كثير بس ماحد زيكم مشفو...      OFF\n",
+              "2                  مش باين حاجه خالص 😣 مش عارف بقى 😔  NOT_OFF\n",
+              "3  اليوم الاثنين 👏 يقولك من المسلمين عندهم خاله ا...  NOT_OFF\n",
+              "4                      حمدلله ماحطها في فمي اساسا 😷🤢  NOT_OFF"
+            ],
+            "text/html": [
+              "\n",
+              "  <div id=\"df-3f1d1a36-c0b0-46ec-8ebe-26c2b8bc47ae\" class=\"colab-df-container\">\n",
+              "    <div>\n",
+              "<style scoped>\n",
+              "    .dataframe tbody tr th:only-of-type {\n",
+              "        vertical-align: middle;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe tbody tr th {\n",
+              "        vertical-align: top;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe thead th {\n",
+              "        text-align: right;\n",
+              "    }\n",
+              "</style>\n",
+              "<table border=\"1\" class=\"dataframe\">\n",
+              "  <thead>\n",
+              "    <tr style=\"text-align: right;\">\n",
+              "      <th></th>\n",
+              "      <th>Text</th>\n",
+              "      <th>label</th>\n",
+              "    </tr>\n",
+              "  </thead>\n",
+              "  <tbody>\n",
+              "    <tr>\n",
+              "      <th>0</th>\n",
+              "      <td>وصارت فطاير البقالات غذاء صحي 👎🏻</td>\n",
+              "      <td>NOT_OFF</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>1</th>\n",
+              "      <td>روحي لبريده تلقين اشباه كثير بس ماحد زيكم مشفو...</td>\n",
+              "      <td>OFF</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>2</th>\n",
+              "      <td>مش باين حاجه خالص 😣 مش عارف بقى 😔</td>\n",
+              "      <td>NOT_OFF</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>3</th>\n",
+              "      <td>اليوم الاثنين 👏 يقولك من المسلمين عندهم خاله ا...</td>\n",
+              "      <td>NOT_OFF</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>4</th>\n",
+              "      <td>حمدلله ماحطها في فمي اساسا 😷🤢</td>\n",
+              "      <td>NOT_OFF</td>\n",
+              "    </tr>\n",
+              "  </tbody>\n",
+              "</table>\n",
+              "</div>\n",
+              "    <div class=\"colab-df-buttons\">\n",
+              "\n",
+              "  <div class=\"colab-df-container\">\n",
+              "    <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-3f1d1a36-c0b0-46ec-8ebe-26c2b8bc47ae')\"\n",
+              "            title=\"Convert this dataframe to an interactive table.\"\n",
+              "            style=\"display:none;\">\n",
+              "\n",
+              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
+              "    <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
+              "  </svg>\n",
+              "    </button>\n",
+              "\n",
+              "  <style>\n",
+              "    .colab-df-container {\n",
+              "      display:flex;\n",
+              "      gap: 12px;\n",
+              "    }\n",
+              "\n",
+              "    .colab-df-convert {\n",
+              "      background-color: #E8F0FE;\n",
+              "      border: none;\n",
+              "      border-radius: 50%;\n",
+              "      cursor: pointer;\n",
+              "      display: none;\n",
+              "      fill: #1967D2;\n",
+              "      height: 32px;\n",
+              "      padding: 0 0 0 0;\n",
+              "      width: 32px;\n",
+              "    }\n",
+              "\n",
+              "    .colab-df-convert:hover {\n",
+              "      background-color: #E2EBFA;\n",
+              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
+              "      fill: #174EA6;\n",
+              "    }\n",
+              "\n",
+              "    .colab-df-buttons div {\n",
+              "      margin-bottom: 4px;\n",
+              "    }\n",
+              "\n",
+              "    [theme=dark] .colab-df-convert {\n",
+              "      background-color: #3B4455;\n",
+              "      fill: #D2E3FC;\n",
+              "    }\n",
+              "\n",
+              "    [theme=dark] .colab-df-convert:hover {\n",
+              "      background-color: #434B5C;\n",
+              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
+              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
+              "      fill: #FFFFFF;\n",
+              "    }\n",
+              "  </style>\n",
+              "\n",
+              "    <script>\n",
+              "      const buttonEl =\n",
+              "        document.querySelector('#df-3f1d1a36-c0b0-46ec-8ebe-26c2b8bc47ae button.colab-df-convert');\n",
+              "      buttonEl.style.display =\n",
+              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
+              "\n",
+              "      async function convertToInteractive(key) {\n",
+              "        const element = document.querySelector('#df-3f1d1a36-c0b0-46ec-8ebe-26c2b8bc47ae');\n",
+              "        const dataTable =\n",
+              "          await google.colab.kernel.invokeFunction('convertToInteractive',\n",
+              "                                                    [key], {});\n",
+              "        if (!dataTable) return;\n",
+              "\n",
+              "        const docLinkHtml = 'Like what you see? Visit the ' +\n",
+              "          '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
+              "          + ' to learn more about interactive tables.';\n",
+              "        element.innerHTML = '';\n",
+              "        dataTable['output_type'] = 'display_data';\n",
+              "        await google.colab.output.renderOutput(dataTable, element);\n",
+              "        const docLink = document.createElement('div');\n",
+              "        docLink.innerHTML = docLinkHtml;\n",
+              "        element.appendChild(docLink);\n",
+              "      }\n",
+              "    </script>\n",
+              "  </div>\n",
+              "\n",
+              "\n",
+              "<div id=\"df-9d1fccca-429e-4e66-8c4c-7fa0514ac675\">\n",
+              "  <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-9d1fccca-429e-4e66-8c4c-7fa0514ac675')\"\n",
+              "            title=\"Suggest charts\"\n",
+              "            style=\"display:none;\">\n",
+              "\n",
+              "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
+              "     width=\"24px\">\n",
+              "    <g>\n",
+              "        <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
+              "    </g>\n",
+              "</svg>\n",
+              "  </button>\n",
+              "\n",
+              "<style>\n",
+              "  .colab-df-quickchart {\n",
+              "      --bg-color: #E8F0FE;\n",
+              "      --fill-color: #1967D2;\n",
+              "      --hover-bg-color: #E2EBFA;\n",
+              "      --hover-fill-color: #174EA6;\n",
+              "      --disabled-fill-color: #AAA;\n",
+              "      --disabled-bg-color: #DDD;\n",
+              "  }\n",
+              "\n",
+              "  [theme=dark] .colab-df-quickchart {\n",
+              "      --bg-color: #3B4455;\n",
+              "      --fill-color: #D2E3FC;\n",
+              "      --hover-bg-color: #434B5C;\n",
+              "      --hover-fill-color: #FFFFFF;\n",
+              "      --disabled-bg-color: #3B4455;\n",
+              "      --disabled-fill-color: #666;\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-quickchart {\n",
+              "    background-color: var(--bg-color);\n",
+              "    border: none;\n",
+              "    border-radius: 50%;\n",
+              "    cursor: pointer;\n",
+              "    display: none;\n",
+              "    fill: var(--fill-color);\n",
+              "    height: 32px;\n",
+              "    padding: 0;\n",
+              "    width: 32px;\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-quickchart:hover {\n",
+              "    background-color: var(--hover-bg-color);\n",
+              "    box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
+              "    fill: var(--button-hover-fill-color);\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-quickchart-complete:disabled,\n",
+              "  .colab-df-quickchart-complete:disabled:hover {\n",
+              "    background-color: var(--disabled-bg-color);\n",
+              "    fill: var(--disabled-fill-color);\n",
+              "    box-shadow: none;\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-spinner {\n",
+              "    border: 2px solid var(--fill-color);\n",
+              "    border-color: transparent;\n",
+              "    border-bottom-color: var(--fill-color);\n",
+              "    animation:\n",
+              "      spin 1s steps(1) infinite;\n",
+              "  }\n",
+              "\n",
+              "  @keyframes spin {\n",
+              "    0% {\n",
+              "      border-color: transparent;\n",
+              "      border-bottom-color: var(--fill-color);\n",
+              "      border-left-color: var(--fill-color);\n",
+              "    }\n",
+              "    20% {\n",
+              "      border-color: transparent;\n",
+              "      border-left-color: var(--fill-color);\n",
+              "      border-top-color: var(--fill-color);\n",
+              "    }\n",
+              "    30% {\n",
+              "      border-color: transparent;\n",
+              "      border-left-color: var(--fill-color);\n",
+              "      border-top-color: var(--fill-color);\n",
+              "      border-right-color: var(--fill-color);\n",
+              "    }\n",
+              "    40% {\n",
+              "      border-color: transparent;\n",
+              "      border-right-color: var(--fill-color);\n",
+              "      border-top-color: var(--fill-color);\n",
+              "    }\n",
+              "    60% {\n",
+              "      border-color: transparent;\n",
+              "      border-right-color: var(--fill-color);\n",
+              "    }\n",
+              "    80% {\n",
+              "      border-color: transparent;\n",
+              "      border-right-color: var(--fill-color);\n",
+              "      border-bottom-color: var(--fill-color);\n",
+              "    }\n",
+              "    90% {\n",
+              "      border-color: transparent;\n",
+              "      border-bottom-color: var(--fill-color);\n",
+              "    }\n",
+              "  }\n",
+              "</style>\n",
+              "\n",
+              "  <script>\n",
+              "    async function quickchart(key) {\n",
+              "      const quickchartButtonEl =\n",
+              "        document.querySelector('#' + key + ' button');\n",
+              "      quickchartButtonEl.disabled = true;  // To prevent multiple clicks.\n",
+              "      quickchartButtonEl.classList.add('colab-df-spinner');\n",
+              "      try {\n",
+              "        const charts = await google.colab.kernel.invokeFunction(\n",
+              "            'suggestCharts', [key], {});\n",
+              "      } catch (error) {\n",
+              "        console.error('Error during call to suggestCharts:', error);\n",
+              "      }\n",
+              "      quickchartButtonEl.classList.remove('colab-df-spinner');\n",
+              "      quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
+              "    }\n",
+              "    (() => {\n",
+              "      let quickchartButtonEl =\n",
+              "        document.querySelector('#df-9d1fccca-429e-4e66-8c4c-7fa0514ac675 button');\n",
+              "      quickchartButtonEl.style.display =\n",
+              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
+              "    })();\n",
+              "  </script>\n",
+              "</div>\n",
+              "\n",
+              "    </div>\n",
+              "  </div>\n"
+            ],
+            "application/vnd.google.colaboratory.intrinsic+json": {
+              "type": "dataframe",
+              "variable_name": "train_data",
+              "summary": "{\n  \"name\": \"train_data\",\n  \"rows\": 8886,\n  \"fields\": [\n    {\n      \"column\": \"Text\",\n      \"properties\": {\n        \"dtype\": \"string\",\n        \"num_unique_values\": 8870,\n        \"samples\": [\n          \"\\u0647\\u0647 \\u0645\\u0648 \\u0628\\u0646\\u062a\\u064a \\ud83d\\udc4a\\ud83d\\udc4a\\ud83d\\ude02 \\u0645\\u0627\\u0632\\u0644\\u062a \\u0639\\u0630\\u0628\\u0627\\u0621 \\u0648\\u0627\\u0641\\u062a\\u062e\\u0631 \\ud83d\\ude0c\\ud83d\\ude02\",\n          \"\\u064a\\u0627\\u0631\\u0628 \\u0635\\u0628\\u0631\\u0646\\u064a \\u0639\\u0644\\u0649 \\u0644\\u0648\\u0645\\u064a \\ud83d\\udc94\\ud83d\\udc94 \\u064a\\u0635\\u062d\\u064a\\u0646\\u064a \\u0645\\u0646 \\u0639\\u0632 \\u0646\\u0648\\u0645\\u064a \\u0628\\u0627\\u0644\\u0636\\u0631\\u0628 \\u0639\\u0644\\u0649 \\u0631\\u0627\\u0633\\u064a \\ud83d\\ude21\\ud83d\\udc4a\",\n          \"\\u0648\\u0627\\u0644\\u0644\\u0647 \\u0627\\u0644\\u0646\\u0627\\u0642\\u0635\\u0647 \\u0627\\u0645\\u0643 \\u0644\\u0644\\u0627\\u0633\\u0641 \\u062a\\u062d\\u062a\\u0627\\u062c \\u0645\\u0646\\u0643\\u0645 \\u0627\\u0646 \\u062a\\u0643\\u0645\\u0644\\u0648\\u0647\\u0627 \\u0627\\u0645\\u0627 \\u0646\\u062d\\u0646 \\u0648\\u0627\\u0644\\u0644\\u0647 \\u0627\\u0644\\u062d\\u0645\\u062f \\u0643\\u0627\\u0645\\u0644\\u064a\\u0646 \\u0639\\u0642\\u0644 \\u064a\\u0627\\u0646\\u0627\\u0642\\u0635 \\u0627\\u0644\\u0639\\u0642\\u0644 \\u0644\\u0627 \\u0648\\u0627\\u0639\\u0644\\u0627\\u0645\\u064a \\ud83e\\udd22\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"label\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 2,\n        \"samples\": [\n          \"OFF\",\n          \"NOT_OFF\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    }\n  ]\n}"
+            }
+          },
+          "metadata": {},
+          "execution_count": 15
+        }
+      ],
+      "source": [
+        "# Announce the step; the escaped newline leaves a blank line before the preview below.\n",
+        "print(\"Cleaning and parsing the training dataset...\\n\")\n",
+        "\n",
+        "# Run clean_str (defined elsewhere in this notebook) on every entry of the Text column\n",
+        "# and store the result back in that column. The function is passed to apply directly;\n",
+        "# wrapping it in a lambda adds nothing.\n",
+        "# NOTE: the column is overwritten, so re-running this cell cleans already-cleaned text.\n",
+        "train_data[\"Text\"] = train_data[\"Text\"].apply(clean_str)\n",
+        "\n",
+        "# Preview the first rows after cleaning.\n",
+        "train_data.head()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 16,
+      "metadata": {
+        "id": "Gv3DF9UE0c3N",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 241
+        },
+        "outputId": "39a88f27-303c-4928-d1da-3e47ab1a7ade"
+      },
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Cleaning and parsing the development dataset...\n",
+            "\n"
+          ]
+        },
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "                                                Text    label\n",
+              "0                      داليا مبارك مادري ليش تقرفت 😷  NOT_OFF\n",
+              "1  ابديت السناب الجديد ❌ حاس الناس حوس أشوف مشاهي...  NOT_OFF\n",
+              "2                             هييه والله واايدد 🔪🔪💔💔  NOT_OFF\n",
+              "3                                      اكيد اخس شي 😤  NOT_OFF\n",
+              "4                          مابي شي الحين غير فراشي 😣  NOT_OFF"
+            ],
+            "text/html": [
+              "\n",
+              "  <div id=\"df-90083e94-12bf-4027-8889-3ae04674312a\" class=\"colab-df-container\">\n",
+              "    <div>\n",
+              "<style scoped>\n",
+              "    .dataframe tbody tr th:only-of-type {\n",
+              "        vertical-align: middle;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe tbody tr th {\n",
+              "        vertical-align: top;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe thead th {\n",
+              "        text-align: right;\n",
+              "    }\n",
+              "</style>\n",
+              "<table border=\"1\" class=\"dataframe\">\n",
+              "  <thead>\n",
+              "    <tr style=\"text-align: right;\">\n",
+              "      <th></th>\n",
+              "      <th>Text</th>\n",
+              "      <th>label</th>\n",
+              "    </tr>\n",
+              "  </thead>\n",
+              "  <tbody>\n",
+              "    <tr>\n",
+              "      <th>0</th>\n",
+              "      <td>داليا مبارك مادري ليش تقرفت 😷</td>\n",
+              "      <td>NOT_OFF</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>1</th>\n",
+              "      <td>ابديت السناب الجديد ❌ حاس الناس حوس أشوف مشاهي...</td>\n",
+              "      <td>NOT_OFF</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>2</th>\n",
+              "      <td>هييه والله واايدد 🔪🔪💔💔</td>\n",
+              "      <td>NOT_OFF</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>3</th>\n",
+              "      <td>اكيد اخس شي 😤</td>\n",
+              "      <td>NOT_OFF</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>4</th>\n",
+              "      <td>مابي شي الحين غير فراشي 😣</td>\n",
+              "      <td>NOT_OFF</td>\n",
+              "    </tr>\n",
+              "  </tbody>\n",
+              "</table>\n",
+              "</div>\n",
+              "    <div class=\"colab-df-buttons\">\n",
+              "\n",
+              "  <div class=\"colab-df-container\">\n",
+              "    <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-90083e94-12bf-4027-8889-3ae04674312a')\"\n",
+              "            title=\"Convert this dataframe to an interactive table.\"\n",
+              "            style=\"display:none;\">\n",
+              "\n",
+              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
+              "    <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
+              "  </svg>\n",
+              "    </button>\n",
+              "\n",
+              "  <style>\n",
+              "    .colab-df-container {\n",
+              "      display:flex;\n",
+              "      gap: 12px;\n",
+              "    }\n",
+              "\n",
+              "    .colab-df-convert {\n",
+              "      background-color: #E8F0FE;\n",
+              "      border: none;\n",
+              "      border-radius: 50%;\n",
+              "      cursor: pointer;\n",
+              "      display: none;\n",
+              "      fill: #1967D2;\n",
+              "      height: 32px;\n",
+              "      padding: 0 0 0 0;\n",
+              "      width: 32px;\n",
+              "    }\n",
+              "\n",
+              "    .colab-df-convert:hover {\n",
+              "      background-color: #E2EBFA;\n",
+              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
+              "      fill: #174EA6;\n",
+              "    }\n",
+              "\n",
+              "    .colab-df-buttons div {\n",
+              "      margin-bottom: 4px;\n",
+              "    }\n",
+              "\n",
+              "    [theme=dark] .colab-df-convert {\n",
+              "      background-color: #3B4455;\n",
+              "      fill: #D2E3FC;\n",
+              "    }\n",
+              "\n",
+              "    [theme=dark] .colab-df-convert:hover {\n",
+              "      background-color: #434B5C;\n",
+              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
+              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
+              "      fill: #FFFFFF;\n",
+              "    }\n",
+              "  </style>\n",
+              "\n",
+              "    <script>\n",
+              "      const buttonEl =\n",
+              "        document.querySelector('#df-90083e94-12bf-4027-8889-3ae04674312a button.colab-df-convert');\n",
+              "      buttonEl.style.display =\n",
+              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
+              "\n",
+              "      async function convertToInteractive(key) {\n",
+              "        const element = document.querySelector('#df-90083e94-12bf-4027-8889-3ae04674312a');\n",
+              "        const dataTable =\n",
+              "          await google.colab.kernel.invokeFunction('convertToInteractive',\n",
+              "                                                    [key], {});\n",
+              "        if (!dataTable) return;\n",
+              "\n",
+              "        const docLinkHtml = 'Like what you see? Visit the ' +\n",
+              "          '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
+              "          + ' to learn more about interactive tables.';\n",
+              "        element.innerHTML = '';\n",
+              "        dataTable['output_type'] = 'display_data';\n",
+              "        await google.colab.output.renderOutput(dataTable, element);\n",
+              "        const docLink = document.createElement('div');\n",
+              "        docLink.innerHTML = docLinkHtml;\n",
+              "        element.appendChild(docLink);\n",
+              "      }\n",
+              "    </script>\n",
+              "  </div>\n",
+              "\n",
+              "\n",
+              "<div id=\"df-8eeaba63-b104-4e9a-86e0-5e1ad417cce7\">\n",
+              "  <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-8eeaba63-b104-4e9a-86e0-5e1ad417cce7')\"\n",
+              "            title=\"Suggest charts\"\n",
+              "            style=\"display:none;\">\n",
+              "\n",
+              "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
+              "     width=\"24px\">\n",
+              "    <g>\n",
+              "        <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
+              "    </g>\n",
+              "</svg>\n",
+              "  </button>\n",
+              "\n",
+              "<style>\n",
+              "  .colab-df-quickchart {\n",
+              "      --bg-color: #E8F0FE;\n",
+              "      --fill-color: #1967D2;\n",
+              "      --hover-bg-color: #E2EBFA;\n",
+              "      --hover-fill-color: #174EA6;\n",
+              "      --disabled-fill-color: #AAA;\n",
+              "      --disabled-bg-color: #DDD;\n",
+              "  }\n",
+              "\n",
+              "  [theme=dark] .colab-df-quickchart {\n",
+              "      --bg-color: #3B4455;\n",
+              "      --fill-color: #D2E3FC;\n",
+              "      --hover-bg-color: #434B5C;\n",
+              "      --hover-fill-color: #FFFFFF;\n",
+              "      --disabled-bg-color: #3B4455;\n",
+              "      --disabled-fill-color: #666;\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-quickchart {\n",
+              "    background-color: var(--bg-color);\n",
+              "    border: none;\n",
+              "    border-radius: 50%;\n",
+              "    cursor: pointer;\n",
+              "    display: none;\n",
+              "    fill: var(--fill-color);\n",
+              "    height: 32px;\n",
+              "    padding: 0;\n",
+              "    width: 32px;\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-quickchart:hover {\n",
+              "    background-color: var(--hover-bg-color);\n",
+              "    box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
+              "    fill: var(--button-hover-fill-color);\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-quickchart-complete:disabled,\n",
+              "  .colab-df-quickchart-complete:disabled:hover {\n",
+              "    background-color: var(--disabled-bg-color);\n",
+              "    fill: var(--disabled-fill-color);\n",
+              "    box-shadow: none;\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-spinner {\n",
+              "    border: 2px solid var(--fill-color);\n",
+              "    border-color: transparent;\n",
+              "    border-bottom-color: var(--fill-color);\n",
+              "    animation:\n",
+              "      spin 1s steps(1) infinite;\n",
+              "  }\n",
+              "\n",
+              "  @keyframes spin {\n",
+              "    0% {\n",
+              "      border-color: transparent;\n",
+              "      border-bottom-color: var(--fill-color);\n",
+              "      border-left-color: var(--fill-color);\n",
+              "    }\n",
+              "    20% {\n",
+              "      border-color: transparent;\n",
+              "      border-left-color: var(--fill-color);\n",
+              "      border-top-color: var(--fill-color);\n",
+              "    }\n",
+              "    30% {\n",
+              "      border-color: transparent;\n",
+              "      border-left-color: var(--fill-color);\n",
+              "      border-top-color: var(--fill-color);\n",
+              "      border-right-color: var(--fill-color);\n",
+              "    }\n",
+              "    40% {\n",
+              "      border-color: transparent;\n",
+              "      border-right-color: var(--fill-color);\n",
+              "      border-top-color: var(--fill-color);\n",
+              "    }\n",
+              "    60% {\n",
+              "      border-color: transparent;\n",
+              "      border-right-color: var(--fill-color);\n",
+              "    }\n",
+              "    80% {\n",
+              "      border-color: transparent;\n",
+              "      border-right-color: var(--fill-color);\n",
+              "      border-bottom-color: var(--fill-color);\n",
+              "    }\n",
+              "    90% {\n",
+              "      border-color: transparent;\n",
+              "      border-bottom-color: var(--fill-color);\n",
+              "    }\n",
+              "  }\n",
+              "</style>\n",
+              "\n",
+              "  <script>\n",
+              "    async function quickchart(key) {\n",
+              "      const quickchartButtonEl =\n",
+              "        document.querySelector('#' + key + ' button');\n",
+              "      quickchartButtonEl.disabled = true;  // To prevent multiple clicks.\n",
+              "      quickchartButtonEl.classList.add('colab-df-spinner');\n",
+              "      try {\n",
+              "        const charts = await google.colab.kernel.invokeFunction(\n",
+              "            'suggestCharts', [key], {});\n",
+              "      } catch (error) {\n",
+              "        console.error('Error during call to suggestCharts:', error);\n",
+              "      }\n",
+              "      quickchartButtonEl.classList.remove('colab-df-spinner');\n",
+              "      quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
+              "    }\n",
+              "    (() => {\n",
+              "      let quickchartButtonEl =\n",
+              "        document.querySelector('#df-8eeaba63-b104-4e9a-86e0-5e1ad417cce7 button');\n",
+              "      quickchartButtonEl.style.display =\n",
+              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
+              "    })();\n",
+              "  </script>\n",
+              "</div>\n",
+              "\n",
+              "    </div>\n",
+              "  </div>\n"
+            ],
+            "application/vnd.google.colaboratory.intrinsic+json": {
+              "type": "dataframe",
+              "variable_name": "dev_data",
+              "summary": "{\n  \"name\": \"dev_data\",\n  \"rows\": 1269,\n  \"fields\": [\n    {\n      \"column\": \"Text\",\n      \"properties\": {\n        \"dtype\": \"string\",\n        \"num_unique_values\": 1268,\n        \"samples\": [\n          \"\\u0644\\u0648 \\u0643\\u0627\\u0646 \\u0627\\u0644\\u0627\\u0646\\u062a\\u0638\\u0627\\u0631 \\u0631\\u062c\\u0644\\u0627 \\u0644\\u0642\\u062a\\u0644\\u062a\\u0647 \\ud83d\\ude21\",\n          \"\\u0639\\u0646\\u062f\\u064a \\u0637\\u0641\\u0644\\u0647 \\u0635\\u063a\\u064a\\u0631\\u0629 \\u0643\\u0644 \\u0645\\u0627 \\u0633\\u0648\\u062a \\u0645\\u0635\\u064a\\u0628\\u0647 \\u0642\\u0627\\u0644\\u062a \\u0627\\u0645\\u0632\\u062d \\u0631\\u0645\\u062a \\u0627\\u0644\\u0645\\u0641\\u062a\\u0627\\u062d \\u0645\\u0639 \\u0627\\u0644\\u0634\\u0628\\u0627\\u0643 \\u0644\\u064a\\u0634 \\u064a\\u0627\\u0628\\u0627\\u0628\\u0627 \\u0642\\u0627\\u0644\\u062a \\u0627\\u0645\\u0632\\u062d \\u0631\\u0645\\u062a \\u0645\\u0639\\u062c\\u0648\\u0646 \\u0627\\u0644\\u0627\\u0633\\u0646\\u0627\\u0646 \\u0644\\u064a\\u0634 \\u064a\\u0627 \\u0628\\u0627\\u0628\\u0627 \\u0642\\u0627\\u0644\\u062a \\u0627\\u0645\\u0632\\u062d \\u0632\\u0639\\u0644\\u062a \\u0639\\u0644\\u064a \\u0642\\u0627\\u0644\\u062a \\u0644\\u064a \\u0643\\u0644 \\u062a\\u0631\\u0627\\u0628 \\u0627\\u0646\\u0635\\u062f\\u0645\\u062a \\u0645\\u062f\\u0631\\u064a \\u0645\\u0646 \\u0641\\u064a\\u0646 \\u062c\\u0627\\u064a\\u0628\\u0629 \\u0647\\u0627\\u0644\\u0643\\u0644\\u0645\\u0629 \\u0642\\u0644\\u062a \\u0627\\u064a\\u0634 \\u0627\\u064a\\u0634 \\ud83d\\ude20 \\u0642\\u0627\\u0644\\u062a \\u0627\\u0645\\u0632\\u062d \\u0647\\u0647\",\n          \"\\u0648\\u0639\\u0642\\u0644\\u0643 \\u0643\\u0627\\u0646 \\u0641\\u064a\\u0646 \\u0645\\u0646 \\u0627\\u0644\\u0623\\u0648\\u0644 \\u064a\\u0627 \\u0633\\u0643\\u0631 \\ud83d\\ude02\\ud83d\\ude02 \\u0648\\u0627\\u0646\\u062a \\u062a\\u0642\\u062f\\u0631 \\u062a\\u0633\\u0639\\u062f\\u0647\\u0627 
\\u064a\\u0627\\u0641\\u0627\\u0644\\u062d \\u0628\\u0633 \\u0627\\u0644\\u0628\\u062f\\u0627\\u064a\\u0629 \\u063a\\u0644\\u0637 \\ud83d\\udc4a\\ud83d\\udc4a\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"label\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 2,\n        \"samples\": [\n          \"OFF\",\n          \"NOT_OFF\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    }\n  ]\n}"
+            }
+          },
+          "metadata": {},
+          "execution_count": 16
+        }
+      ],
+      "source": [
+        "# Announce the step; the escaped newline leaves a blank line before the preview below.\n",
+        "print(\"Cleaning and parsing the development dataset...\\n\")\n",
+        "\n",
+        "# Run clean_str (defined elsewhere in this notebook) on every entry of the Text column\n",
+        "# and store the result back in that column. The function is passed to apply directly;\n",
+        "# wrapping it in a lambda adds nothing.\n",
+        "# NOTE: the column is overwritten, so re-running this cell cleans already-cleaned text.\n",
+        "dev_data[\"Text\"] = dev_data[\"Text\"].apply(clean_str)\n",
+        "\n",
+        "# Preview the first rows after cleaning.\n",
+        "dev_data.head()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Announce the step; the escaped newline leaves a blank line before the preview below.\n",
+        "print(\"Cleaning and parsing the test dataset...\\n\")\n",
+        "\n",
+        "# Run clean_str (defined elsewhere in this notebook) on every entry of the Text column\n",
+        "# and store the result back in that column. The function is passed to apply directly;\n",
+        "# wrapping it in a lambda adds nothing.\n",
+        "# NOTE: the column is overwritten, so re-running this cell cleans already-cleaned text.\n",
+        "test_data[\"Text\"] = test_data[\"Text\"].apply(clean_str)\n",
+        "\n",
+        "# Preview the first rows after cleaning.\n",
+        "test_data.head()"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 241
+        },
+        "id": "5Y4-Z0cZjgFz",
+        "outputId": "189d4705-d232-41cc-9500-be7f91c4a6fa"
+      },
+      "execution_count": 17,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Cleaning and parsing the test dataset...\n",
+            "\n"
+          ]
+        },
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "                                                Text    label\n",
+              "0              مشفتش العرض بتاعهم لا مش مهتمة لا 😩🐸😂  NOT_OFF\n",
+              "1  عندما تكون لوحدك تحس انك لحالك صح 😊 حتى انا مث...  NOT_OFF\n",
+              "2  ماشاء الله الرجال باين عليه محترم البنات متى ت...  NOT_OFF\n",
+              "3                                شسالفة احد يفهمني 😤  NOT_OFF\n",
+              "4         اقوول استريح عاد احتفالاتنا تحط اغانينا 😡😡  NOT_OFF"
+            ],
+            "text/html": [
+              "\n",
+              "  <div id=\"df-0b7c1edc-2e7f-4d3c-baed-6872755723a8\" class=\"colab-df-container\">\n",
+              "    <div>\n",
+              "<style scoped>\n",
+              "    .dataframe tbody tr th:only-of-type {\n",
+              "        vertical-align: middle;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe tbody tr th {\n",
+              "        vertical-align: top;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe thead th {\n",
+              "        text-align: right;\n",
+              "    }\n",
+              "</style>\n",
+              "<table border=\"1\" class=\"dataframe\">\n",
+              "  <thead>\n",
+              "    <tr style=\"text-align: right;\">\n",
+              "      <th></th>\n",
+              "      <th>Text</th>\n",
+              "      <th>label</th>\n",
+              "    </tr>\n",
+              "  </thead>\n",
+              "  <tbody>\n",
+              "    <tr>\n",
+              "      <th>0</th>\n",
+              "      <td>مشفتش العرض بتاعهم لا مش مهتمة لا 😩🐸😂</td>\n",
+              "      <td>NOT_OFF</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>1</th>\n",
+              "      <td>عندما تكون لوحدك تحس انك لحالك صح 😊 حتى انا مث...</td>\n",
+              "      <td>NOT_OFF</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>2</th>\n",
+              "      <td>ماشاء الله الرجال باين عليه محترم البنات متى ت...</td>\n",
+              "      <td>NOT_OFF</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>3</th>\n",
+              "      <td>شسالفة احد يفهمني 😤</td>\n",
+              "      <td>NOT_OFF</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>4</th>\n",
+              "      <td>اقوول استريح عاد احتفالاتنا تحط اغانينا 😡😡</td>\n",
+              "      <td>NOT_OFF</td>\n",
+              "    </tr>\n",
+              "  </tbody>\n",
+              "</table>\n",
+              "</div>\n",
+              "    <div class=\"colab-df-buttons\">\n",
+              "\n",
+              "  <div class=\"colab-df-container\">\n",
+              "    <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-0b7c1edc-2e7f-4d3c-baed-6872755723a8')\"\n",
+              "            title=\"Convert this dataframe to an interactive table.\"\n",
+              "            style=\"display:none;\">\n",
+              "\n",
+              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
+              "    <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
+              "  </svg>\n",
+              "    </button>\n",
+              "\n",
+              "  <style>\n",
+              "    .colab-df-container {\n",
+              "      display:flex;\n",
+              "      gap: 12px;\n",
+              "    }\n",
+              "\n",
+              "    .colab-df-convert {\n",
+              "      background-color: #E8F0FE;\n",
+              "      border: none;\n",
+              "      border-radius: 50%;\n",
+              "      cursor: pointer;\n",
+              "      display: none;\n",
+              "      fill: #1967D2;\n",
+              "      height: 32px;\n",
+              "      padding: 0 0 0 0;\n",
+              "      width: 32px;\n",
+              "    }\n",
+              "\n",
+              "    .colab-df-convert:hover {\n",
+              "      background-color: #E2EBFA;\n",
+              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
+              "      fill: #174EA6;\n",
+              "    }\n",
+              "\n",
+              "    .colab-df-buttons div {\n",
+              "      margin-bottom: 4px;\n",
+              "    }\n",
+              "\n",
+              "    [theme=dark] .colab-df-convert {\n",
+              "      background-color: #3B4455;\n",
+              "      fill: #D2E3FC;\n",
+              "    }\n",
+              "\n",
+              "    [theme=dark] .colab-df-convert:hover {\n",
+              "      background-color: #434B5C;\n",
+              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
+              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
+              "      fill: #FFFFFF;\n",
+              "    }\n",
+              "  </style>\n",
+              "\n",
+              "    <script>\n",
+              "      const buttonEl =\n",
+              "        document.querySelector('#df-0b7c1edc-2e7f-4d3c-baed-6872755723a8 button.colab-df-convert');\n",
+              "      buttonEl.style.display =\n",
+              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
+              "\n",
+              "      async function convertToInteractive(key) {\n",
+              "        const element = document.querySelector('#df-0b7c1edc-2e7f-4d3c-baed-6872755723a8');\n",
+              "        const dataTable =\n",
+              "          await google.colab.kernel.invokeFunction('convertToInteractive',\n",
+              "                                                    [key], {});\n",
+              "        if (!dataTable) return;\n",
+              "\n",
+              "        const docLinkHtml = 'Like what you see? Visit the ' +\n",
+              "          '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
+              "          + ' to learn more about interactive tables.';\n",
+              "        element.innerHTML = '';\n",
+              "        dataTable['output_type'] = 'display_data';\n",
+              "        await google.colab.output.renderOutput(dataTable, element);\n",
+              "        const docLink = document.createElement('div');\n",
+              "        docLink.innerHTML = docLinkHtml;\n",
+              "        element.appendChild(docLink);\n",
+              "      }\n",
+              "    </script>\n",
+              "  </div>\n",
+              "\n",
+              "\n",
+              "<div id=\"df-9fe2b0a8-7ecc-4d5d-8ab6-73220f1eac83\">\n",
+              "  <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-9fe2b0a8-7ecc-4d5d-8ab6-73220f1eac83')\"\n",
+              "            title=\"Suggest charts\"\n",
+              "            style=\"display:none;\">\n",
+              "\n",
+              "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
+              "     width=\"24px\">\n",
+              "    <g>\n",
+              "        <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
+              "    </g>\n",
+              "</svg>\n",
+              "  </button>\n",
+              "\n",
+              "<style>\n",
+              "  .colab-df-quickchart {\n",
+              "      --bg-color: #E8F0FE;\n",
+              "      --fill-color: #1967D2;\n",
+              "      --hover-bg-color: #E2EBFA;\n",
+              "      --hover-fill-color: #174EA6;\n",
+              "      --disabled-fill-color: #AAA;\n",
+              "      --disabled-bg-color: #DDD;\n",
+              "  }\n",
+              "\n",
+              "  [theme=dark] .colab-df-quickchart {\n",
+              "      --bg-color: #3B4455;\n",
+              "      --fill-color: #D2E3FC;\n",
+              "      --hover-bg-color: #434B5C;\n",
+              "      --hover-fill-color: #FFFFFF;\n",
+              "      --disabled-bg-color: #3B4455;\n",
+              "      --disabled-fill-color: #666;\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-quickchart {\n",
+              "    background-color: var(--bg-color);\n",
+              "    border: none;\n",
+              "    border-radius: 50%;\n",
+              "    cursor: pointer;\n",
+              "    display: none;\n",
+              "    fill: var(--fill-color);\n",
+              "    height: 32px;\n",
+              "    padding: 0;\n",
+              "    width: 32px;\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-quickchart:hover {\n",
+              "    background-color: var(--hover-bg-color);\n",
+              "    box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
+              "    fill: var(--button-hover-fill-color);\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-quickchart-complete:disabled,\n",
+              "  .colab-df-quickchart-complete:disabled:hover {\n",
+              "    background-color: var(--disabled-bg-color);\n",
+              "    fill: var(--disabled-fill-color);\n",
+              "    box-shadow: none;\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-spinner {\n",
+              "    border: 2px solid var(--fill-color);\n",
+              "    border-color: transparent;\n",
+              "    border-bottom-color: var(--fill-color);\n",
+              "    animation:\n",
+              "      spin 1s steps(1) infinite;\n",
+              "  }\n",
+              "\n",
+              "  @keyframes spin {\n",
+              "    0% {\n",
+              "      border-color: transparent;\n",
+              "      border-bottom-color: var(--fill-color);\n",
+              "      border-left-color: var(--fill-color);\n",
+              "    }\n",
+              "    20% {\n",
+              "      border-color: transparent;\n",
+              "      border-left-color: var(--fill-color);\n",
+              "      border-top-color: var(--fill-color);\n",
+              "    }\n",
+              "    30% {\n",
+              "      border-color: transparent;\n",
+              "      border-left-color: var(--fill-color);\n",
+              "      border-top-color: var(--fill-color);\n",
+              "      border-right-color: var(--fill-color);\n",
+              "    }\n",
+              "    40% {\n",
+              "      border-color: transparent;\n",
+              "      border-right-color: var(--fill-color);\n",
+              "      border-top-color: var(--fill-color);\n",
+              "    }\n",
+              "    60% {\n",
+              "      border-color: transparent;\n",
+              "      border-right-color: var(--fill-color);\n",
+              "    }\n",
+              "    80% {\n",
+              "      border-color: transparent;\n",
+              "      border-right-color: var(--fill-color);\n",
+              "      border-bottom-color: var(--fill-color);\n",
+              "    }\n",
+              "    90% {\n",
+              "      border-color: transparent;\n",
+              "      border-bottom-color: var(--fill-color);\n",
+              "    }\n",
+              "  }\n",
+              "</style>\n",
+              "\n",
+              "  <script>\n",
+              "    async function quickchart(key) {\n",
+              "      const quickchartButtonEl =\n",
+              "        document.querySelector('#' + key + ' button');\n",
+              "      quickchartButtonEl.disabled = true;  // To prevent multiple clicks.\n",
+              "      quickchartButtonEl.classList.add('colab-df-spinner');\n",
+              "      try {\n",
+              "        const charts = await google.colab.kernel.invokeFunction(\n",
+              "            'suggestCharts', [key], {});\n",
+              "      } catch (error) {\n",
+              "        console.error('Error during call to suggestCharts:', error);\n",
+              "      }\n",
+              "      quickchartButtonEl.classList.remove('colab-df-spinner');\n",
+              "      quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
+              "    }\n",
+              "    (() => {\n",
+              "      let quickchartButtonEl =\n",
+              "        document.querySelector('#df-9fe2b0a8-7ecc-4d5d-8ab6-73220f1eac83 button');\n",
+              "      quickchartButtonEl.style.display =\n",
+              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
+              "    })();\n",
+              "  </script>\n",
+              "</div>\n",
+              "\n",
+              "    </div>\n",
+              "  </div>\n"
+            ],
+            "application/vnd.google.colaboratory.intrinsic+json": {
+              "type": "dataframe",
+              "variable_name": "test_data",
+              "summary": "{\n  \"name\": \"test_data\",\n  \"rows\": 2540,\n  \"fields\": [\n    {\n      \"column\": \"Text\",\n      \"properties\": {\n        \"dtype\": \"string\",\n        \"num_unique_values\": 2539,\n        \"samples\": [\n          \"\\u0647\\u0647 \\ud83d\\ude02\\ud83d\\ude02\\ud83d\\udc94\\ud83d\\udd2a\",\n          \"\\u0627\\u0648\\u0648\\u0641 \\ud83d\\ude21\",\n          \"\\u0628\\u0627\\u064a\\u0639 \\u0627\\u0644\\u0643\\u0644\\u064a\\u062c\\u0627\\u0627\\u0644\\u0644\\u0647 \\u064a\\u0633\\u062a\\u0631 \\u0639\\u0644\\u064a\\u0647 \\u0645\\u0646 \\u0627\\u062e\\u0648\\u0627\\u062a \\u0632\\u0644\\u064a\\u062e\\u0647 \\u062c\\u0645\\u0627\\u0644\\u0647 \\u0639\\u0627\\u064a\\u062f\\u064a \\u0637\\u064a\\u0628 \\u0644\\u0648\\u0634\\u0627\\u0641\\u0646 \\u064a\\u0648\\u0633\\u0641 \\u0639\\u0644\\u064a\\u0647 \\u0627\\u0644\\u0633\\u0644\\u0627\\u0645 \\u0648\\u0634 \\u0628\\u064a\\u0642\\u0637\\u0639\\u0646 \\ud83d\\udd2a\\ud83d\\udd2a\\ud83d\\udc94\\ud83d\\ude02\\ud83d\\ude02\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"label\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 2,\n        \"samples\": [\n          \"OFF\",\n          \"NOT_OFF\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    }\n  ]\n}"
+            }
+          },
+          "metadata": {},
+          "execution_count": 17
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 18,
+      "metadata": {
+        "id": "__s92VxN0c5y"
+      },
+      "outputs": [],
+      "source": [
+        "# Two-way lookup between the dataset's class names and the integer ids used downstream.\n",
+        "label2id = dict(NOT_OFF=0, OFF=1)\n",
+        "# Inverse mapping (id -> class name), derived so the two dicts cannot drift apart.\n",
+        "id2label = {idx: name for name, idx in label2id.items()}"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 19,
+      "metadata": {
+        "id": "NpHWFtFk0c8b",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 206
+        },
+        "outputId": "649be2fe-70c0-4a22-a078-3d7824982b95"
+      },
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "                                                Text  label\n",
+              "0                   وصارت فطاير البقالات غذاء صحي 👎🏻      0\n",
+              "1  روحي لبريده تلقين اشباه كثير بس ماحد زيكم مشفو...      1\n",
+              "2                  مش باين حاجه خالص 😣 مش عارف بقى 😔      0\n",
+              "3  اليوم الاثنين 👏 يقولك من المسلمين عندهم خاله ا...      0\n",
+              "4                      حمدلله ماحطها في فمي اساسا 😷🤢      0"
+            ],
+            "text/html": [
+              "\n",
+              "  <div id=\"df-b11c946f-8374-47aa-beb7-723c94a190db\" class=\"colab-df-container\">\n",
+              "    <div>\n",
+              "<style scoped>\n",
+              "    .dataframe tbody tr th:only-of-type {\n",
+              "        vertical-align: middle;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe tbody tr th {\n",
+              "        vertical-align: top;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe thead th {\n",
+              "        text-align: right;\n",
+              "    }\n",
+              "</style>\n",
+              "<table border=\"1\" class=\"dataframe\">\n",
+              "  <thead>\n",
+              "    <tr style=\"text-align: right;\">\n",
+              "      <th></th>\n",
+              "      <th>Text</th>\n",
+              "      <th>label</th>\n",
+              "    </tr>\n",
+              "  </thead>\n",
+              "  <tbody>\n",
+              "    <tr>\n",
+              "      <th>0</th>\n",
+              "      <td>وصارت فطاير البقالات غذاء صحي 👎🏻</td>\n",
+              "      <td>0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>1</th>\n",
+              "      <td>روحي لبريده تلقين اشباه كثير بس ماحد زيكم مشفو...</td>\n",
+              "      <td>1</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>2</th>\n",
+              "      <td>مش باين حاجه خالص 😣 مش عارف بقى 😔</td>\n",
+              "      <td>0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>3</th>\n",
+              "      <td>اليوم الاثنين 👏 يقولك من المسلمين عندهم خاله ا...</td>\n",
+              "      <td>0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>4</th>\n",
+              "      <td>حمدلله ماحطها في فمي اساسا 😷🤢</td>\n",
+              "      <td>0</td>\n",
+              "    </tr>\n",
+              "  </tbody>\n",
+              "</table>\n",
+              "</div>\n",
+              "    <div class=\"colab-df-buttons\">\n",
+              "\n",
+              "  <div class=\"colab-df-container\">\n",
+              "    <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-b11c946f-8374-47aa-beb7-723c94a190db')\"\n",
+              "            title=\"Convert this dataframe to an interactive table.\"\n",
+              "            style=\"display:none;\">\n",
+              "\n",
+              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
+              "    <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
+              "  </svg>\n",
+              "    </button>\n",
+              "\n",
+              "  <style>\n",
+              "    .colab-df-container {\n",
+              "      display:flex;\n",
+              "      gap: 12px;\n",
+              "    }\n",
+              "\n",
+              "    .colab-df-convert {\n",
+              "      background-color: #E8F0FE;\n",
+              "      border: none;\n",
+              "      border-radius: 50%;\n",
+              "      cursor: pointer;\n",
+              "      display: none;\n",
+              "      fill: #1967D2;\n",
+              "      height: 32px;\n",
+              "      padding: 0 0 0 0;\n",
+              "      width: 32px;\n",
+              "    }\n",
+              "\n",
+              "    .colab-df-convert:hover {\n",
+              "      background-color: #E2EBFA;\n",
+              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
+              "      fill: #174EA6;\n",
+              "    }\n",
+              "\n",
+              "    .colab-df-buttons div {\n",
+              "      margin-bottom: 4px;\n",
+              "    }\n",
+              "\n",
+              "    [theme=dark] .colab-df-convert {\n",
+              "      background-color: #3B4455;\n",
+              "      fill: #D2E3FC;\n",
+              "    }\n",
+              "\n",
+              "    [theme=dark] .colab-df-convert:hover {\n",
+              "      background-color: #434B5C;\n",
+              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
+              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
+              "      fill: #FFFFFF;\n",
+              "    }\n",
+              "  </style>\n",
+              "\n",
+              "    <script>\n",
+              "      const buttonEl =\n",
+              "        document.querySelector('#df-b11c946f-8374-47aa-beb7-723c94a190db button.colab-df-convert');\n",
+              "      buttonEl.style.display =\n",
+              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
+              "\n",
+              "      async function convertToInteractive(key) {\n",
+              "        const element = document.querySelector('#df-b11c946f-8374-47aa-beb7-723c94a190db');\n",
+              "        const dataTable =\n",
+              "          await google.colab.kernel.invokeFunction('convertToInteractive',\n",
+              "                                                    [key], {});\n",
+              "        if (!dataTable) return;\n",
+              "\n",
+              "        const docLinkHtml = 'Like what you see? Visit the ' +\n",
+              "          '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
+              "          + ' to learn more about interactive tables.';\n",
+              "        element.innerHTML = '';\n",
+              "        dataTable['output_type'] = 'display_data';\n",
+              "        await google.colab.output.renderOutput(dataTable, element);\n",
+              "        const docLink = document.createElement('div');\n",
+              "        docLink.innerHTML = docLinkHtml;\n",
+              "        element.appendChild(docLink);\n",
+              "      }\n",
+              "    </script>\n",
+              "  </div>\n",
+              "\n",
+              "\n",
+              "<div id=\"df-f63f0e08-4e89-411f-9c27-82b66242c046\">\n",
+              "  <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-f63f0e08-4e89-411f-9c27-82b66242c046')\"\n",
+              "            title=\"Suggest charts\"\n",
+              "            style=\"display:none;\">\n",
+              "\n",
+              "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
+              "     width=\"24px\">\n",
+              "    <g>\n",
+              "        <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
+              "    </g>\n",
+              "</svg>\n",
+              "  </button>\n",
+              "\n",
+              "<style>\n",
+              "  .colab-df-quickchart {\n",
+              "      --bg-color: #E8F0FE;\n",
+              "      --fill-color: #1967D2;\n",
+              "      --hover-bg-color: #E2EBFA;\n",
+              "      --hover-fill-color: #174EA6;\n",
+              "      --disabled-fill-color: #AAA;\n",
+              "      --disabled-bg-color: #DDD;\n",
+              "  }\n",
+              "\n",
+              "  [theme=dark] .colab-df-quickchart {\n",
+              "      --bg-color: #3B4455;\n",
+              "      --fill-color: #D2E3FC;\n",
+              "      --hover-bg-color: #434B5C;\n",
+              "      --hover-fill-color: #FFFFFF;\n",
+              "      --disabled-bg-color: #3B4455;\n",
+              "      --disabled-fill-color: #666;\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-quickchart {\n",
+              "    background-color: var(--bg-color);\n",
+              "    border: none;\n",
+              "    border-radius: 50%;\n",
+              "    cursor: pointer;\n",
+              "    display: none;\n",
+              "    fill: var(--fill-color);\n",
+              "    height: 32px;\n",
+              "    padding: 0;\n",
+              "    width: 32px;\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-quickchart:hover {\n",
+              "    background-color: var(--hover-bg-color);\n",
+              "    box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
+              "    fill: var(--button-hover-fill-color);\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-quickchart-complete:disabled,\n",
+              "  .colab-df-quickchart-complete:disabled:hover {\n",
+              "    background-color: var(--disabled-bg-color);\n",
+              "    fill: var(--disabled-fill-color);\n",
+              "    box-shadow: none;\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-spinner {\n",
+              "    border: 2px solid var(--fill-color);\n",
+              "    border-color: transparent;\n",
+              "    border-bottom-color: var(--fill-color);\n",
+              "    animation:\n",
+              "      spin 1s steps(1) infinite;\n",
+              "  }\n",
+              "\n",
+              "  @keyframes spin {\n",
+              "    0% {\n",
+              "      border-color: transparent;\n",
+              "      border-bottom-color: var(--fill-color);\n",
+              "      border-left-color: var(--fill-color);\n",
+              "    }\n",
+              "    20% {\n",
+              "      border-color: transparent;\n",
+              "      border-left-color: var(--fill-color);\n",
+              "      border-top-color: var(--fill-color);\n",
+              "    }\n",
+              "    30% {\n",
+              "      border-color: transparent;\n",
+              "      border-left-color: var(--fill-color);\n",
+              "      border-top-color: var(--fill-color);\n",
+              "      border-right-color: var(--fill-color);\n",
+              "    }\n",
+              "    40% {\n",
+              "      border-color: transparent;\n",
+              "      border-right-color: var(--fill-color);\n",
+              "      border-top-color: var(--fill-color);\n",
+              "    }\n",
+              "    60% {\n",
+              "      border-color: transparent;\n",
+              "      border-right-color: var(--fill-color);\n",
+              "    }\n",
+              "    80% {\n",
+              "      border-color: transparent;\n",
+              "      border-right-color: var(--fill-color);\n",
+              "      border-bottom-color: var(--fill-color);\n",
+              "    }\n",
+              "    90% {\n",
+              "      border-color: transparent;\n",
+              "      border-bottom-color: var(--fill-color);\n",
+              "    }\n",
+              "  }\n",
+              "</style>\n",
+              "\n",
+              "  <script>\n",
+              "    async function quickchart(key) {\n",
+              "      const quickchartButtonEl =\n",
+              "        document.querySelector('#' + key + ' button');\n",
+              "      quickchartButtonEl.disabled = true;  // To prevent multiple clicks.\n",
+              "      quickchartButtonEl.classList.add('colab-df-spinner');\n",
+              "      try {\n",
+              "        const charts = await google.colab.kernel.invokeFunction(\n",
+              "            'suggestCharts', [key], {});\n",
+              "      } catch (error) {\n",
+              "        console.error('Error during call to suggestCharts:', error);\n",
+              "      }\n",
+              "      quickchartButtonEl.classList.remove('colab-df-spinner');\n",
+              "      quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
+              "    }\n",
+              "    (() => {\n",
+              "      let quickchartButtonEl =\n",
+              "        document.querySelector('#df-f63f0e08-4e89-411f-9c27-82b66242c046 button');\n",
+              "      quickchartButtonEl.style.display =\n",
+              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
+              "    })();\n",
+              "  </script>\n",
+              "</div>\n",
+              "\n",
+              "    </div>\n",
+              "  </div>\n"
+            ],
+            "application/vnd.google.colaboratory.intrinsic+json": {
+              "type": "dataframe",
+              "variable_name": "train_data",
+              "summary": "{\n  \"name\": \"train_data\",\n  \"rows\": 8886,\n  \"fields\": [\n    {\n      \"column\": \"Text\",\n      \"properties\": {\n        \"dtype\": \"string\",\n        \"num_unique_values\": 8870,\n        \"samples\": [\n          \"\\u0647\\u0647 \\u0645\\u0648 \\u0628\\u0646\\u062a\\u064a \\ud83d\\udc4a\\ud83d\\udc4a\\ud83d\\ude02 \\u0645\\u0627\\u0632\\u0644\\u062a \\u0639\\u0630\\u0628\\u0627\\u0621 \\u0648\\u0627\\u0641\\u062a\\u062e\\u0631 \\ud83d\\ude0c\\ud83d\\ude02\",\n          \"\\u064a\\u0627\\u0631\\u0628 \\u0635\\u0628\\u0631\\u0646\\u064a \\u0639\\u0644\\u0649 \\u0644\\u0648\\u0645\\u064a \\ud83d\\udc94\\ud83d\\udc94 \\u064a\\u0635\\u062d\\u064a\\u0646\\u064a \\u0645\\u0646 \\u0639\\u0632 \\u0646\\u0648\\u0645\\u064a \\u0628\\u0627\\u0644\\u0636\\u0631\\u0628 \\u0639\\u0644\\u0649 \\u0631\\u0627\\u0633\\u064a \\ud83d\\ude21\\ud83d\\udc4a\",\n          \"\\u0648\\u0627\\u0644\\u0644\\u0647 \\u0627\\u0644\\u0646\\u0627\\u0642\\u0635\\u0647 \\u0627\\u0645\\u0643 \\u0644\\u0644\\u0627\\u0633\\u0641 \\u062a\\u062d\\u062a\\u0627\\u062c \\u0645\\u0646\\u0643\\u0645 \\u0627\\u0646 \\u062a\\u0643\\u0645\\u0644\\u0648\\u0647\\u0627 \\u0627\\u0645\\u0627 \\u0646\\u062d\\u0646 \\u0648\\u0627\\u0644\\u0644\\u0647 \\u0627\\u0644\\u062d\\u0645\\u062f \\u0643\\u0627\\u0645\\u0644\\u064a\\u0646 \\u0639\\u0642\\u0644 \\u064a\\u0627\\u0646\\u0627\\u0642\\u0635 \\u0627\\u0644\\u0639\\u0642\\u0644 \\u0644\\u0627 \\u0648\\u0627\\u0639\\u0644\\u0627\\u0645\\u064a \\ud83e\\udd22\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"label\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 0,\n        \"min\": 0,\n        \"max\": 1,\n        \"num_unique_values\": 2,\n        \"samples\": [\n          1,\n          0\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    }\n  ]\n}"
+            }
+          },
+          "metadata": {},
+          "execution_count": 19
+        }
+      ],
+      "source": [
+        "# Encode the string class names in `label` as integer ids via label2id.\n",
+        "# Values that are already ids pass through unchanged, so re-running this cell\n",
+        "# no longer raises KeyError on the previously encoded column.\n",
+        "train_data['label'] = train_data['label'].map(lambda x: label2id.get(x, x))\n",
+        "\n",
+        "# Fail loudly on anything that is neither a known class name nor a known id\n",
+        "# (the original lookup raised KeyError for such values).\n",
+        "unknown_labels = set(train_data['label']) - set(label2id.values())\n",
+        "assert not unknown_labels, f'Unexpected labels in train_data: {unknown_labels}'\n",
+        "\n",
+        "# Keep only the two columns used from here on; the explicit copy makes the result\n",
+        "# an independent frame rather than a selection of the previous one.\n",
+        "train_data = train_data[[\"Text\", \"label\"]].copy()\n",
+        "train_data.head()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 20,
+      "metadata": {
+        "id": "QPAuj5Pz0c--",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 206
+        },
+        "outputId": "4ae3a361-bff6-48f4-d59f-50ad0e9d5225"
+      },
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "                                                Text  label\n",
+              "0                      داليا مبارك مادري ليش تقرفت 😷      0\n",
+              "1  ابديت السناب الجديد ❌ حاس الناس حوس أشوف مشاهي...      0\n",
+              "2                             هييه والله واايدد 🔪🔪💔💔      0\n",
+              "3                                      اكيد اخس شي 😤      0\n",
+              "4                          مابي شي الحين غير فراشي 😣      0"
+            ],
+            "text/html": [
+              "\n",
+              "  <div id=\"df-a2955744-e835-417e-9183-4858d5eb314a\" class=\"colab-df-container\">\n",
+              "    <div>\n",
+              "<style scoped>\n",
+              "    .dataframe tbody tr th:only-of-type {\n",
+              "        vertical-align: middle;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe tbody tr th {\n",
+              "        vertical-align: top;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe thead th {\n",
+              "        text-align: right;\n",
+              "    }\n",
+              "</style>\n",
+              "<table border=\"1\" class=\"dataframe\">\n",
+              "  <thead>\n",
+              "    <tr style=\"text-align: right;\">\n",
+              "      <th></th>\n",
+              "      <th>Text</th>\n",
+              "      <th>label</th>\n",
+              "    </tr>\n",
+              "  </thead>\n",
+              "  <tbody>\n",
+              "    <tr>\n",
+              "      <th>0</th>\n",
+              "      <td>داليا مبارك مادري ليش تقرفت 😷</td>\n",
+              "      <td>0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>1</th>\n",
+              "      <td>ابديت السناب الجديد ❌ حاس الناس حوس أشوف مشاهي...</td>\n",
+              "      <td>0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>2</th>\n",
+              "      <td>هييه والله واايدد 🔪🔪💔💔</td>\n",
+              "      <td>0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>3</th>\n",
+              "      <td>اكيد اخس شي 😤</td>\n",
+              "      <td>0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>4</th>\n",
+              "      <td>مابي شي الحين غير فراشي 😣</td>\n",
+              "      <td>0</td>\n",
+              "    </tr>\n",
+              "  </tbody>\n",
+              "</table>\n",
+              "</div>\n",
+              "    <div class=\"colab-df-buttons\">\n",
+              "\n",
+              "  <div class=\"colab-df-container\">\n",
+              "    <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-a2955744-e835-417e-9183-4858d5eb314a')\"\n",
+              "            title=\"Convert this dataframe to an interactive table.\"\n",
+              "            style=\"display:none;\">\n",
+              "\n",
+              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
+              "    <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
+              "  </svg>\n",
+              "    </button>\n",
+              "\n",
+              "  <style>\n",
+              "    .colab-df-container {\n",
+              "      display:flex;\n",
+              "      gap: 12px;\n",
+              "    }\n",
+              "\n",
+              "    .colab-df-convert {\n",
+              "      background-color: #E8F0FE;\n",
+              "      border: none;\n",
+              "      border-radius: 50%;\n",
+              "      cursor: pointer;\n",
+              "      display: none;\n",
+              "      fill: #1967D2;\n",
+              "      height: 32px;\n",
+              "      padding: 0 0 0 0;\n",
+              "      width: 32px;\n",
+              "    }\n",
+              "\n",
+              "    .colab-df-convert:hover {\n",
+              "      background-color: #E2EBFA;\n",
+              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
+              "      fill: #174EA6;\n",
+              "    }\n",
+              "\n",
+              "    .colab-df-buttons div {\n",
+              "      margin-bottom: 4px;\n",
+              "    }\n",
+              "\n",
+              "    [theme=dark] .colab-df-convert {\n",
+              "      background-color: #3B4455;\n",
+              "      fill: #D2E3FC;\n",
+              "    }\n",
+              "\n",
+              "    [theme=dark] .colab-df-convert:hover {\n",
+              "      background-color: #434B5C;\n",
+              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
+              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
+              "      fill: #FFFFFF;\n",
+              "    }\n",
+              "  </style>\n",
+              "\n",
+              "    <script>\n",
+              "      const buttonEl =\n",
+              "        document.querySelector('#df-a2955744-e835-417e-9183-4858d5eb314a button.colab-df-convert');\n",
+              "      buttonEl.style.display =\n",
+              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
+              "\n",
+              "      async function convertToInteractive(key) {\n",
+              "        const element = document.querySelector('#df-a2955744-e835-417e-9183-4858d5eb314a');\n",
+              "        const dataTable =\n",
+              "          await google.colab.kernel.invokeFunction('convertToInteractive',\n",
+              "                                                    [key], {});\n",
+              "        if (!dataTable) return;\n",
+              "\n",
+              "        const docLinkHtml = 'Like what you see? Visit the ' +\n",
+              "          '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
+              "          + ' to learn more about interactive tables.';\n",
+              "        element.innerHTML = '';\n",
+              "        dataTable['output_type'] = 'display_data';\n",
+              "        await google.colab.output.renderOutput(dataTable, element);\n",
+              "        const docLink = document.createElement('div');\n",
+              "        docLink.innerHTML = docLinkHtml;\n",
+              "        element.appendChild(docLink);\n",
+              "      }\n",
+              "    </script>\n",
+              "  </div>\n",
+              "\n",
+              "\n",
+              "<div id=\"df-d71d199f-34be-4281-9460-9b2bdfa86b46\">\n",
+              "  <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-d71d199f-34be-4281-9460-9b2bdfa86b46')\"\n",
+              "            title=\"Suggest charts\"\n",
+              "            style=\"display:none;\">\n",
+              "\n",
+              "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
+              "     width=\"24px\">\n",
+              "    <g>\n",
+              "        <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
+              "    </g>\n",
+              "</svg>\n",
+              "  </button>\n",
+              "\n",
+              "<style>\n",
+              "  .colab-df-quickchart {\n",
+              "      --bg-color: #E8F0FE;\n",
+              "      --fill-color: #1967D2;\n",
+              "      --hover-bg-color: #E2EBFA;\n",
+              "      --hover-fill-color: #174EA6;\n",
+              "      --disabled-fill-color: #AAA;\n",
+              "      --disabled-bg-color: #DDD;\n",
+              "  }\n",
+              "\n",
+              "  [theme=dark] .colab-df-quickchart {\n",
+              "      --bg-color: #3B4455;\n",
+              "      --fill-color: #D2E3FC;\n",
+              "      --hover-bg-color: #434B5C;\n",
+              "      --hover-fill-color: #FFFFFF;\n",
+              "      --disabled-bg-color: #3B4455;\n",
+              "      --disabled-fill-color: #666;\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-quickchart {\n",
+              "    background-color: var(--bg-color);\n",
+              "    border: none;\n",
+              "    border-radius: 50%;\n",
+              "    cursor: pointer;\n",
+              "    display: none;\n",
+              "    fill: var(--fill-color);\n",
+              "    height: 32px;\n",
+              "    padding: 0;\n",
+              "    width: 32px;\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-quickchart:hover {\n",
+              "    background-color: var(--hover-bg-color);\n",
+              "    box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
+              "    fill: var(--button-hover-fill-color);\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-quickchart-complete:disabled,\n",
+              "  .colab-df-quickchart-complete:disabled:hover {\n",
+              "    background-color: var(--disabled-bg-color);\n",
+              "    fill: var(--disabled-fill-color);\n",
+              "    box-shadow: none;\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-spinner {\n",
+              "    border: 2px solid var(--fill-color);\n",
+              "    border-color: transparent;\n",
+              "    border-bottom-color: var(--fill-color);\n",
+              "    animation:\n",
+              "      spin 1s steps(1) infinite;\n",
+              "  }\n",
+              "\n",
+              "  @keyframes spin {\n",
+              "    0% {\n",
+              "      border-color: transparent;\n",
+              "      border-bottom-color: var(--fill-color);\n",
+              "      border-left-color: var(--fill-color);\n",
+              "    }\n",
+              "    20% {\n",
+              "      border-color: transparent;\n",
+              "      border-left-color: var(--fill-color);\n",
+              "      border-top-color: var(--fill-color);\n",
+              "    }\n",
+              "    30% {\n",
+              "      border-color: transparent;\n",
+              "      border-left-color: var(--fill-color);\n",
+              "      border-top-color: var(--fill-color);\n",
+              "      border-right-color: var(--fill-color);\n",
+              "    }\n",
+              "    40% {\n",
+              "      border-color: transparent;\n",
+              "      border-right-color: var(--fill-color);\n",
+              "      border-top-color: var(--fill-color);\n",
+              "    }\n",
+              "    60% {\n",
+              "      border-color: transparent;\n",
+              "      border-right-color: var(--fill-color);\n",
+              "    }\n",
+              "    80% {\n",
+              "      border-color: transparent;\n",
+              "      border-right-color: var(--fill-color);\n",
+              "      border-bottom-color: var(--fill-color);\n",
+              "    }\n",
+              "    90% {\n",
+              "      border-color: transparent;\n",
+              "      border-bottom-color: var(--fill-color);\n",
+              "    }\n",
+              "  }\n",
+              "</style>\n",
+              "\n",
+              "  <script>\n",
+              "    async function quickchart(key) {\n",
+              "      const quickchartButtonEl =\n",
+              "        document.querySelector('#' + key + ' button');\n",
+              "      quickchartButtonEl.disabled = true;  // To prevent multiple clicks.\n",
+              "      quickchartButtonEl.classList.add('colab-df-spinner');\n",
+              "      try {\n",
+              "        const charts = await google.colab.kernel.invokeFunction(\n",
+              "            'suggestCharts', [key], {});\n",
+              "      } catch (error) {\n",
+              "        console.error('Error during call to suggestCharts:', error);\n",
+              "      }\n",
+              "      quickchartButtonEl.classList.remove('colab-df-spinner');\n",
+              "      quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
+              "    }\n",
+              "    (() => {\n",
+              "      let quickchartButtonEl =\n",
+              "        document.querySelector('#df-d71d199f-34be-4281-9460-9b2bdfa86b46 button');\n",
+              "      quickchartButtonEl.style.display =\n",
+              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
+              "    })();\n",
+              "  </script>\n",
+              "</div>\n",
+              "\n",
+              "    </div>\n",
+              "  </div>\n"
+            ],
+            "application/vnd.google.colaboratory.intrinsic+json": {
+              "type": "dataframe",
+              "variable_name": "dev_data",
+              "summary": "{\n  \"name\": \"dev_data\",\n  \"rows\": 1269,\n  \"fields\": [\n    {\n      \"column\": \"Text\",\n      \"properties\": {\n        \"dtype\": \"string\",\n        \"num_unique_values\": 1268,\n        \"samples\": [\n          \"\\u0644\\u0648 \\u0643\\u0627\\u0646 \\u0627\\u0644\\u0627\\u0646\\u062a\\u0638\\u0627\\u0631 \\u0631\\u062c\\u0644\\u0627 \\u0644\\u0642\\u062a\\u0644\\u062a\\u0647 \\ud83d\\ude21\",\n          \"\\u0639\\u0646\\u062f\\u064a \\u0637\\u0641\\u0644\\u0647 \\u0635\\u063a\\u064a\\u0631\\u0629 \\u0643\\u0644 \\u0645\\u0627 \\u0633\\u0648\\u062a \\u0645\\u0635\\u064a\\u0628\\u0647 \\u0642\\u0627\\u0644\\u062a \\u0627\\u0645\\u0632\\u062d \\u0631\\u0645\\u062a \\u0627\\u0644\\u0645\\u0641\\u062a\\u0627\\u062d \\u0645\\u0639 \\u0627\\u0644\\u0634\\u0628\\u0627\\u0643 \\u0644\\u064a\\u0634 \\u064a\\u0627\\u0628\\u0627\\u0628\\u0627 \\u0642\\u0627\\u0644\\u062a \\u0627\\u0645\\u0632\\u062d \\u0631\\u0645\\u062a \\u0645\\u0639\\u062c\\u0648\\u0646 \\u0627\\u0644\\u0627\\u0633\\u0646\\u0627\\u0646 \\u0644\\u064a\\u0634 \\u064a\\u0627 \\u0628\\u0627\\u0628\\u0627 \\u0642\\u0627\\u0644\\u062a \\u0627\\u0645\\u0632\\u062d \\u0632\\u0639\\u0644\\u062a \\u0639\\u0644\\u064a \\u0642\\u0627\\u0644\\u062a \\u0644\\u064a \\u0643\\u0644 \\u062a\\u0631\\u0627\\u0628 \\u0627\\u0646\\u0635\\u062f\\u0645\\u062a \\u0645\\u062f\\u0631\\u064a \\u0645\\u0646 \\u0641\\u064a\\u0646 \\u062c\\u0627\\u064a\\u0628\\u0629 \\u0647\\u0627\\u0644\\u0643\\u0644\\u0645\\u0629 \\u0642\\u0644\\u062a \\u0627\\u064a\\u0634 \\u0627\\u064a\\u0634 \\ud83d\\ude20 \\u0642\\u0627\\u0644\\u062a \\u0627\\u0645\\u0632\\u062d \\u0647\\u0647\",\n          \"\\u0648\\u0639\\u0642\\u0644\\u0643 \\u0643\\u0627\\u0646 \\u0641\\u064a\\u0646 \\u0645\\u0646 \\u0627\\u0644\\u0623\\u0648\\u0644 \\u064a\\u0627 \\u0633\\u0643\\u0631 \\ud83d\\ude02\\ud83d\\ude02 \\u0648\\u0627\\u0646\\u062a \\u062a\\u0642\\u062f\\u0631 \\u062a\\u0633\\u0639\\u062f\\u0647\\u0627 
\\u064a\\u0627\\u0641\\u0627\\u0644\\u062d \\u0628\\u0633 \\u0627\\u0644\\u0628\\u062f\\u0627\\u064a\\u0629 \\u063a\\u0644\\u0637 \\ud83d\\udc4a\\ud83d\\udc4a\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"label\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 0,\n        \"min\": 0,\n        \"max\": 1,\n        \"num_unique_values\": 2,\n        \"samples\": [\n          1,\n          0\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    }\n  ]\n}"
+            }
+          },
+          "metadata": {},
+          "execution_count": 20
+        }
+      ],
+      "source": [
+        "# Encode the string class labels as integer ids and keep only the model input columns.\n",
+        "# Values that are already ids pass through unchanged, and the frame is rebuilt with\n",
+        "# .assign() instead of being written into, so re-running this cell is a no-op rather\n",
+        "# than a KeyError. A label that is neither a known name nor a known id still raises KeyError.\n",
+        "dev_data = dev_data.assign(\n",
+        "    label=dev_data['label'].map(\n",
+        "        lambda raw: raw if raw in label2id.values() else label2id[raw]\n",
+        "    )\n",
+        ")[[\"Text\", \"label\"]]\n",
+        "dev_data.head()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Encode the string class labels as integer ids and keep only the model input columns.\n",
+        "# Values that are already ids pass through unchanged, and the frame is rebuilt with\n",
+        "# .assign() instead of being written into, so re-running this cell is a no-op rather\n",
+        "# than a KeyError. A label that is neither a known name nor a known id still raises KeyError.\n",
+        "test_data = test_data.assign(\n",
+        "    label=test_data['label'].map(\n",
+        "        lambda raw: raw if raw in label2id.values() else label2id[raw]\n",
+        "    )\n",
+        ")[[\"Text\", \"label\"]]\n",
+        "test_data"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 424
+        },
+        "id": "9Dkns-XMjuvH",
+        "outputId": "81b93b0a-989c-43c6-b56a-a5192657c375"
+      },
+      "execution_count": 21,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "                                                   Text  label\n",
+              "0                 مشفتش العرض بتاعهم لا مش مهتمة لا 😩🐸😂      0\n",
+              "1     عندما تكون لوحدك تحس انك لحالك صح 😊 حتى انا مث...      0\n",
+              "2     ماشاء الله الرجال باين عليه محترم البنات متى ت...      0\n",
+              "3                                   شسالفة احد يفهمني 😤      0\n",
+              "4            اقوول استريح عاد احتفالاتنا تحط اغانينا 😡😡      0\n",
+              "...                                                 ...    ...\n",
+              "2535  قله حيا وين اهلهم ذولي الله لايبلانا لهالدرجه ...      1\n",
+              "2536                                      ثم الطحلبه 🐸🐸      1\n",
+              "2537         يا وجه الله 😷 من اليوم ورايح شاورما انسى 🔪      0\n",
+              "2538                          متخلف حتى الحلال حرمتوه 😷      1\n",
+              "2539       حنا خقينا على بنت رئيس مو على بياع كليجا 😝👊🏻      0\n",
+              "\n",
+              "[2540 rows x 2 columns]"
+            ],
+            "text/html": [
+              "\n",
+              "  <div id=\"df-39f4e2c1-a935-4cef-8662-3a73e43a03e0\" class=\"colab-df-container\">\n",
+              "    <div>\n",
+              "<style scoped>\n",
+              "    .dataframe tbody tr th:only-of-type {\n",
+              "        vertical-align: middle;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe tbody tr th {\n",
+              "        vertical-align: top;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe thead th {\n",
+              "        text-align: right;\n",
+              "    }\n",
+              "</style>\n",
+              "<table border=\"1\" class=\"dataframe\">\n",
+              "  <thead>\n",
+              "    <tr style=\"text-align: right;\">\n",
+              "      <th></th>\n",
+              "      <th>Text</th>\n",
+              "      <th>label</th>\n",
+              "    </tr>\n",
+              "  </thead>\n",
+              "  <tbody>\n",
+              "    <tr>\n",
+              "      <th>0</th>\n",
+              "      <td>مشفتش العرض بتاعهم لا مش مهتمة لا 😩🐸😂</td>\n",
+              "      <td>0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>1</th>\n",
+              "      <td>عندما تكون لوحدك تحس انك لحالك صح 😊 حتى انا مث...</td>\n",
+              "      <td>0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>2</th>\n",
+              "      <td>ماشاء الله الرجال باين عليه محترم البنات متى ت...</td>\n",
+              "      <td>0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>3</th>\n",
+              "      <td>شسالفة احد يفهمني 😤</td>\n",
+              "      <td>0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>4</th>\n",
+              "      <td>اقوول استريح عاد احتفالاتنا تحط اغانينا 😡😡</td>\n",
+              "      <td>0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>...</th>\n",
+              "      <td>...</td>\n",
+              "      <td>...</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>2535</th>\n",
+              "      <td>قله حيا وين اهلهم ذولي الله لايبلانا لهالدرجه ...</td>\n",
+              "      <td>1</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>2536</th>\n",
+              "      <td>ثم الطحلبه 🐸🐸</td>\n",
+              "      <td>1</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>2537</th>\n",
+              "      <td>يا وجه الله 😷 من اليوم ورايح شاورما انسى 🔪</td>\n",
+              "      <td>0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>2538</th>\n",
+              "      <td>متخلف حتى الحلال حرمتوه 😷</td>\n",
+              "      <td>1</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>2539</th>\n",
+              "      <td>حنا خقينا على بنت رئيس مو على بياع كليجا 😝👊🏻</td>\n",
+              "      <td>0</td>\n",
+              "    </tr>\n",
+              "  </tbody>\n",
+              "</table>\n",
+              "<p>2540 rows × 2 columns</p>\n",
+              "</div>\n",
+              "    <div class=\"colab-df-buttons\">\n",
+              "\n",
+              "  <div class=\"colab-df-container\">\n",
+              "    <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-39f4e2c1-a935-4cef-8662-3a73e43a03e0')\"\n",
+              "            title=\"Convert this dataframe to an interactive table.\"\n",
+              "            style=\"display:none;\">\n",
+              "\n",
+              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
+              "    <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
+              "  </svg>\n",
+              "    </button>\n",
+              "\n",
+              "  <style>\n",
+              "    .colab-df-container {\n",
+              "      display:flex;\n",
+              "      gap: 12px;\n",
+              "    }\n",
+              "\n",
+              "    .colab-df-convert {\n",
+              "      background-color: #E8F0FE;\n",
+              "      border: none;\n",
+              "      border-radius: 50%;\n",
+              "      cursor: pointer;\n",
+              "      display: none;\n",
+              "      fill: #1967D2;\n",
+              "      height: 32px;\n",
+              "      padding: 0 0 0 0;\n",
+              "      width: 32px;\n",
+              "    }\n",
+              "\n",
+              "    .colab-df-convert:hover {\n",
+              "      background-color: #E2EBFA;\n",
+              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
+              "      fill: #174EA6;\n",
+              "    }\n",
+              "\n",
+              "    .colab-df-buttons div {\n",
+              "      margin-bottom: 4px;\n",
+              "    }\n",
+              "\n",
+              "    [theme=dark] .colab-df-convert {\n",
+              "      background-color: #3B4455;\n",
+              "      fill: #D2E3FC;\n",
+              "    }\n",
+              "\n",
+              "    [theme=dark] .colab-df-convert:hover {\n",
+              "      background-color: #434B5C;\n",
+              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
+              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
+              "      fill: #FFFFFF;\n",
+              "    }\n",
+              "  </style>\n",
+              "\n",
+              "    <script>\n",
+              "      const buttonEl =\n",
+              "        document.querySelector('#df-39f4e2c1-a935-4cef-8662-3a73e43a03e0 button.colab-df-convert');\n",
+              "      buttonEl.style.display =\n",
+              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
+              "\n",
+              "      async function convertToInteractive(key) {\n",
+              "        const element = document.querySelector('#df-39f4e2c1-a935-4cef-8662-3a73e43a03e0');\n",
+              "        const dataTable =\n",
+              "          await google.colab.kernel.invokeFunction('convertToInteractive',\n",
+              "                                                    [key], {});\n",
+              "        if (!dataTable) return;\n",
+              "\n",
+              "        const docLinkHtml = 'Like what you see? Visit the ' +\n",
+              "          '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
+              "          + ' to learn more about interactive tables.';\n",
+              "        element.innerHTML = '';\n",
+              "        dataTable['output_type'] = 'display_data';\n",
+              "        await google.colab.output.renderOutput(dataTable, element);\n",
+              "        const docLink = document.createElement('div');\n",
+              "        docLink.innerHTML = docLinkHtml;\n",
+              "        element.appendChild(docLink);\n",
+              "      }\n",
+              "    </script>\n",
+              "  </div>\n",
+              "\n",
+              "\n",
+              "<div id=\"df-b6189619-740a-46c8-b6aa-7df2fed58c42\">\n",
+              "  <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-b6189619-740a-46c8-b6aa-7df2fed58c42')\"\n",
+              "            title=\"Suggest charts\"\n",
+              "            style=\"display:none;\">\n",
+              "\n",
+              "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
+              "     width=\"24px\">\n",
+              "    <g>\n",
+              "        <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
+              "    </g>\n",
+              "</svg>\n",
+              "  </button>\n",
+              "\n",
+              "<style>\n",
+              "  .colab-df-quickchart {\n",
+              "      --bg-color: #E8F0FE;\n",
+              "      --fill-color: #1967D2;\n",
+              "      --hover-bg-color: #E2EBFA;\n",
+              "      --hover-fill-color: #174EA6;\n",
+              "      --disabled-fill-color: #AAA;\n",
+              "      --disabled-bg-color: #DDD;\n",
+              "  }\n",
+              "\n",
+              "  [theme=dark] .colab-df-quickchart {\n",
+              "      --bg-color: #3B4455;\n",
+              "      --fill-color: #D2E3FC;\n",
+              "      --hover-bg-color: #434B5C;\n",
+              "      --hover-fill-color: #FFFFFF;\n",
+              "      --disabled-bg-color: #3B4455;\n",
+              "      --disabled-fill-color: #666;\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-quickchart {\n",
+              "    background-color: var(--bg-color);\n",
+              "    border: none;\n",
+              "    border-radius: 50%;\n",
+              "    cursor: pointer;\n",
+              "    display: none;\n",
+              "    fill: var(--fill-color);\n",
+              "    height: 32px;\n",
+              "    padding: 0;\n",
+              "    width: 32px;\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-quickchart:hover {\n",
+              "    background-color: var(--hover-bg-color);\n",
+              "    box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
+              "    fill: var(--button-hover-fill-color);\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-quickchart-complete:disabled,\n",
+              "  .colab-df-quickchart-complete:disabled:hover {\n",
+              "    background-color: var(--disabled-bg-color);\n",
+              "    fill: var(--disabled-fill-color);\n",
+              "    box-shadow: none;\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-spinner {\n",
+              "    border: 2px solid var(--fill-color);\n",
+              "    border-color: transparent;\n",
+              "    border-bottom-color: var(--fill-color);\n",
+              "    animation:\n",
+              "      spin 1s steps(1) infinite;\n",
+              "  }\n",
+              "\n",
+              "  @keyframes spin {\n",
+              "    0% {\n",
+              "      border-color: transparent;\n",
+              "      border-bottom-color: var(--fill-color);\n",
+              "      border-left-color: var(--fill-color);\n",
+              "    }\n",
+              "    20% {\n",
+              "      border-color: transparent;\n",
+              "      border-left-color: var(--fill-color);\n",
+              "      border-top-color: var(--fill-color);\n",
+              "    }\n",
+              "    30% {\n",
+              "      border-color: transparent;\n",
+              "      border-left-color: var(--fill-color);\n",
+              "      border-top-color: var(--fill-color);\n",
+              "      border-right-color: var(--fill-color);\n",
+              "    }\n",
+              "    40% {\n",
+              "      border-color: transparent;\n",
+              "      border-right-color: var(--fill-color);\n",
+              "      border-top-color: var(--fill-color);\n",
+              "    }\n",
+              "    60% {\n",
+              "      border-color: transparent;\n",
+              "      border-right-color: var(--fill-color);\n",
+              "    }\n",
+              "    80% {\n",
+              "      border-color: transparent;\n",
+              "      border-right-color: var(--fill-color);\n",
+              "      border-bottom-color: var(--fill-color);\n",
+              "    }\n",
+              "    90% {\n",
+              "      border-color: transparent;\n",
+              "      border-bottom-color: var(--fill-color);\n",
+              "    }\n",
+              "  }\n",
+              "</style>\n",
+              "\n",
+              "  <script>\n",
+              "    async function quickchart(key) {\n",
+              "      const quickchartButtonEl =\n",
+              "        document.querySelector('#' + key + ' button');\n",
+              "      quickchartButtonEl.disabled = true;  // To prevent multiple clicks.\n",
+              "      quickchartButtonEl.classList.add('colab-df-spinner');\n",
+              "      try {\n",
+              "        const charts = await google.colab.kernel.invokeFunction(\n",
+              "            'suggestCharts', [key], {});\n",
+              "      } catch (error) {\n",
+              "        console.error('Error during call to suggestCharts:', error);\n",
+              "      }\n",
+              "      quickchartButtonEl.classList.remove('colab-df-spinner');\n",
+              "      quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
+              "    }\n",
+              "    (() => {\n",
+              "      let quickchartButtonEl =\n",
+              "        document.querySelector('#df-b6189619-740a-46c8-b6aa-7df2fed58c42 button');\n",
+              "      quickchartButtonEl.style.display =\n",
+              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
+              "    })();\n",
+              "  </script>\n",
+              "</div>\n",
+              "\n",
+              "  <div id=\"id_e526d066-ed73-43fd-9a87-d7141f6a1444\">\n",
+              "    <style>\n",
+              "      .colab-df-generate {\n",
+              "        background-color: #E8F0FE;\n",
+              "        border: none;\n",
+              "        border-radius: 50%;\n",
+              "        cursor: pointer;\n",
+              "        display: none;\n",
+              "        fill: #1967D2;\n",
+              "        height: 32px;\n",
+              "        padding: 0 0 0 0;\n",
+              "        width: 32px;\n",
+              "      }\n",
+              "\n",
+              "      .colab-df-generate:hover {\n",
+              "        background-color: #E2EBFA;\n",
+              "        box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
+              "        fill: #174EA6;\n",
+              "      }\n",
+              "\n",
+              "      [theme=dark] .colab-df-generate {\n",
+              "        background-color: #3B4455;\n",
+              "        fill: #D2E3FC;\n",
+              "      }\n",
+              "\n",
+              "      [theme=dark] .colab-df-generate:hover {\n",
+              "        background-color: #434B5C;\n",
+              "        box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
+              "        filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
+              "        fill: #FFFFFF;\n",
+              "      }\n",
+              "    </style>\n",
+              "    <button class=\"colab-df-generate\" onclick=\"generateWithVariable('test_data')\"\n",
+              "            title=\"Generate code using this dataframe.\"\n",
+              "            style=\"display:none;\">\n",
+              "\n",
+              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
+              "       width=\"24px\">\n",
+              "    <path d=\"M7,19H8.4L18.45,9,17,7.55,7,17.6ZM5,21V16.75L18.45,3.32a2,2,0,0,1,2.83,0l1.4,1.43a1.91,1.91,0,0,1,.58,1.4,1.91,1.91,0,0,1-.58,1.4L9.25,21ZM18.45,9,17,7.55Zm-12,3A5.31,5.31,0,0,0,4.9,8.1,5.31,5.31,0,0,0,1,6.5,5.31,5.31,0,0,0,4.9,4.9,5.31,5.31,0,0,0,6.5,1,5.31,5.31,0,0,0,8.1,4.9,5.31,5.31,0,0,0,12,6.5,5.46,5.46,0,0,0,6.5,12Z\"/>\n",
+              "  </svg>\n",
+              "    </button>\n",
+              "    <script>\n",
+              "      (() => {\n",
+              "      const buttonEl =\n",
+              "        document.querySelector('#id_e526d066-ed73-43fd-9a87-d7141f6a1444 button.colab-df-generate');\n",
+              "      buttonEl.style.display =\n",
+              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
+              "\n",
+              "      buttonEl.onclick = () => {\n",
+              "        google.colab.notebook.generateWithVariable('test_data');\n",
+              "      }\n",
+              "      })();\n",
+              "    </script>\n",
+              "  </div>\n",
+              "\n",
+              "    </div>\n",
+              "  </div>\n"
+            ],
+            "application/vnd.google.colaboratory.intrinsic+json": {
+              "type": "dataframe",
+              "variable_name": "test_data",
+              "summary": "{\n  \"name\": \"test_data\",\n  \"rows\": 2540,\n  \"fields\": [\n    {\n      \"column\": \"Text\",\n      \"properties\": {\n        \"dtype\": \"string\",\n        \"num_unique_values\": 2539,\n        \"samples\": [\n          \"\\u0647\\u0647 \\ud83d\\ude02\\ud83d\\ude02\\ud83d\\udc94\\ud83d\\udd2a\",\n          \"\\u0627\\u0648\\u0648\\u0641 \\ud83d\\ude21\",\n          \"\\u0628\\u0627\\u064a\\u0639 \\u0627\\u0644\\u0643\\u0644\\u064a\\u062c\\u0627\\u0627\\u0644\\u0644\\u0647 \\u064a\\u0633\\u062a\\u0631 \\u0639\\u0644\\u064a\\u0647 \\u0645\\u0646 \\u0627\\u062e\\u0648\\u0627\\u062a \\u0632\\u0644\\u064a\\u062e\\u0647 \\u062c\\u0645\\u0627\\u0644\\u0647 \\u0639\\u0627\\u064a\\u062f\\u064a \\u0637\\u064a\\u0628 \\u0644\\u0648\\u0634\\u0627\\u0641\\u0646 \\u064a\\u0648\\u0633\\u0641 \\u0639\\u0644\\u064a\\u0647 \\u0627\\u0644\\u0633\\u0644\\u0627\\u0645 \\u0648\\u0634 \\u0628\\u064a\\u0642\\u0637\\u0639\\u0646 \\ud83d\\udd2a\\ud83d\\udd2a\\ud83d\\udc94\\ud83d\\ude02\\ud83d\\ude02\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"label\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 0,\n        \"min\": 0,\n        \"max\": 1,\n        \"num_unique_values\": 2,\n        \"samples\": [\n          1,\n          0\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    }\n  ]\n}"
+            }
+          },
+          "metadata": {},
+          "execution_count": 21
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 22,
+      "metadata": {
+        "id": "kB57ziQ83vP3",
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "outputId": "5e827c2e-abb8-4857-cdff-7d9b9c3cf558"
+      },
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Original class distribution: Counter({0: 5715, 1: 3171})\n",
+            "Resampled class distribution: Counter({0: 5715, 1: 5715})\n"
+          ]
+        }
+      ],
+      "source": [
+        "from collections import Counter\n",
+        "\n",
+        "import pandas as pd\n",
+        "from imblearn.over_sampling import RandomOverSampler\n",
+        "\n",
+        "# Balance the two labels in the training split with random oversampling.\n",
+        "# Only train_data is resampled in this cell.\n",
+        "X, y = train_data[['Text']], train_data['label']\n",
+        "print('Original class distribution:', Counter(y))\n",
+        "\n",
+        "# Fixed random_state so the resampling is repeatable across runs.\n",
+        "ros = RandomOverSampler(random_state=42)\n",
+        "X_resampled, y_resampled = ros.fit_resample(X, y)\n",
+        "\n",
+        "# Recombine the resampled text and labels into one frame for the cells below.\n",
+        "train_data_resampled = pd.DataFrame(X_resampled, columns=['Text']).assign(label=y_resampled)\n",
+        "\n",
+        "print('Resampled class distribution:', Counter(y_resampled))"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "y_resampled.value_counts()"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 178
+        },
+        "id": "yN8_AEhEj5IY",
+        "outputId": "70aad1a5-ceee-4484-edb9-36a9150dc4be"
+      },
+      "execution_count": 23,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "label\n",
+              "0    5715\n",
+              "1    5715\n",
+              "Name: count, dtype: int64"
+            ],
+            "text/html": [
+              "<div>\n",
+              "<style scoped>\n",
+              "    .dataframe tbody tr th:only-of-type {\n",
+              "        vertical-align: middle;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe tbody tr th {\n",
+              "        vertical-align: top;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe thead th {\n",
+              "        text-align: right;\n",
+              "    }\n",
+              "</style>\n",
+              "<table border=\"1\" class=\"dataframe\">\n",
+              "  <thead>\n",
+              "    <tr style=\"text-align: right;\">\n",
+              "      <th></th>\n",
+              "      <th>count</th>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>label</th>\n",
+              "      <th></th>\n",
+              "    </tr>\n",
+              "  </thead>\n",
+              "  <tbody>\n",
+              "    <tr>\n",
+              "      <th>0</th>\n",
+              "      <td>5715</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>1</th>\n",
+              "      <td>5715</td>\n",
+              "    </tr>\n",
+              "  </tbody>\n",
+              "</table>\n",
+              "</div><br><label><b>dtype:</b> int64</label>"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 23
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 24,
+      "metadata": {
+        "id": "lsoxSLHk7XsK",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 206
+        },
+        "outputId": "5b6b3acf-95b4-49ce-ee3d-a99ec3078071"
+      },
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "                                                Text  label\n",
+              "0                   وصارت فطاير البقالات غذاء صحي 👎🏻      0\n",
+              "1  روحي لبريده تلقين اشباه كثير بس ماحد زيكم مشفو...      1\n",
+              "2                  مش باين حاجه خالص 😣 مش عارف بقى 😔      0\n",
+              "3  اليوم الاثنين 👏 يقولك من المسلمين عندهم خاله ا...      0\n",
+              "4                      حمدلله ماحطها في فمي اساسا 😷🤢      0"
+            ],
+            "text/html": [
+              "\n",
+              "  <div id=\"df-53dfce8f-80c8-4867-84ef-a7c135fbf8b2\" class=\"colab-df-container\">\n",
+              "    <div>\n",
+              "<style scoped>\n",
+              "    .dataframe tbody tr th:only-of-type {\n",
+              "        vertical-align: middle;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe tbody tr th {\n",
+              "        vertical-align: top;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe thead th {\n",
+              "        text-align: right;\n",
+              "    }\n",
+              "</style>\n",
+              "<table border=\"1\" class=\"dataframe\">\n",
+              "  <thead>\n",
+              "    <tr style=\"text-align: right;\">\n",
+              "      <th></th>\n",
+              "      <th>Text</th>\n",
+              "      <th>label</th>\n",
+              "    </tr>\n",
+              "  </thead>\n",
+              "  <tbody>\n",
+              "    <tr>\n",
+              "      <th>0</th>\n",
+              "      <td>وصارت فطاير البقالات غذاء صحي 👎🏻</td>\n",
+              "      <td>0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>1</th>\n",
+              "      <td>روحي لبريده تلقين اشباه كثير بس ماحد زيكم مشفو...</td>\n",
+              "      <td>1</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>2</th>\n",
+              "      <td>مش باين حاجه خالص 😣 مش عارف بقى 😔</td>\n",
+              "      <td>0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>3</th>\n",
+              "      <td>اليوم الاثنين 👏 يقولك من المسلمين عندهم خاله ا...</td>\n",
+              "      <td>0</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>4</th>\n",
+              "      <td>حمدلله ماحطها في فمي اساسا 😷🤢</td>\n",
+              "      <td>0</td>\n",
+              "    </tr>\n",
+              "  </tbody>\n",
+              "</table>\n",
+              "</div>\n",
+              "    <div class=\"colab-df-buttons\">\n",
+              "\n",
+              "  <div class=\"colab-df-container\">\n",
+              "    <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-53dfce8f-80c8-4867-84ef-a7c135fbf8b2')\"\n",
+              "            title=\"Convert this dataframe to an interactive table.\"\n",
+              "            style=\"display:none;\">\n",
+              "\n",
+              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
+              "    <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
+              "  </svg>\n",
+              "    </button>\n",
+              "\n",
+              "  <style>\n",
+              "    .colab-df-container {\n",
+              "      display:flex;\n",
+              "      gap: 12px;\n",
+              "    }\n",
+              "\n",
+              "    .colab-df-convert {\n",
+              "      background-color: #E8F0FE;\n",
+              "      border: none;\n",
+              "      border-radius: 50%;\n",
+              "      cursor: pointer;\n",
+              "      display: none;\n",
+              "      fill: #1967D2;\n",
+              "      height: 32px;\n",
+              "      padding: 0 0 0 0;\n",
+              "      width: 32px;\n",
+              "    }\n",
+              "\n",
+              "    .colab-df-convert:hover {\n",
+              "      background-color: #E2EBFA;\n",
+              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
+              "      fill: #174EA6;\n",
+              "    }\n",
+              "\n",
+              "    .colab-df-buttons div {\n",
+              "      margin-bottom: 4px;\n",
+              "    }\n",
+              "\n",
+              "    [theme=dark] .colab-df-convert {\n",
+              "      background-color: #3B4455;\n",
+              "      fill: #D2E3FC;\n",
+              "    }\n",
+              "\n",
+              "    [theme=dark] .colab-df-convert:hover {\n",
+              "      background-color: #434B5C;\n",
+              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
+              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
+              "      fill: #FFFFFF;\n",
+              "    }\n",
+              "  </style>\n",
+              "\n",
+              "    <script>\n",
+              "      const buttonEl =\n",
+              "        document.querySelector('#df-53dfce8f-80c8-4867-84ef-a7c135fbf8b2 button.colab-df-convert');\n",
+              "      buttonEl.style.display =\n",
+              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
+              "\n",
+              "      async function convertToInteractive(key) {\n",
+              "        const element = document.querySelector('#df-53dfce8f-80c8-4867-84ef-a7c135fbf8b2');\n",
+              "        const dataTable =\n",
+              "          await google.colab.kernel.invokeFunction('convertToInteractive',\n",
+              "                                                    [key], {});\n",
+              "        if (!dataTable) return;\n",
+              "\n",
+              "        const docLinkHtml = 'Like what you see? Visit the ' +\n",
+              "          '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
+              "          + ' to learn more about interactive tables.';\n",
+              "        element.innerHTML = '';\n",
+              "        dataTable['output_type'] = 'display_data';\n",
+              "        await google.colab.output.renderOutput(dataTable, element);\n",
+              "        const docLink = document.createElement('div');\n",
+              "        docLink.innerHTML = docLinkHtml;\n",
+              "        element.appendChild(docLink);\n",
+              "      }\n",
+              "    </script>\n",
+              "  </div>\n",
+              "\n",
+              "\n",
+              "<div id=\"df-25fd1492-1f0f-4e52-8621-a2a911609a33\">\n",
+              "  <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-25fd1492-1f0f-4e52-8621-a2a911609a33')\"\n",
+              "            title=\"Suggest charts\"\n",
+              "            style=\"display:none;\">\n",
+              "\n",
+              "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
+              "     width=\"24px\">\n",
+              "    <g>\n",
+              "        <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
+              "    </g>\n",
+              "</svg>\n",
+              "  </button>\n",
+              "\n",
+              "<style>\n",
+              "  .colab-df-quickchart {\n",
+              "      --bg-color: #E8F0FE;\n",
+              "      --fill-color: #1967D2;\n",
+              "      --hover-bg-color: #E2EBFA;\n",
+              "      --hover-fill-color: #174EA6;\n",
+              "      --disabled-fill-color: #AAA;\n",
+              "      --disabled-bg-color: #DDD;\n",
+              "  }\n",
+              "\n",
+              "  [theme=dark] .colab-df-quickchart {\n",
+              "      --bg-color: #3B4455;\n",
+              "      --fill-color: #D2E3FC;\n",
+              "      --hover-bg-color: #434B5C;\n",
+              "      --hover-fill-color: #FFFFFF;\n",
+              "      --disabled-bg-color: #3B4455;\n",
+              "      --disabled-fill-color: #666;\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-quickchart {\n",
+              "    background-color: var(--bg-color);\n",
+              "    border: none;\n",
+              "    border-radius: 50%;\n",
+              "    cursor: pointer;\n",
+              "    display: none;\n",
+              "    fill: var(--fill-color);\n",
+              "    height: 32px;\n",
+              "    padding: 0;\n",
+              "    width: 32px;\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-quickchart:hover {\n",
+              "    background-color: var(--hover-bg-color);\n",
+              "    box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
+              "    fill: var(--button-hover-fill-color);\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-quickchart-complete:disabled,\n",
+              "  .colab-df-quickchart-complete:disabled:hover {\n",
+              "    background-color: var(--disabled-bg-color);\n",
+              "    fill: var(--disabled-fill-color);\n",
+              "    box-shadow: none;\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-spinner {\n",
+              "    border: 2px solid var(--fill-color);\n",
+              "    border-color: transparent;\n",
+              "    border-bottom-color: var(--fill-color);\n",
+              "    animation:\n",
+              "      spin 1s steps(1) infinite;\n",
+              "  }\n",
+              "\n",
+              "  @keyframes spin {\n",
+              "    0% {\n",
+              "      border-color: transparent;\n",
+              "      border-bottom-color: var(--fill-color);\n",
+              "      border-left-color: var(--fill-color);\n",
+              "    }\n",
+              "    20% {\n",
+              "      border-color: transparent;\n",
+              "      border-left-color: var(--fill-color);\n",
+              "      border-top-color: var(--fill-color);\n",
+              "    }\n",
+              "    30% {\n",
+              "      border-color: transparent;\n",
+              "      border-left-color: var(--fill-color);\n",
+              "      border-top-color: var(--fill-color);\n",
+              "      border-right-color: var(--fill-color);\n",
+              "    }\n",
+              "    40% {\n",
+              "      border-color: transparent;\n",
+              "      border-right-color: var(--fill-color);\n",
+              "      border-top-color: var(--fill-color);\n",
+              "    }\n",
+              "    60% {\n",
+              "      border-color: transparent;\n",
+              "      border-right-color: var(--fill-color);\n",
+              "    }\n",
+              "    80% {\n",
+              "      border-color: transparent;\n",
+              "      border-right-color: var(--fill-color);\n",
+              "      border-bottom-color: var(--fill-color);\n",
+              "    }\n",
+              "    90% {\n",
+              "      border-color: transparent;\n",
+              "      border-bottom-color: var(--fill-color);\n",
+              "    }\n",
+              "  }\n",
+              "</style>\n",
+              "\n",
+              "  <script>\n",
+              "    async function quickchart(key) {\n",
+              "      const quickchartButtonEl =\n",
+              "        document.querySelector('#' + key + ' button');\n",
+              "      quickchartButtonEl.disabled = true;  // To prevent multiple clicks.\n",
+              "      quickchartButtonEl.classList.add('colab-df-spinner');\n",
+              "      try {\n",
+              "        const charts = await google.colab.kernel.invokeFunction(\n",
+              "            'suggestCharts', [key], {});\n",
+              "      } catch (error) {\n",
+              "        console.error('Error during call to suggestCharts:', error);\n",
+              "      }\n",
+              "      quickchartButtonEl.classList.remove('colab-df-spinner');\n",
+              "      quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
+              "    }\n",
+              "    (() => {\n",
+              "      let quickchartButtonEl =\n",
+              "        document.querySelector('#df-25fd1492-1f0f-4e52-8621-a2a911609a33 button');\n",
+              "      quickchartButtonEl.style.display =\n",
+              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
+              "    })();\n",
+              "  </script>\n",
+              "</div>\n",
+              "\n",
+              "    </div>\n",
+              "  </div>\n"
+            ],
+            "application/vnd.google.colaboratory.intrinsic+json": {
+              "type": "dataframe",
+              "variable_name": "train_data_resampled",
+              "summary": "{\n  \"name\": \"train_data_resampled\",\n  \"rows\": 11430,\n  \"fields\": [\n    {\n      \"column\": \"Text\",\n      \"properties\": {\n        \"dtype\": \"string\",\n        \"num_unique_values\": 8870,\n        \"samples\": [\n          \"\\u0647\\u0647 \\u0645\\u0648 \\u0628\\u0646\\u062a\\u064a \\ud83d\\udc4a\\ud83d\\udc4a\\ud83d\\ude02 \\u0645\\u0627\\u0632\\u0644\\u062a \\u0639\\u0630\\u0628\\u0627\\u0621 \\u0648\\u0627\\u0641\\u062a\\u062e\\u0631 \\ud83d\\ude0c\\ud83d\\ude02\",\n          \"\\u064a\\u0627\\u0631\\u0628 \\u0635\\u0628\\u0631\\u0646\\u064a \\u0639\\u0644\\u0649 \\u0644\\u0648\\u0645\\u064a \\ud83d\\udc94\\ud83d\\udc94 \\u064a\\u0635\\u062d\\u064a\\u0646\\u064a \\u0645\\u0646 \\u0639\\u0632 \\u0646\\u0648\\u0645\\u064a \\u0628\\u0627\\u0644\\u0636\\u0631\\u0628 \\u0639\\u0644\\u0649 \\u0631\\u0627\\u0633\\u064a \\ud83d\\ude21\\ud83d\\udc4a\",\n          \"\\u0648\\u0627\\u0644\\u0644\\u0647 \\u0627\\u0644\\u0646\\u0627\\u0642\\u0635\\u0647 \\u0627\\u0645\\u0643 \\u0644\\u0644\\u0627\\u0633\\u0641 \\u062a\\u062d\\u062a\\u0627\\u062c \\u0645\\u0646\\u0643\\u0645 \\u0627\\u0646 \\u062a\\u0643\\u0645\\u0644\\u0648\\u0647\\u0627 \\u0627\\u0645\\u0627 \\u0646\\u062d\\u0646 \\u0648\\u0627\\u0644\\u0644\\u0647 \\u0627\\u0644\\u062d\\u0645\\u062f \\u0643\\u0627\\u0645\\u0644\\u064a\\u0646 \\u0639\\u0642\\u0644 \\u064a\\u0627\\u0646\\u0627\\u0642\\u0635 \\u0627\\u0644\\u0639\\u0642\\u0644 \\u0644\\u0627 \\u0648\\u0627\\u0639\\u0644\\u0627\\u0645\\u064a \\ud83e\\udd22\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"label\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 0,\n        \"min\": 0,\n        \"max\": 1,\n        \"num_unique_values\": 2,\n        \"samples\": [\n          1,\n          0\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    }\n  ]\n}"
+            }
+          },
+          "metadata": {},
+          "execution_count": 24
+        }
+      ],
+      "source": [
+        "train_data_resampled.head()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 25,
+      "metadata": {
+        "id": "9UYWjyD_7Xzs",
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "outputId": "947db852-87e5-4e3c-e79c-afeaa7d9f96a"
+      },
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Training data shape: (11430,) (11430,)\n",
+            "Validation data shape: (1269,) (1269,)\n"
+          ]
+        }
+      ],
+      "source": [
+        "# NOTE(review): train_test_split is imported but not used in this cell.\n",
+        "from sklearn.model_selection import train_test_split\n",
+        "\n",
+        "# Training arrays come from the oversampled frame; dev_data is used unchanged for validation.\n",
+        "X_train, y_train = (train_data_resampled[col].values for col in ('Text', 'label'))\n",
+        "X_val, y_val = (dev_data[col].values for col in ('Text', 'label'))\n",
+        "\n",
+        "print(f\"Training data shape: {X_train.shape} {y_train.shape}\")\n",
+        "print(f\"Validation data shape: {X_val.shape} {y_val.shape}\")\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 26,
+      "metadata": {
+        "id": "C4lzRYvx7X6U",
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "outputId": "895b827b-d407-4c37-f630-5ce6f486025a"
+      },
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Maximum length of text: 59\n"
+          ]
+        }
+      ],
+      "source": [
+        "# Length here is the number of whitespace-separated words per training text.\n",
+        "train_text_lengths = list(len(text.split()) for text in X_train)\n",
+        "max_length = max(train_text_lengths)\n",
+        "\n",
+        "print(f\"Maximum length of text: {max_length}\")"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "### APPLYING QARIB MODEL"
+      ],
+      "metadata": {
+        "id": "tEoMuIeVkKYc"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# %pip installs into the environment of the running kernel (unlike `!pip`, which runs in a subshell).\n",
+        "# The version is pinned to the one shown in this cell's recorded output so reruns use the same release.\n",
+        "%pip install transformers[torch]==4.42.4"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "7ivxSOxUkIzt",
+        "outputId": "c237ed8e-5972-4612-dfd6-69e692d579ab"
+      },
+      "execution_count": 27,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Requirement already satisfied: transformers[torch] in /usr/local/lib/python3.10/dist-packages (4.42.4)\n",
+            "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from transformers[torch]) (3.15.4)\n",
+            "Requirement already satisfied: huggingface-hub<1.0,>=0.23.2 in /usr/local/lib/python3.10/dist-packages (from transformers[torch]) (0.23.5)\n",
+            "Requirement already satisfied: numpy<2.0,>=1.17 in /usr/local/lib/python3.10/dist-packages (from transformers[torch]) (1.26.4)\n",
+            "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from transformers[torch]) (24.1)\n",
+            "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from transformers[torch]) (6.0.2)\n",
+            "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers[torch]) (2024.5.15)\n",
+            "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from transformers[torch]) (2.32.3)\n",
+            "Requirement already satisfied: safetensors>=0.4.1 in /usr/local/lib/python3.10/dist-packages (from transformers[torch]) (0.4.4)\n",
+            "Requirement already satisfied: tokenizers<0.20,>=0.19 in /usr/local/lib/python3.10/dist-packages (from transformers[torch]) (0.19.1)\n",
+            "Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.10/dist-packages (from transformers[torch]) (4.66.5)\n",
+            "Requirement already satisfied: accelerate>=0.21.0 in /usr/local/lib/python3.10/dist-packages (from transformers[torch]) (0.32.1)\n",
+            "Requirement already satisfied: torch in /usr/local/lib/python3.10/dist-packages (from transformers[torch]) (2.3.1+cu121)\n",
+            "Requirement already satisfied: psutil in /usr/local/lib/python3.10/dist-packages (from accelerate>=0.21.0->transformers[torch]) (5.9.5)\n",
+            "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.23.2->transformers[torch]) (2024.6.1)\n",
+            "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.23.2->transformers[torch]) (4.12.2)\n",
+            "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch->transformers[torch]) (1.13.1)\n",
+            "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch->transformers[torch]) (3.3)\n",
+            "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch->transformers[torch]) (3.1.4)\n",
+            "Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.1.105 in /usr/local/lib/python3.10/dist-packages (from torch->transformers[torch]) (12.1.105)\n",
+            "Requirement already satisfied: nvidia-cuda-runtime-cu12==12.1.105 in /usr/local/lib/python3.10/dist-packages (from torch->transformers[torch]) (12.1.105)\n",
+            "Requirement already satisfied: nvidia-cuda-cupti-cu12==12.1.105 in /usr/local/lib/python3.10/dist-packages (from torch->transformers[torch]) (12.1.105)\n",
+            "Requirement already satisfied: nvidia-cudnn-cu12==8.9.2.26 in /usr/local/lib/python3.10/dist-packages (from torch->transformers[torch]) (8.9.2.26)\n",
+            "Requirement already satisfied: nvidia-cublas-cu12==12.1.3.1 in /usr/local/lib/python3.10/dist-packages (from torch->transformers[torch]) (12.1.3.1)\n",
+            "Requirement already satisfied: nvidia-cufft-cu12==11.0.2.54 in /usr/local/lib/python3.10/dist-packages (from torch->transformers[torch]) (11.0.2.54)\n",
+            "Requirement already satisfied: nvidia-curand-cu12==10.3.2.106 in /usr/local/lib/python3.10/dist-packages (from torch->transformers[torch]) (10.3.2.106)\n",
+            "Requirement already satisfied: nvidia-cusolver-cu12==11.4.5.107 in /usr/local/lib/python3.10/dist-packages (from torch->transformers[torch]) (11.4.5.107)\n",
+            "Requirement already satisfied: nvidia-cusparse-cu12==12.1.0.106 in /usr/local/lib/python3.10/dist-packages (from torch->transformers[torch]) (12.1.0.106)\n",
+            "Requirement already satisfied: nvidia-nccl-cu12==2.20.5 in /usr/local/lib/python3.10/dist-packages (from torch->transformers[torch]) (2.20.5)\n",
+            "Requirement already satisfied: nvidia-nvtx-cu12==12.1.105 in /usr/local/lib/python3.10/dist-packages (from torch->transformers[torch]) (12.1.105)\n",
+            "Requirement already satisfied: triton==2.3.1 in /usr/local/lib/python3.10/dist-packages (from torch->transformers[torch]) (2.3.1)\n",
+            "Requirement already satisfied: nvidia-nvjitlink-cu12 in /usr/local/lib/python3.10/dist-packages (from nvidia-cusolver-cu12==11.4.5.107->torch->transformers[torch]) (12.6.20)\n",
+            "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->transformers[torch]) (3.3.2)\n",
+            "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->transformers[torch]) (3.7)\n",
+            "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->transformers[torch]) (2.0.7)\n",
+            "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->transformers[torch]) (2024.7.4)\n",
+            "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch->transformers[torch]) (2.1.5)\n",
+            "Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.10/dist-packages (from sympy->torch->transformers[torch]) (1.3.0)\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "import numpy as np\n",
+        "\n",
+        "# to prepare dataset and calculate metrics\n",
+        "from sklearn.metrics import classification_report, accuracy_score, f1_score, confusion_matrix, precision_score , recall_score\n",
+        "\n",
+        "from transformers import AutoConfig, BertForSequenceClassification, AutoTokenizer\n",
+        "from transformers.data.processors import SingleSentenceClassificationProcessor, InputFeatures\n",
+        "from transformers import Trainer , TrainingArguments"
+      ],
+      "metadata": {
+        "id": "cEAZniEwkI25"
+      },
+      "execution_count": 28,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Frames with a (label, text) column layout, as read by the feature-building cell.\n",
+        "train_df = pd.DataFrame(dict(label=y_train, text=X_train))\n",
+        "dev_df = pd.DataFrame(dict(label=y_val, text=X_val))\n",
+        "\n",
+        "# The test frame is built straight from the test_data columns.\n",
+        "test_df = pd.DataFrame(dict(label=test_data['label'], text=test_data['Text']))"
+      ],
+      "metadata": {
+        "id": "QtGA8ndrkI5f"
+      },
+      "execution_count": 29,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "PREFIX_LIST = [\n",
+        "    \"ال\",\n",
+        "    \"و\",\n",
+        "    \"ف\",\n",
+        "    \"ب\",\n",
+        "    \"ك\",\n",
+        "    \"ل\",\n",
+        "    \"لل\",\n",
+        "    \"\\u0627\\u0644\",\n",
+        "    \"\\u0648\",\n",
+        "    \"\\u0641\",\n",
+        "    \"\\u0628\",\n",
+        "    \"\\u0643\",\n",
+        "    \"\\u0644\",\n",
+        "    \"\\u0644\\u0644\",\n",
+        "    \"س\",\n",
+        "]\n",
+        "SUFFIX_LIST = [\n",
+        "    \"ه\",\n",
+        "    \"ها\",\n",
+        "    \"ك\",\n",
+        "    \"ي\",\n",
+        "    \"هما\",\n",
+        "    \"كما\",\n",
+        "    \"نا\",\n",
+        "    \"كم\",\n",
+        "    \"هم\",\n",
+        "    \"هن\",\n",
+        "    \"كن\",\n",
+        "    \"ا\",\n",
+        "    \"ان\",\n",
+        "    \"ين\",\n",
+        "    \"ون\",\n",
+        "    \"وا\",\n",
+        "    \"ات\",\n",
+        "    \"ت\",\n",
+        "    \"ن\",\n",
+        "    \"ة\",\n",
+        "    \"\\u0647\",\n",
+        "    \"\\u0647\\u0627\",\n",
+        "    \"\\u0643\",\n",
+        "    \"\\u064a\",\n",
+        "    \"\\u0647\\u0645\\u0627\",\n",
+        "    \"\\u0643\\u0645\\u0627\",\n",
+        "    \"\\u0646\\u0627\",\n",
+        "    \"\\u0643\\u0645\",\n",
+        "    \"\\u0647\\u0645\",\n",
+        "    \"\\u0647\\u0646\",\n",
+        "    \"\\u0643\\u0646\",\n",
+        "    \"\\u0627\",\n",
+        "    \"\\u0627\\u0646\",\n",
+        "    \"\\u064a\\u0646\",\n",
+        "    \"\\u0648\\u0646\",\n",
+        "    \"\\u0648\\u0627\",\n",
+        "    \"\\u0627\\u062a\",\n",
+        "    \"\\u062a\",\n",
+        "    \"\\u0646\",\n",
+        "    \"\\u0629\",\n",
+        "]\n",
+        "\n",
+        "\n",
+        "# Affix markers: every prefix gets a trailing \"+\", every suffix a leading \"+\".\n",
+        "# The de-duplicated union is the never_split list handed to the transformers tokenizer.\n",
+        "_PREFIX_SYMBOLS = [f\"{prefix}+\" for prefix in PREFIX_LIST]\n",
+        "_SUFFIX_SYMBOLS = [f\"+{suffix}\" for suffix in SUFFIX_LIST]\n",
+        "NEVER_SPLIT_TOKENS = list({*_PREFIX_SYMBOLS, *_SUFFIX_SYMBOLS})"
+      ],
+      "metadata": {
+        "id": "poPrBhk3kI8W"
+      },
+      "execution_count": 30,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "model_name = \"qarib/bert-base-qarib\"\n",
+        "num_labels = 2\n",
+        "# Sequence length shared by the tokenizer attribute and both get_features calls.\n",
+        "# NOTE(review): this limit is in tokenizer tokens, while the earlier max_length statistic counts\n",
+        "# whitespace words, so some texts may be truncated - confirm 64 is sufficient.\n",
+        "MAX_SEQ_LENGTH = 64\n",
+        "\n",
+        "# Load the pretrained configuration, tokenizer and classification model.\n",
+        "config = AutoConfig.from_pretrained(model_name, num_labels=num_labels, output_attentions=True)\n",
+        "tokenizer = AutoTokenizer.from_pretrained(model_name,\n",
+        "                                          do_lower_case=False,\n",
+        "                                          do_basic_tokenize=True,\n",
+        "                                          never_split=NEVER_SPLIT_TOKENS)\n",
+        "# NOTE(review): current tokenizers expose model_max_length rather than max_len, but\n",
+        "# SingleSentenceClassificationProcessor.get_features presumably still reads tokenizer.max_len -\n",
+        "# confirm against the installed transformers version before renaming or removing this line.\n",
+        "tokenizer.max_len = MAX_SEQ_LENGTH\n",
+        "model = BertForSequenceClassification.from_pretrained(model_name, config=config)\n",
+        "\n",
+        "# Wrap the train/dev frames in processors that hold the text examples and their labels.\n",
+        "train_dataset = SingleSentenceClassificationProcessor(mode='classification')\n",
+        "dev_dataset = SingleSentenceClassificationProcessor(mode='classification')\n",
+        "\n",
+        "train_dataset.add_examples(texts_or_text_and_labels=train_df['text'], labels=train_df['label'], overwrite_examples=True)\n",
+        "dev_dataset.add_examples(texts_or_text_and_labels=dev_df['text'], labels=dev_df['label'], overwrite_examples=True)\n",
+        "print(train_dataset.examples[0])\n",
+        "\n",
+        "# Tokenize every example into model input features.\n",
+        "train_features = train_dataset.get_features(tokenizer=tokenizer, max_length=MAX_SEQ_LENGTH)\n",
+        "dev_features = dev_dataset.get_features(tokenizer=tokenizer, max_length=MAX_SEQ_LENGTH)\n",
+        "\n",
+        "print(len(train_features))\n",
+        "print(len(dev_features))"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "mPueTThMkI-9",
+        "outputId": "2064aba1-e125-4d87-b502-b7ac54dc7b94"
+      },
+      "execution_count": 31,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "/usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_token.py:89: UserWarning: \n",
+            "The secret `HF_TOKEN` does not exist in your Colab secrets.\n",
+            "To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.\n",
+            "You will be able to reuse this secret in all of your notebooks.\n",
+            "Please note that authentication is recommended but still optional to access public models or datasets.\n",
+            "  warnings.warn(\n",
+            "Some weights of BertForSequenceClassification were not initialized from the model checkpoint at qarib/bert-base-qarib and are newly initialized: ['classifier.bias', 'classifier.weight']\n",
+            "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n",
+            "Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "InputExample(guid=None, text_a='وصارت فطاير البقالات غذاء صحي 👎🏻', text_b=None, label=0)\n",
+            "11430\n",
+            "1269\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "\n",
+        "def compute_metrics(p):  # p should be of type EvalPrediction\n",
+        "    \"\"\"Print a per-class report for the evaluation set and return macro metrics.\n",
+        "\n",
+        "    Args:\n",
+        "        p: object exposing `predictions` and `label_ids`. `predictions` is either the\n",
+        "            logits array itself or a tuple/list whose first item is the logits\n",
+        "            (for example when the model also returns attentions).\n",
+        "\n",
+        "    Returns:\n",
+        "        dict with the keys 'macro_f1', 'macro_precision', 'macro_recall' and 'accuracy'.\n",
+        "    \"\"\"\n",
+        "    # Indexing a bare logits array with [0] would silently pick its first row, so only\n",
+        "    # unpack when several model outputs were actually returned.\n",
+        "    if isinstance(p.predictions, (tuple, list)):\n",
+        "        logits = p.predictions[0]\n",
+        "    else:\n",
+        "        logits = p.predictions\n",
+        "    labels = p.label_ids\n",
+        "\n",
+        "    # Only the logits shape is reported: np.shape on the attention output (a sequence of\n",
+        "    # arrays, not one array) has to convert it into a single new array just to read a shape.\n",
+        "    print(np.shape(logits))\n",
+        "    print(len(labels))\n",
+        "    preds = np.argmax(logits, axis=1)\n",
+        "    assert len(preds) == len(labels)\n",
+        "    print(classification_report(labels, preds))\n",
+        "    print(confusion_matrix(labels, preds))\n",
+        "\n",
+        "    return {\n",
+        "      'macro_f1' : f1_score(labels, preds, average='macro'),\n",
+        "      'macro_precision': precision_score(labels, preds, average='macro'),\n",
+        "      'macro_recall': recall_score(labels, preds, average='macro'),\n",
+        "      'accuracy': accuracy_score(labels, preds)\n",
+        "    }"
+      ],
+      "metadata": {
+        "id": "bw33WDOvkJBQ"
+      },
+      "execution_count": 32,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# -p keeps this cell re-runnable: plain mkdir errors out once ./train already exists.\n",
+        "!mkdir -p train\n",
+        "training_args = TrainingArguments(\"./train\")\n",
+        "training_args.do_train = True\n",
+        "# NOTE(review): the training log only shows a 'Training Loss' column, so this flag appears\n",
+        "# to be ignored by the installed transformers version - confirm, and switch to the\n",
+        "# evaluation-strategy option if mid-training evaluation is actually wanted.\n",
+        "training_args.evaluate_during_training = True\n",
+        "training_args.adam_epsilon = 1e-8\n",
+        "training_args.learning_rate = 2e-5\n",
+        "training_args.warmup_steps = 0\n",
+        "training_args.per_device_train_batch_size = 32  # larger batch size\n",
+        "training_args.per_device_eval_batch_size = 32   # larger batch size\n",
+        "training_args.num_train_epochs = 2              # fewer epochs\n",
+        "training_args.logging_steps = 200               # log the training loss every 200 steps\n",
+        "training_args.save_steps = 1000                 # checkpoint every 1000 steps\n",
+        "training_args.seed = 42\n",
+        "print(training_args.logging_steps)"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "cv8L1xaEkJEA",
+        "outputId": "cb4048b3-c6d5-41b1-f419-3448cd153c4f"
+      },
+      "execution_count": 33,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "mkdir: cannot create directory ‘train’: File exists\n",
+            "200\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# instantiate trainer\n",
+        "# Bundle the model, the hyper-parameters, both encoded splits and the metric callback.\n",
+        "trainer = Trainer(\n",
+        "    model=model,\n",
+        "    args=training_args,\n",
+        "    train_dataset=train_features,\n",
+        "    eval_dataset=dev_features,\n",
+        "    compute_metrics=compute_metrics,\n",
+        ")\n",
+        "# Fine-tune; the call stays the last expression so its TrainOutput is displayed.\n",
+        "trainer.train()"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 224
+        },
+        "id": "Z5rCtBVskJGY",
+        "outputId": "1e2bfab6-7e6b-4609-f330-309bca070d00"
+      },
+      "execution_count": 34,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "BertSdpaSelfAttention is used but `torch.nn.functional.scaled_dot_product_attention` does not support non-absolute `position_embedding_type` or `output_attentions=True` or `head_mask`. Falling back to the manual attention implementation, but specifying the manual implementation will be required from Transformers version v5.0.0 onwards. This warning can be removed using the argument `attn_implementation=\"eager\"` when loading the model.\n"
+          ]
+        },
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "<IPython.core.display.HTML object>"
+            ],
+            "text/html": [
+              "\n",
+              "    <div>\n",
+              "      \n",
+              "      <progress value='716' max='716' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
+              "      [716/716 04:29, Epoch 2/2]\n",
+              "    </div>\n",
+              "    <table border=\"1\" class=\"dataframe\">\n",
+              "  <thead>\n",
+              " <tr style=\"text-align: left;\">\n",
+              "      <th>Step</th>\n",
+              "      <th>Training Loss</th>\n",
+              "    </tr>\n",
+              "  </thead>\n",
+              "  <tbody>\n",
+              "    <tr>\n",
+              "      <td>200</td>\n",
+              "      <td>0.409100</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <td>400</td>\n",
+              "      <td>0.302700</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <td>600</td>\n",
+              "      <td>0.173300</td>\n",
+              "    </tr>\n",
+              "  </tbody>\n",
+              "</table><p>"
+            ]
+          },
+          "metadata": {}
+        },
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "TrainOutput(global_step=716, training_loss=0.2719311820728153, metrics={'train_runtime': 273.0161, 'train_samples_per_second': 83.731, 'train_steps_per_second': 2.623, 'total_flos': 751839840691200.0, 'train_loss': 0.2719311820728153, 'epoch': 2.0})"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 34
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "trainer.evaluate()"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 436
+        },
+        "id": "7rs4VqEmkJJP",
+        "outputId": "886bc7fc-4f36-4108-8ae3-176d195b250b"
+      },
+      "execution_count": 35,
+      "outputs": [
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "<IPython.core.display.HTML object>"
+            ],
+            "text/html": [
+              "\n",
+              "    <div>\n",
+              "      \n",
+              "      <progress value='40' max='40' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
+              "      [40/40 00:05]\n",
+              "    </div>\n",
+              "    "
+            ]
+          },
+          "metadata": {}
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "(1269, 2)\n",
+            "(12, 1269, 12, 64, 64)\n",
+            "1269\n",
+            "              precision    recall  f1-score   support\n",
+            "\n",
+            "           0       0.92      0.89      0.90       865\n",
+            "           1       0.78      0.83      0.80       404\n",
+            "\n",
+            "    accuracy                           0.87      1269\n",
+            "   macro avg       0.85      0.86      0.85      1269\n",
+            "weighted avg       0.87      0.87      0.87      1269\n",
+            "\n",
+            "[[768  97]\n",
+            " [ 69 335]]\n"
+          ]
+        },
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "{'eval_loss': 0.44044920802116394,\n",
+              " 'eval_macro_f1': 0.8519515458874727,\n",
+              " 'eval_macro_precision': 0.84651284348865,\n",
+              " 'eval_macro_recall': 0.8585345962341899,\n",
+              " 'eval_accuracy': 0.8691883372734437,\n",
+              " 'eval_runtime': 10.8551,\n",
+              " 'eval_samples_per_second': 116.904,\n",
+              " 'eval_steps_per_second': 3.685,\n",
+              " 'epoch': 2.0}"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 35
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "!pip install fasttext\n",
+        "import fasttext\n",
+        "import fasttext.util\n",
+        "from huggingface_hub import hf_hub_download\n",
+        "\n",
+        "# Fetch the Arabic fastText vectors from the Hugging Face Hub; the call returns a local file path.\n",
+        "model_path = hf_hub_download(repo_id=\"facebook/fasttext-ar-vectors\", filename=\"model.bin\")\n",
+        "# Alternative: load a locally stored, already reduced 150-dimensional model instead.\n",
+        "# model_path = \"./fasttext-ar-vectors-150.bin\"\n",
+        "model_fasttext = fasttext.load_model(model_path)\n",
+        "# Optional one-off step (kept for reference): shrink the vectors and save the smaller model.\n",
+        "# model_fasttext = fasttext.util.reduce_model(model_fasttext, 150) # reduce embeddings dimension to 150 from 300; requires a huge memory notebook\n",
+        "# model_fasttext.save_model(\"/content/drive/MyDrive/Colab Notebooks/text-aml/hate-speech-ds/fasttext-ar-vectors-150.bin\")\n",
+        "# Sanity checks: vocabulary size, then the dimensionality of a single word vector.\n",
+        "print(len(model_fasttext.words))\n",
+        "model_fasttext['bread'].shape"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "nRVxt4kUsIrl",
+        "outputId": "390379f6-2a02-40fe-da8f-df00a6869d57"
+      },
+      "execution_count": 36,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Requirement already satisfied: fasttext in /usr/local/lib/python3.10/dist-packages (0.9.2)\n",
+            "Requirement already satisfied: pybind11>=2.2 in /usr/local/lib/python3.10/dist-packages (from fasttext) (2.13.4)\n",
+            "Requirement already satisfied: setuptools>=0.7.0 in /usr/local/lib/python3.10/dist-packages (from fasttext) (71.0.4)\n",
+            "Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from fasttext) (1.26.4)\n",
+            "2000000\n"
+          ]
+        },
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "(300,)"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 36
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "import nltk\n",
+        "from nltk.corpus import stopwords\n",
+        "from nltk.tokenize import WordPunctTokenizer\n",
+        "from nltk.stem.isri import ISRIStemmer\n",
+        "import string\n",
+        "import re\n",
+        "from bs4 import BeautifulSoup\n",
+        "nltk.download('stopwords')\n",
+        "\n",
+        "\n",
+        "tok = WordPunctTokenizer()\n",
+        "\n",
+        "def normalize_arabic(text):\n",
+        "    \"\"\"Collapse common Arabic spelling variants onto one canonical letter.\n",
+        "\n",
+        "    Every key of the table below is rewritten to its value; all other\n",
+        "    characters pass through unchanged.\n",
+        "    \"\"\"\n",
+        "    canonical_forms = {\n",
+        "        \"إ\": \"ا\",\n",
+        "        \"أ\": \"ا\",\n",
+        "        \"آ\": \"ا\",\n",
+        "        \"ى\": \"ي\",\n",
+        "        \"ؤ\": \"ء\",\n",
+        "        \"ئ\": \"ء\",\n",
+        "        \"ة\": \"ه\",\n",
+        "        \"گ\": \"ك\",\n",
+        "    }\n",
+        "    return text.translate(str.maketrans(canonical_forms))\n",
+        "\n",
+        "\n",
+        "def remove_diacritics(text):\n",
+        "    \"\"\"Strip the Arabic vowel/gemination marks and the tatweel elongation character.\"\"\"\n",
+        "    # re.VERBOSE ignores the whitespace inside the pattern and treats '#' as a comment,\n",
+        "    # which is what allows one labelled alternative per line.\n",
+        "    diacritics_pattern = re.compile(\"\"\"\n",
+        "                             ّ    | # Tashdid\n",
+        "                             َ    | # Fatha\n",
+        "                             ً    | # Tanwin Fath\n",
+        "                             ُ    | # Damma\n",
+        "                             ٌ    | # Tanwin Damm\n",
+        "                             ِ    | # Kasra\n",
+        "                             ٍ    | # Tanwin Kasr\n",
+        "                             ْ    | # Sukun\n",
+        "                             ـ     # Tatwil/Kashida\n",
+        "                         \"\"\", re.VERBOSE)\n",
+        "    return diacritics_pattern.sub('', text)\n",
+        "\n",
+        "\n",
+        "def remove_punctuations(text):\n",
+        "    \"\"\"Delete every listed Arabic punctuation mark and every ASCII punctuation character.\"\"\"\n",
+        "    arabic_punctuations = '''`÷×؛<>_()*&^%][ـ،/:\"؟.,'{}~¦+|!”…“–ـ'''\n",
+        "    # The third argument of str.maketrans lists the characters to drop.\n",
+        "    drop_table = str.maketrans('', '', arabic_punctuations + string.punctuation)\n",
+        "    return text.translate(drop_table)\n",
+        "\n",
+        "\n",
+        "def remove_repeating_char(text):\n",
+        "    \"\"\"Shorten every run of one repeated character to exactly two copies.\n",
+        "\n",
+        "    Runs of length two stay as they are. '.' does not match a newline, so\n",
+        "    repeated line breaks are left untouched.\n",
+        "    \"\"\"\n",
+        "    # Alternative: substitute r'\\1' instead to keep a single copy of each run.\n",
+        "    repeated_run = re.compile(r'(.)\\1+')\n",
+        "    return repeated_run.sub(r'\\1\\1', text)\n",
+        "\n",
+        "def remove_stop_words(text):\n",
+        "    \"\"\"Lower-case `text`, tokenize it and drop English and Arabic stop words.\n",
+        "\n",
+        "    Needs the NLTK 'stopwords' corpus to be downloaded and the module-level\n",
+        "    `arabic_stop_words` list to be defined before the first call.\n",
+        "    \"\"\"\n",
+        "    blocked = set(stopwords.words('english') + arabic_stop_words)\n",
+        "    tokens = nltk.tokenize.wordpunct_tokenize(text.lower())\n",
+        "    kept = filter(lambda token: token not in blocked, tokens)\n",
+        "    return \" \".join(kept).strip()\n",
+        "\n",
+        "def get_root(text):\n",
+        "    \"\"\"Lower-case and tokenize `text`, then replace each token by its ISRI stem.\"\"\"\n",
+        "    stemmer = ISRIStemmer()\n",
+        "    tokens = nltk.tokenize.wordpunct_tokenize(text.lower())\n",
+        "    return ' '.join(stemmer.stem(token) for token in tokens).strip()\n",
+        "\n",
+        "def clean_tweet(text):\n",
+        "    \"\"\"Blank out URLs, @mentions, Latin letters/digits/underscores and '#'; drop tatweel.\n",
+        "\n",
+        "    Each match of the first pattern is replaced by a single space, so a hashtag\n",
+        "    written in Arabic keeps its word and only loses the '#' sign.\n",
+        "    \"\"\"\n",
+        "    # The URL branch has to come first: alternatives are tried left to right, and with\n",
+        "    # the character class in front only \"http\" was consumed, leaving \"://\", \".\" and \"/\".\n",
+        "    text = re.sub(r'http\\S+|[@A-Za-z0-9_]+|#', ' ', text)\n",
+        "    # Remove tatweel/kashida (U+0640) runs of any length, not just one fixed-length run.\n",
+        "    text = re.sub(r'\\u0640+', '', text)\n",
+        "    return text\n",
+        "\n",
+        "\n",
+        "\n",
+        "\n",
+        "def clean_str(text):\n",
+        "    \"\"\"Run the full tweet clean-up and return the tokens joined by single spaces.\n",
+        "\n",
+        "    Steps: clean_tweet, punctuation removal, diacritic removal, repeated-character\n",
+        "    squeezing, collapsing of doubled long vowels, HTML text extraction, removal of\n",
+        "    any remaining @mention/URL patterns, and WordPunct tokenization.\n",
+        "    \"\"\"\n",
+        "    text = clean_tweet(text)\n",
+        "    # Optional step, currently disabled: text = normalize_arabic(text)\n",
+        "    text = remove_punctuations(text)\n",
+        "    text = remove_diacritics(text)\n",
+        "    text = remove_repeating_char(text)\n",
+        "    # Optional step, currently disabled: text = remove_stop_words(text)\n",
+        "\n",
+        "    # remove_repeating_char leaves two copies of a repeated letter; reduce the\n",
+        "    # doubled long vowels to a single one.\n",
+        "    text = text.replace('وو', 'و')\n",
+        "    text = text.replace('يي', 'ي')\n",
+        "    text = text.replace('اا', 'ا')\n",
+        "\n",
+        "    # Optional step, currently disabled: text = get_root(text)\n",
+        "\n",
+        "    souped = BeautifulSoup(text, 'lxml').get_text()\n",
+        "    pat1 = r'@[A-Za-z0-9]+'\n",
+        "    pat2 = r'https?://[A-Za-z0-9./]+'\n",
+        "    combined_pat = r'|'.join((pat1, pat2))\n",
+        "    stripped = re.sub(combined_pat, '', souped)\n",
+        "    # The former `stripped.decode(\"utf-8-sig\")` call was Python 2 code: str has no\n",
+        "    # decode(), so it always raised and a bare `except:` silently fell back to\n",
+        "    # `stripped`. The text is already a decoded str, so it is used directly.\n",
+        "    words = tok.tokenize(stripped)\n",
+        "    return \" \".join(words).strip()"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "GjFkTWjivAGM",
+        "outputId": "f791d7bc-7f70-491b-b2f3-60d80a8322f5"
+      },
+      "execution_count": 37,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "[nltk_data] Downloading package stopwords to /root/nltk_data...\n",
+            "[nltk_data]   Package stopwords is already up-to-date!\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "!gdown \"165kzfZDsRTZAAfZKedeZiUlKzMcHNgPd\"  # arabic stop words\n",
+        "!gdown \"1WdgbvqDYIa-g5ijjsz5zb-3lVvUXUtmS&confirm=t\"  # qarib pretrained model\n",
+        "!gdown \"1foNTGFjhWAxS-_SfF7rga80UmFT7BDJ0&confirm=t\"  # fasttext-ar-vectors-150.bin"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "9OkLaRcYvOky",
+        "outputId": "9b38be6e-aee6-4e34-ba3e-fa142aa95903"
+      },
+      "execution_count": 38,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Downloading...\n",
+            "From: https://drive.google.com/uc?id=165kzfZDsRTZAAfZKedeZiUlKzMcHNgPd\n",
+            "To: /content/Arabic_stop_words.txt\n",
+            "\r  0% 0.00/6.48k [00:00<?, ?B/s]\r100% 6.48k/6.48k [00:00<00:00, 19.2MB/s]\n",
+            "Failed to retrieve file url:\n",
+            "\n",
+            "\tCannot retrieve the public link of the file. You may need to change\n",
+            "\tthe permission to 'Anyone with the link', or have had many accesses.\n",
+            "\tCheck FAQ in https://github.com/wkentaro/gdown?tab=readme-ov-file#faq.\n",
+            "\n",
+            "You may still be able to access the file from the browser:\n",
+            "\n",
+            "\thttps://drive.google.com/uc?id=1WdgbvqDYIa-g5ijjsz5zb-3lVvUXUtmS&confirm=t\n",
+            "\n",
+            "but Gdown can't. Please check connections and permissions.\n",
+            "Failed to retrieve file url:\n",
+            "\n",
+            "\tCannot retrieve the public link of the file. You may need to change\n",
+            "\tthe permission to 'Anyone with the link', or have had many accesses.\n",
+            "\tCheck FAQ in https://github.com/wkentaro/gdown?tab=readme-ov-file#faq.\n",
+            "\n",
+            "You may still be able to access the file from the browser:\n",
+            "\n",
+            "\thttps://drive.google.com/uc?id=1foNTGFjhWAxS-_SfF7rga80UmFT7BDJ0&confirm=t\n",
+            "\n",
+            "but Gdown can't. Please check connections and permissions.\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "!pip install pyarabic\n",
+        "!pip install farasapy\n",
+        "!pip install transformers[torch]\n",
+        "!pip install Keras-Preprocessing\n",
+        "\n",
+        "! git clone https://github.com/facebookresearch/fastText.git\n",
+        "! cd fastText && sudo pip install ."
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "f_EewfRYuvHF",
+        "outputId": "75a374da-5887-410a-9ed9-807f2e1146bc"
+      },
+      "execution_count": 39,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Requirement already satisfied: pyarabic in /usr/local/lib/python3.10/dist-packages (0.6.15)\n",
+            "Requirement already satisfied: six>=1.14.0 in /usr/local/lib/python3.10/dist-packages (from pyarabic) (1.16.0)\n",
+            "Requirement already satisfied: farasapy in /usr/local/lib/python3.10/dist-packages (0.0.14)\n",
+            "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from farasapy) (2.32.3)\n",
+            "Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from farasapy) (4.66.5)\n",
+            "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->farasapy) (3.3.2)\n",
+            "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->farasapy) (3.7)\n",
+            "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->farasapy) (2.0.7)\n",
+            "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->farasapy) (2024.7.4)\n",
+            "Requirement already satisfied: transformers[torch] in /usr/local/lib/python3.10/dist-packages (4.42.4)\n",
+            "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from transformers[torch]) (3.15.4)\n",
+            "Requirement already satisfied: huggingface-hub<1.0,>=0.23.2 in /usr/local/lib/python3.10/dist-packages (from transformers[torch]) (0.23.5)\n",
+            "Requirement already satisfied: numpy<2.0,>=1.17 in /usr/local/lib/python3.10/dist-packages (from transformers[torch]) (1.26.4)\n",
+            "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from transformers[torch]) (24.1)\n",
+            "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from transformers[torch]) (6.0.2)\n",
+            "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers[torch]) (2024.5.15)\n",
+            "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from transformers[torch]) (2.32.3)\n",
+            "Requirement already satisfied: safetensors>=0.4.1 in /usr/local/lib/python3.10/dist-packages (from transformers[torch]) (0.4.4)\n",
+            "Requirement already satisfied: tokenizers<0.20,>=0.19 in /usr/local/lib/python3.10/dist-packages (from transformers[torch]) (0.19.1)\n",
+            "Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.10/dist-packages (from transformers[torch]) (4.66.5)\n",
+            "Requirement already satisfied: accelerate>=0.21.0 in /usr/local/lib/python3.10/dist-packages (from transformers[torch]) (0.32.1)\n",
+            "Requirement already satisfied: torch in /usr/local/lib/python3.10/dist-packages (from transformers[torch]) (2.3.1+cu121)\n",
+            "Requirement already satisfied: psutil in /usr/local/lib/python3.10/dist-packages (from accelerate>=0.21.0->transformers[torch]) (5.9.5)\n",
+            "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.23.2->transformers[torch]) (2024.6.1)\n",
+            "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.23.2->transformers[torch]) (4.12.2)\n",
+            "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch->transformers[torch]) (1.13.1)\n",
+            "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch->transformers[torch]) (3.3)\n",
+            "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch->transformers[torch]) (3.1.4)\n",
+            "Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.1.105 in /usr/local/lib/python3.10/dist-packages (from torch->transformers[torch]) (12.1.105)\n",
+            "Requirement already satisfied: nvidia-cuda-runtime-cu12==12.1.105 in /usr/local/lib/python3.10/dist-packages (from torch->transformers[torch]) (12.1.105)\n",
+            "Requirement already satisfied: nvidia-cuda-cupti-cu12==12.1.105 in /usr/local/lib/python3.10/dist-packages (from torch->transformers[torch]) (12.1.105)\n",
+            "Requirement already satisfied: nvidia-cudnn-cu12==8.9.2.26 in /usr/local/lib/python3.10/dist-packages (from torch->transformers[torch]) (8.9.2.26)\n",
+            "Requirement already satisfied: nvidia-cublas-cu12==12.1.3.1 in /usr/local/lib/python3.10/dist-packages (from torch->transformers[torch]) (12.1.3.1)\n",
+            "Requirement already satisfied: nvidia-cufft-cu12==11.0.2.54 in /usr/local/lib/python3.10/dist-packages (from torch->transformers[torch]) (11.0.2.54)\n",
+            "Requirement already satisfied: nvidia-curand-cu12==10.3.2.106 in /usr/local/lib/python3.10/dist-packages (from torch->transformers[torch]) (10.3.2.106)\n",
+            "Requirement already satisfied: nvidia-cusolver-cu12==11.4.5.107 in /usr/local/lib/python3.10/dist-packages (from torch->transformers[torch]) (11.4.5.107)\n",
+            "Requirement already satisfied: nvidia-cusparse-cu12==12.1.0.106 in /usr/local/lib/python3.10/dist-packages (from torch->transformers[torch]) (12.1.0.106)\n",
+            "Requirement already satisfied: nvidia-nccl-cu12==2.20.5 in /usr/local/lib/python3.10/dist-packages (from torch->transformers[torch]) (2.20.5)\n",
+            "Requirement already satisfied: nvidia-nvtx-cu12==12.1.105 in /usr/local/lib/python3.10/dist-packages (from torch->transformers[torch]) (12.1.105)\n",
+            "Requirement already satisfied: triton==2.3.1 in /usr/local/lib/python3.10/dist-packages (from torch->transformers[torch]) (2.3.1)\n",
+            "Requirement already satisfied: nvidia-nvjitlink-cu12 in /usr/local/lib/python3.10/dist-packages (from nvidia-cusolver-cu12==11.4.5.107->torch->transformers[torch]) (12.6.20)\n",
+            "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->transformers[torch]) (3.3.2)\n",
+            "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->transformers[torch]) (3.7)\n",
+            "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->transformers[torch]) (2.0.7)\n",
+            "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->transformers[torch]) (2024.7.4)\n",
+            "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch->transformers[torch]) (2.1.5)\n",
+            "Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.10/dist-packages (from sympy->torch->transformers[torch]) (1.3.0)\n",
+            "Requirement already satisfied: Keras-Preprocessing in /usr/local/lib/python3.10/dist-packages (1.1.2)\n",
+            "Requirement already satisfied: numpy>=1.9.1 in /usr/local/lib/python3.10/dist-packages (from Keras-Preprocessing) (1.26.4)\n",
+            "Requirement already satisfied: six>=1.9.0 in /usr/local/lib/python3.10/dist-packages (from Keras-Preprocessing) (1.16.0)\n",
+            "fatal: destination path 'fastText' already exists and is not an empty directory.\n",
+            "Processing /content/fastText\n",
+            "  Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n",
+            "  Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n",
+            "  Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
+            "Requirement already satisfied: pybind11>=2.2 in /usr/local/lib/python3.10/dist-packages (from fasttext==0.9.2) (2.13.4)\n",
+            "Requirement already satisfied: setuptools>=0.7.0 in /usr/local/lib/python3.10/dist-packages (from fasttext==0.9.2) (71.0.4)\n",
+            "Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from fasttext==0.9.2) (1.26.4)\n",
+            "Building wheels for collected packages: fasttext\n",
+            "  Building wheel for fasttext (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
+            "  Created wheel for fasttext: filename=fasttext-0.9.2-cp310-cp310-linux_x86_64.whl size=4246493 sha256=885fb048658f5739230aa0f1739c694da4108e0389d8c475331adf658e2d5d91\n",
+            "  Stored in directory: /tmp/pip-ephem-wheel-cache-j31t7qci/wheels/8b/05/af/3cfae069d904597d44b309c956601b611bdf8967bcbe968903\n",
+            "Successfully built fasttext\n",
+            "Installing collected packages: fasttext\n",
+            "  Attempting uninstall: fasttext\n",
+            "    Found existing installation: fasttext 0.9.2\n",
+            "    Uninstalling fasttext-0.9.2:\n",
+            "      Successfully uninstalled fasttext-0.9.2\n",
+            "Successfully installed fasttext-0.9.2\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "import torch\n",
+        "from transformers import pipeline\n",
+        "\n",
+        "# Without an explicit device the pipeline stays on the CPU even when a GPU is present,\n",
+        "# so use GPU 0 when CUDA is available and fall back to the CPU (-1) otherwise.\n",
+        "_marbert_device = 0 if torch.cuda.is_available() else -1\n",
+        "# top_k=50: return the 50 highest-scoring candidates for each masked position.\n",
+        "unmasker_MARBERT = pipeline('fill-mask', model='UBC-NLP/MARBERT', top_k=50, device=_marbert_device)"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "3sXnrmNU7t66",
+        "outputId": "6a70f0b3-a915-45c1-ae05-e846dcd56e88"
+      },
+      "execution_count": 40,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "Some weights of the model checkpoint at UBC-NLP/MARBERT were not used when initializing BertForMaskedLM: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']\n",
+            "- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
+            "- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
+            "Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "def light_preprocess(text):\n",
+        "    \"\"\"Run the light cleaning pipeline over a tweet and return the result.\n",
+        "\n",
+        "    Applies clean_tweet, remove_punctuations, remove_diacritics and\n",
+        "    remove_repeating_char in that order (normalize_arabic is not applied),\n",
+        "    then makes one pass per letter replacing each doubled waw / yaa / alef\n",
+        "    pair with a single letter.\n",
+        "    \"\"\"\n",
+        "    cleaning_steps = (\n",
+        "        clean_tweet,\n",
+        "        remove_punctuations,\n",
+        "        remove_diacritics,\n",
+        "        remove_repeating_char,\n",
+        "    )\n",
+        "    for step in cleaning_steps:\n",
+        "        text = step(text)\n",
+        "\n",
+        "    # (doubled, single) pairs, processed in this fixed order.\n",
+        "    doubled_letters = (('وو', 'و'), ('يي', 'ي'), ('اا', 'ا'))\n",
+        "    for doubled, single in doubled_letters:\n",
+        "        text = text.replace(doubled, single)\n",
+        "    return text\n",
+        "\n",
+        "nltk.download('stopwords')\n",
+        "englishStopWords = stopwords.words('english')\n",
+        "arabic_punctuations = '''`÷×؛<>_()*&^%][ـ،/:\"؟.,'{}~¦+|!”…“–ـ'''\n",
+        "english_punctuations = string.punctuation\n",
+        "punctuations_list = arabic_punctuations + english_punctuations\n",
+        "\n",
+        "all_stopwords = set(englishStopWords + arabic_stop_words)"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "erAHadS9sIww",
+        "outputId": "db064763-15a9-4f1f-a2a3-246cf1cf09ad"
+      },
+      "execution_count": 41,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "[nltk_data] Downloading package stopwords to /root/nltk_data...\n",
+            "[nltk_data]   Package stopwords is already up-to-date!\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "!pip install torch # Install the PyTorch library if you haven't already\n",
+        "\n",
+        "import torch\n",
+        "# Determine if a GPU is available and set the device accordingly\n",
+        "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
+        "def classsify_tweets(tweet, max_length=64):\n",
+        "    \"\"\"Return the classifier's raw logits for one or more tweets.\n",
+        "\n",
+        "    Args:\n",
+        "        tweet: A sequence of tweet strings (list, pandas Series, ...). A single\n",
+        "            string is also accepted and treated as a one-element batch.\n",
+        "        max_length: Maximum token length passed to the feature builder\n",
+        "            (defaults to 64, the value that used to be hard-coded).\n",
+        "\n",
+        "    Returns:\n",
+        "        A numpy array holding the model's logits, one row per input tweet.\n",
+        "\n",
+        "    Relies on notebook-level globals defined outside this function: `model`,\n",
+        "    `tokenizer`, `clean_str`, `SingleSentenceClassificationProcessor` and `device`.\n",
+        "    \"\"\"\n",
+        "    # A bare string would be iterated character by character, so wrap it.\n",
+        "    if isinstance(tweet, str):\n",
+        "        tweet = [tweet]\n",
+        "\n",
+        "    cleaned_texts = [clean_str(text) for text in tweet]\n",
+        "\n",
+        "    test_example = SingleSentenceClassificationProcessor(mode='classification')\n",
+        "    test_example.add_examples(texts_or_text_and_labels=cleaned_texts, overwrite_examples=True)\n",
+        "    test_features = test_example.get_features(tokenizer=tokenizer, max_length=max_length)\n",
+        "\n",
+        "    input_ids = [feature.input_ids for feature in test_features]\n",
+        "    attention_masks = [feature.attention_mask for feature in test_features]\n",
+        "\n",
+        "    # Put the model in evaluation mode and make sure it lives on the target device.\n",
+        "    model.eval()\n",
+        "    model.to(device)\n",
+        "\n",
+        "    torch.cuda.empty_cache()  # release cached GPU memory before the forward pass\n",
+        "    inputs = torch.tensor(input_ids).to(device)\n",
+        "    masks = torch.tensor(attention_masks).to(device)\n",
+        "\n",
+        "    # Inference only: disabling autograd avoids building a graph for every call,\n",
+        "    # which matters because this function is invoked once per tweet in loops.\n",
+        "    with torch.no_grad():\n",
+        "        output = model(inputs, attention_mask=masks)[\"logits\"]\n",
+        "\n",
+        "    # Move the logits back to the CPU and hand them out as numpy.\n",
+        "    return output.detach().cpu().numpy()"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "68JNE5IX2o47",
+        "outputId": "750d6a84-dbfb-4cb1-9983-88b8aeb7d547"
+      },
+      "execution_count": 42,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Requirement already satisfied: torch in /usr/local/lib/python3.10/dist-packages (2.3.1+cu121)\n",
+            "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from torch) (3.15.4)\n",
+            "Requirement already satisfied: typing-extensions>=4.8.0 in /usr/local/lib/python3.10/dist-packages (from torch) (4.12.2)\n",
+            "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch) (1.13.1)\n",
+            "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch) (3.3)\n",
+            "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch) (3.1.4)\n",
+            "Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from torch) (2024.6.1)\n",
+            "Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.1.105 in /usr/local/lib/python3.10/dist-packages (from torch) (12.1.105)\n",
+            "Requirement already satisfied: nvidia-cuda-runtime-cu12==12.1.105 in /usr/local/lib/python3.10/dist-packages (from torch) (12.1.105)\n",
+            "Requirement already satisfied: nvidia-cuda-cupti-cu12==12.1.105 in /usr/local/lib/python3.10/dist-packages (from torch) (12.1.105)\n",
+            "Requirement already satisfied: nvidia-cudnn-cu12==8.9.2.26 in /usr/local/lib/python3.10/dist-packages (from torch) (8.9.2.26)\n",
+            "Requirement already satisfied: nvidia-cublas-cu12==12.1.3.1 in /usr/local/lib/python3.10/dist-packages (from torch) (12.1.3.1)\n",
+            "Requirement already satisfied: nvidia-cufft-cu12==11.0.2.54 in /usr/local/lib/python3.10/dist-packages (from torch) (11.0.2.54)\n",
+            "Requirement already satisfied: nvidia-curand-cu12==10.3.2.106 in /usr/local/lib/python3.10/dist-packages (from torch) (10.3.2.106)\n",
+            "Requirement already satisfied: nvidia-cusolver-cu12==11.4.5.107 in /usr/local/lib/python3.10/dist-packages (from torch) (11.4.5.107)\n",
+            "Requirement already satisfied: nvidia-cusparse-cu12==12.1.0.106 in /usr/local/lib/python3.10/dist-packages (from torch) (12.1.0.106)\n",
+            "Requirement already satisfied: nvidia-nccl-cu12==2.20.5 in /usr/local/lib/python3.10/dist-packages (from torch) (2.20.5)\n",
+            "Requirement already satisfied: nvidia-nvtx-cu12==12.1.105 in /usr/local/lib/python3.10/dist-packages (from torch) (12.1.105)\n",
+            "Requirement already satisfied: triton==2.3.1 in /usr/local/lib/python3.10/dist-packages (from torch) (2.3.1)\n",
+            "Requirement already satisfied: nvidia-nvjitlink-cu12 in /usr/local/lib/python3.10/dist-packages (from nvidia-cusolver-cu12==11.4.5.107->torch) (12.6.20)\n",
+            "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch) (2.1.5)\n",
+            "Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.10/dist-packages (from sympy->torch) (1.3.0)\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Collect the offensive test tweets (label 1) that the classifier gets right.\n",
+        "OFFENSIVE_LABEL = 1\n",
+        "\n",
+        "size = len(test_data)\n",
+        "print(\"size of test set:\", size)\n",
+        "\n",
+        "# Convert both columns once, up front. Casting the gold labels to int here keeps\n",
+        "# this cell correct on a fresh top-to-bottom run instead of depending on a cast\n",
+        "# performed in another cell, and .tolist() avoids label-based `[i]` lookups that\n",
+        "# break when the index is not 0..n-1.\n",
+        "test_texts = test_data['Text'].astype('U').tolist()\n",
+        "test_labels = test_data['label'].astype(int).tolist()\n",
+        "\n",
+        "correct_class_tweets = []\n",
+        "correct_class = []\n",
+        "for txt, cls in zip(test_texts, test_labels):\n",
+        "    label = id2label[np.argmax(classsify_tweets([txt]), axis=1)[0]]\n",
+        "    # NOTE(review): id2label is defined outside this cell. If it yields numeric\n",
+        "    # strings (e.g. '1') normalise them so the comparison below can succeed;\n",
+        "    # non-numeric label names are left untouched and compared as-is.\n",
+        "    try:\n",
+        "        label = int(label)\n",
+        "    except (TypeError, ValueError):\n",
+        "        pass\n",
+        "    if label == cls == OFFENSIVE_LABEL:\n",
+        "        correct_class_tweets.append(txt)\n",
+        "        correct_class.append(cls)"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "Qr6Xai9n6-O3",
+        "outputId": "2ab47c28-e19f-4ba6-8f77-ca1be3ecf569"
+      },
+      "execution_count": 69,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "size of test set: 2540\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "from scipy.spatial import distance\n",
+        "from farasa.stemmer import FarasaStemmer\n",
+        "frasa_stemmer = FarasaStemmer(interactive=True)"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "_Ah-5HTtwxSq",
+        "outputId": "405181a8-6017-4966-9048-9a8e2dd86c06"
+      },
+      "execution_count": 45,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "[2024-08-18 15:12:31,185 - farasapy_logger - WARNING]: Be careful with large lines as they may break on interactive mode. You may switch to Standalone mode for such cases.\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "!pip install emoji"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "47lHG0uOwxZ9",
+        "outputId": "aa715294-b4bd-47f2-c240-5a360e48c300"
+      },
+      "execution_count": 46,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Requirement already satisfied: emoji in /usr/local/lib/python3.10/dist-packages (2.12.1)\n",
+            "Requirement already satisfied: typing-extensions>=4.7.0 in /usr/local/lib/python3.10/dist-packages (from emoji) (4.12.2)\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "import emoji\n",
+        "\n",
+        "def select_best_replacement(pos, x_cur, verbose=False, top_n=20):\n",
+        "    \"\"\"Pick the masked-LM candidate most similar to the word at index `pos`.\n",
+        "\n",
+        "    The word at `pos` (whitespace tokenisation of `x_cur`) is replaced by\n",
+        "    [MASK], the MARBERT fill-mask pipeline proposes fill-ins, and the candidate\n",
+        "    whose fastText vector has the highest cosine similarity to the original\n",
+        "    word is selected.\n",
+        "\n",
+        "    Args:\n",
+        "        pos: Index into x_cur.split() of the word to replace.\n",
+        "        x_cur: The sentence to perturb.\n",
+        "        verbose: When True, print every candidate that reaches the similarity step.\n",
+        "        top_n: Number of leading pipeline candidates to consider (defaults to\n",
+        "            20, the value that used to be hard-coded).\n",
+        "\n",
+        "    Returns:\n",
+        "        The sentence with ONLY the token at `pos` replaced (tokens re-joined with\n",
+        "        single spaces), or None when the word contains an emoji, its fastText\n",
+        "        vector is all zeros, or no candidate survives the filters.\n",
+        "    \"\"\"\n",
+        "    tokens = x_cur.split()\n",
+        "    target_word = tokens[pos]\n",
+        "\n",
+        "    if emoji.emoji_count(target_word):\n",
+        "        return None\n",
+        "\n",
+        "    embedding_masked_word = model_fasttext[target_word]\n",
+        "    # A zero vector makes every candidate unusable (cosine is undefined), so\n",
+        "    # bail out before paying for the masked-LM call.\n",
+        "    if not np.any(embedding_masked_word):\n",
+        "        return None\n",
+        "\n",
+        "    x_masked = \" \".join(tokens[:pos] + [\"[MASK]\"] + tokens[pos + 1:])\n",
+        "    unmasked_seq = unmasker_MARBERT(x_masked)[:top_n]\n",
+        "\n",
+        "    # The stem of the original word does not change per candidate: compute it once.\n",
+        "    target_stem = frasa_stemmer.stem(target_word)\n",
+        "\n",
+        "    max_sim = -1\n",
+        "    best_seq = None\n",
+        "    for seq in unmasked_seq:\n",
+        "        new_word = seq['token_str']\n",
+        "        # Skip candidates whose stem is contained in the original word's stem.\n",
+        "        if frasa_stemmer.stem(new_word) in target_stem:\n",
+        "            continue\n",
+        "        if new_word in punctuations_list or pos >= len(seq[\"sequence\"].split()):\n",
+        "            continue\n",
+        "        embedding_masked_word_new = model_fasttext[new_word]\n",
+        "        if not np.any(embedding_masked_word_new):\n",
+        "            continue\n",
+        "        if verbose: print(\"New word: \", new_word)\n",
+        "        sim = 1 - distance.cosine(embedding_masked_word, embedding_masked_word_new)\n",
+        "        if sim > max_sim:\n",
+        "            max_sim = sim\n",
+        "            # Rebuild from tokens: str.replace would also rewrite every other\n",
+        "            # occurrence of the word, including inside longer words.\n",
+        "            best_seq = \" \".join(tokens[:pos] + [new_word] + tokens[pos + 1:])\n",
+        "\n",
+        "    return best_seq\n",
+        "\n",
+        "# Try single-word substitutions on every correctly classified tweet and count\n",
+        "# the tweets for which at least one substitution flips the predicted class.\n",
+        "perturb_counter = 0\n",
+        "for tweet_ix, tweet in enumerate(correct_class_tweets):\n",
+        "    print(\"Tweet index: \", tweet_ix)\n",
+        "\n",
+        "    x_adv = light_preprocess(tweet)\n",
+        "    x_len = splits = len(x_adv.split())\n",
+        "    orig_class = np.argmax(classsify_tweets([x_adv]), axis=1)[0]\n",
+        "    orig_label = id2label[orig_class]\n",
+        "    print(f\"Original tweet: {x_adv} : Original label: {orig_label}.\")\n",
+        "\n",
+        "    perturbed_flag = False\n",
+        "    for split_ix in range(splits):\n",
+        "        perturbed = select_best_replacement(split_ix, x_adv)\n",
+        "        if not perturbed:\n",
+        "            continue\n",
+        "        new_class = np.argmax(classsify_tweets([perturbed]), axis=1)[0]\n",
+        "        if new_class == orig_class:\n",
+        "            continue\n",
+        "        print(f\"Perturbed tweet: {perturbed} : New label: {id2label[new_class]}.\")\n",
+        "        print(10 * \"==\")\n",
+        "        # Count each tweet at most once, however many positions flip it.\n",
+        "        perturb_counter += 0 if perturbed_flag else 1\n",
+        "        perturbed_flag = True\n",
+        "\n",
+        "    # Close the tweet's section when nothing flipped the prediction.\n",
+        "    if not perturbed_flag:\n",
+        "        print(10 * \"==\")\n",
+        "print(f\"Successful perturbation {perturb_counter} out of {len(correct_class_tweets)}.\")\n"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "sy4En-7yGWGN",
+        "outputId": "97f075cb-e545-4606-be7e-e0ad4c08d27d"
+      },
+      "execution_count": 75,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Successful perturbation 0 out of 0.\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Count the offensive tweets (label 1). Casting inside the expression keeps the\n",
+        "# comparison valid whether or not test_data['label'] has already been converted\n",
+        "# to int, and the vectorised .sum() avoids iterating the Series in Python.\n",
+        "off_tweets_count = int(test_data['label'].astype(int).eq(1).sum())\n",
+        "print(f\"Number of offensive tweets in the dataset: {off_tweets_count}\")"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "ttonOFkbDdI6",
+        "outputId": "0d329caa-4ce7-49ca-d335-e50ece1e88da"
+      },
+      "execution_count": 74,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Number of offensive tweets in the dataset: 887\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "test_data['label'] = test_data['label'].astype(int)\n"
+      ],
+      "metadata": {
+        "id": "M19XRkw7I2mk"
+      },
+      "execution_count": 73,
+      "outputs": []
+    }
+  ],
+  "metadata": {
+    "colab": {
+      "provenance": [],
+      "gpuType": "T4"
+    },
+    "kernelspec": {
+      "display_name": "Python 3",
+      "name": "python3"
+    },
+    "language_info": {
+      "name": "python"
+    },
+    "accelerator": "GPU"
+  },
+  "nbformat": 4,
+  "nbformat_minor": 0
+}
\ No newline at end of file