{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "dc6eb3bd-f5f1-4995-9f42-c86d9de320dc",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Benchmark scores, one list per model.\n",
    "# Position in each list: ARC, HellaSwag, MMLU, TruthfulQA, Winogrande, GSM8K\n",
    "DPO_v4 = [62.54, 79.73, 68.08, 53.94, 75.61, 71.04]\n",
    "DPO_v3 = [62.46, 79.50, 68.21, 53.27, 75.93, 70.81]\n",
    "DPO_v2 = [62.63, 79.20, 68.33, 53.29, 75.37, 70.58]\n",
    "DPO_v1 = [61.52, 79.06, 67.09, 51.85, 74.66, 69.29]\n",
    "meta = [60.75, 78.55, 67.07, 51.65, 74.51, 68.69]\n",
    "\n",
    "# Planned figures:\n",
    "#   scatter / bar -> AVG score of each model (score vs. category name)\n",
    "#   line          -> task scores for each model (score vs. epoch)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "523ddb4b-4cf7-4c83-98c7-e46ea8df4e5d",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Third-party dependencies. Both packages must be installed in the kernel's\n",
    "# environment before this cell will run, e.g. `%pip install pandas matplotlib`.\n",
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "86bfc73e-2f02-4fa4-a2dd-24d998c1123b",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# Assemble the per-model score lists into one table: one row per model,\n",
    "# one column per benchmark task. Rows are labelled with the model name so\n",
    "# the table can be read without counting row positions.\n",
    "TASKS = ['ARC', 'HellaSwag', 'MMLU', 'TruthfulQA', 'Winogrande', 'GSM8K']\n",
    "scores_by_model = {\n",
    "    'DPO_v4': DPO_v4,\n",
    "    'DPO_v3': DPO_v3,\n",
    "    'DPO_v2': DPO_v2,\n",
    "    'DPO_v1': DPO_v1,\n",
    "    'meta': meta,\n",
    "}\n",
    "\n",
    "# orient='index' turns each dict key into a row label; dict insertion order\n",
    "# fixes the row order.\n",
    "df = pd.DataFrame.from_dict(scores_by_model, orient='index', columns=TASKS)\n",
    "\n",
    "# Average over the task columns only, so the derived AVG column can never be\n",
    "# folded into its own mean.\n",
    "df['AVG'] = df[TASKS].mean(axis=1)\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "89874d9c-2544-439a-a0ef-85acb74e4cc9",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Per-model mean over the benchmark task columns only. The derived AVG column\n",
    "# is dropped first (when present) so it is not averaged in with the tasks.\n",
    "df.drop(columns=['AVG'], errors='ignore').mean(axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5d5278ea-0254-4881-82a3-360b551a9a1a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Scatter plot of the average score of each model.\n",
    "# Labels for the rows of df, in the same order the score lists were passed\n",
    "# to pd.DataFrame when the table was built.\n",
    "model_names = ['DPO_v4', 'DPO_v3', 'DPO_v2', 'DPO_v1', 'meta']\n",
    "assert len(model_names) == len(df), 'one label is needed per row of df'\n",
    "\n",
    "fig, ax = plt.subplots(figsize=(10, 6))\n",
    "# x needs exactly one entry per value of df['AVG'] (one per model); the task\n",
    "# column names have a different length and would make scatter() raise.\n",
    "ax.scatter(model_names, df['AVG'], color='blue', label='Average Score')\n",
    "ax.set_xlabel('Model Name')\n",
    "ax.set_ylabel('Average Score')\n",
    "ax.set_title('Average Scores of Models')\n",
    "ax.legend()\n",
    "ax.grid(True)\n",
    "fig.savefig('avg_scores_scatter.png')  # Save the plot as PNG\n",
    "plt.show()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}