Spaces:

XicoC
/

AIMakerSpace-Midterm

Sleeping

App Files Files Community

xicocdi committed on Sep 23

Commit

2182f80

•

1 Parent(s): a942057

final push

Browse files

Files changed (2) hide show

Embedding_Model_Eval.ipynb +206 -0
app.py +1 -5

Embedding_Model_Eval.ipynb CHANGED Viewed

@@ -576,6 +576,212 @@
     "multiquery_ft_embedding_metrics_df.to_csv(\"multiquery_ft_embedding_metrics.csv\", index=False)"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": 41,

     "multiquery_ft_embedding_metrics_df.to_csv(\"multiquery_ft_embedding_metrics.csv\", index=False)"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "multiquery_metrics_df = pd.read_csv(\"multiquery_metrics.csv\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "multiquery_ft_embedding_metrics_df = pd.read_csv(\"multiquery_ft_embedding_metrics.csv\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Metric</th>\n",
+       "      <th>MultiQuery</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>faithfulness</td>\n",
+       "      <td>0.896804</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>answer_relevancy</td>\n",
+       "      <td>0.953211</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>context_recall</td>\n",
+       "      <td>0.890625</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>context_precision</td>\n",
+       "      <td>0.920732</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>answer_correctness</td>\n",
+       "      <td>0.690058</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "               Metric  MultiQuery\n",
+       "0        faithfulness    0.896804\n",
+       "1    answer_relevancy    0.953211\n",
+       "2      context_recall    0.890625\n",
+       "3   context_precision    0.920732\n",
+       "4  answer_correctness    0.690058"
+      ]
+     },
+     "execution_count": 5,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "multiquery_metrics_df"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Metric</th>\n",
+       "      <th>MultiQuery</th>\n",
+       "      <th>Fine-Tune Embedding</th>\n",
+       "      <th>Baseline -&gt; Fine-Tune Embedding</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>faithfulness</td>\n",
+       "      <td>0.896804</td>\n",
+       "      <td>0.868351</td>\n",
+       "      <td>-0.028452</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>answer_relevancy</td>\n",
+       "      <td>0.953211</td>\n",
+       "      <td>0.955777</td>\n",
+       "      <td>0.002566</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>context_recall</td>\n",
+       "      <td>0.890625</td>\n",
+       "      <td>0.944444</td>\n",
+       "      <td>0.053819</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>context_precision</td>\n",
+       "      <td>0.920732</td>\n",
+       "      <td>0.953668</td>\n",
+       "      <td>0.032936</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>answer_correctness</td>\n",
+       "      <td>0.690058</td>\n",
+       "      <td>0.603407</td>\n",
+       "      <td>-0.086651</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "               Metric  MultiQuery  Fine-Tune Embedding  \\\n",
+       "0        faithfulness    0.896804             0.868351   \n",
+       "1    answer_relevancy    0.953211             0.955777   \n",
+       "2      context_recall    0.890625             0.944444   \n",
+       "3   context_precision    0.920732             0.953668   \n",
+       "4  answer_correctness    0.690058             0.603407   \n",
+       "\n",
+       "   Baseline -> Fine-Tune Embedding  \n",
+       "0                        -0.028452  \n",
+       "1                         0.002566  \n",
+       "2                         0.053819  \n",
+       "3                         0.032936  \n",
+       "4                        -0.086651  "
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df_baseline_ft_embeddings = pd.merge(multiquery_metrics_df, multiquery_ft_embedding_metrics_df, on='Metric')\n",
+    "\n",
+    "df_baseline_ft_embeddings['Baseline -> Fine-Tune Embedding'] = df_baseline_ft_embeddings['Fine-Tune Embedding'] - df_baseline_ft_embeddings['MultiQuery']\n",
+    "\n",
+    "df_baseline_ft_embeddings"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 41,

app.py CHANGED Viewed

@@ -76,13 +76,9 @@ retriever = vectorstore.as_retriever(
 llm = ChatOpenAI(
     model="gpt-4o-mini",
     temperature=0,
-    streaming=True,
 )
-retriever_llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)
-multiquery_retriever = MultiQueryRetriever.from_llm(
-    retriever=retriever, llm=retriever_llm
-)
 @cl.on_chat_start

 llm = ChatOpenAI(
     model="gpt-4o-mini",
     temperature=0,
 )
+multiquery_retriever = MultiQueryRetriever.from_llm(retriever=retriever, llm=llm)
 @cl.on_chat_start