Spaces:
Sleeping
Sleeping
xicocdi
committed on
Commit
•
a942057
1
Parent(s):
029f20b
push evaluation notebooks
Browse files
- Chunking_Strat_Eval.ipynb +185 -118
- midterm_fine_tune_embeddings_model.ipynb +0 -0
Chunking_Strat_Eval.ipynb
CHANGED
@@ -2,7 +2,7 @@
|
|
2 |
"cells": [
|
3 |
{
|
4 |
"cell_type": "code",
|
5 |
-
"execution_count":
|
6 |
"metadata": {
|
7 |
"colab": {
|
8 |
"base_uri": "https://localhost:8080/"
|
@@ -10,7 +10,165 @@
|
|
10 |
"id": "5BN13TZlSCv4",
|
11 |
"outputId": "424a6920-0cea-4e28-dce0-3de6f0a4cc3c"
|
12 |
},
|
13 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
14 |
"source": [
|
15 |
"!pip install langchain langchain_community langchain_openai chromadb pypdf langsmith qdrant-client ragas pandas"
|
16 |
]
|
@@ -87,7 +245,7 @@
|
|
87 |
},
|
88 |
{
|
89 |
"cell_type": "code",
|
90 |
-
"execution_count":
|
91 |
"metadata": {},
|
92 |
"outputs": [
|
93 |
{
|
@@ -114,7 +272,7 @@
|
|
114 |
},
|
115 |
{
|
116 |
"cell_type": "code",
|
117 |
-
"execution_count":
|
118 |
"metadata": {},
|
119 |
"outputs": [],
|
120 |
"source": [
|
@@ -124,7 +282,7 @@
|
|
124 |
},
|
125 |
{
|
126 |
"cell_type": "code",
|
127 |
-
"execution_count":
|
128 |
"metadata": {},
|
129 |
"outputs": [],
|
130 |
"source": [
|
@@ -136,7 +294,7 @@
|
|
136 |
},
|
137 |
{
|
138 |
"cell_type": "code",
|
139 |
-
"execution_count":
|
140 |
"metadata": {},
|
141 |
"outputs": [],
|
142 |
"source": [
|
@@ -147,6 +305,26 @@
|
|
147 |
"baseline_docs = text_splitter.split_documents(pdf_documents)"
|
148 |
]
|
149 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
150 |
{
|
151 |
"cell_type": "code",
|
152 |
"execution_count": 13,
|
@@ -709,117 +887,6 @@
|
|
709 |
"df_baseline.to_csv(\"df_baseline_metrics.csv\", index=False)"
|
710 |
]
|
711 |
},
|
712 |
-
{
|
713 |
-
"cell_type": "code",
|
714 |
-
"execution_count": 26,
|
715 |
-
"metadata": {},
|
716 |
-
"outputs": [],
|
717 |
-
"source": [
|
718 |
-
"from datetime import datetime, timedelta"
|
719 |
-
]
|
720 |
-
},
|
721 |
-
{
|
722 |
-
"cell_type": "code",
|
723 |
-
"execution_count": 45,
|
724 |
-
"metadata": {},
|
725 |
-
"outputs": [],
|
726 |
-
"source": [
|
727 |
-
"api_key = os.environ[\"LANGCHAIN_API_KEY\"]"
|
728 |
-
]
|
729 |
-
},
|
730 |
-
{
|
731 |
-
"cell_type": "code",
|
732 |
-
"execution_count": 39,
|
733 |
-
"metadata": {},
|
734 |
-
"outputs": [],
|
735 |
-
"source": [
|
736 |
-
"import uuid\n",
|
737 |
-
"import requests"
|
738 |
-
]
|
739 |
-
},
|
740 |
-
{
|
741 |
-
"cell_type": "code",
|
742 |
-
"execution_count": 46,
|
743 |
-
"metadata": {},
|
744 |
-
"outputs": [],
|
745 |
-
"source": [
|
746 |
-
"unique_dataset_id = str(uuid.uuid4())\n",
|
747 |
-
"dataset_name = f\"RAGAS Midterm Eval Dataset - {unique_dataset_id[:8]}\""
|
748 |
-
]
|
749 |
-
},
|
750 |
-
{
|
751 |
-
"cell_type": "code",
|
752 |
-
"execution_count": 47,
|
753 |
-
"metadata": {},
|
754 |
-
"outputs": [
|
755 |
-
{
|
756 |
-
"name": "stdout",
|
757 |
-
"output_type": "stream",
|
758 |
-
"text": [
|
759 |
-
"403\n",
|
760 |
-
"{'detail': 'Forbidden'}\n"
|
761 |
-
]
|
762 |
-
}
|
763 |
-
],
|
764 |
-
"source": [
|
765 |
-
"experiment_start_time = datetime.now()\n",
|
766 |
-
"experiment_end_time = experiment_start_time + timedelta(minutes=30) # Adjust as needed\n",
|
767 |
-
"\n",
|
768 |
-
"results = []\n",
|
769 |
-
"for index, row in results_df.iterrows():\n",
|
770 |
-
" start_time = experiment_start_time + timedelta(seconds=index)\n",
|
771 |
-
" end_time = start_time + timedelta(seconds=1)\n",
|
772 |
-
" results.append({\n",
|
773 |
-
" \"row_id\": str(uuid.uuid4()),\n",
|
774 |
-
" \"inputs\": {\"question\": row[\"question\"]},\n",
|
775 |
-
" \"expected_outputs\": {\"ground_truth\": row[\"ground_truth\"]},\n",
|
776 |
-
" \"actual_outputs\": {\"answer\": row[\"answer\"]},\n",
|
777 |
-
" \"evaluation_scores\": [\n",
|
778 |
-
" {\"key\": \"faithfulness\", \"score\": row[\"faithfulness\"]},\n",
|
779 |
-
" {\"key\": \"answer_relevancy\", \"score\": row[\"answer_relevancy\"]},\n",
|
780 |
-
" {\"key\": \"context_recall\", \"score\": row[\"context_recall\"]},\n",
|
781 |
-
" {\"key\": \"context_precision\", \"score\": row[\"context_precision\"]},\n",
|
782 |
-
" {\"key\": \"answer_correctness\", \"score\": row[\"answer_correctness\"]}\n",
|
783 |
-
" ],\n",
|
784 |
-
" \"start_time\": start_time.isoformat(),\n",
|
785 |
-
" \"end_time\": end_time.isoformat(),\n",
|
786 |
-
" \"run_name\": f\"Baseline Run {index}\"\n",
|
787 |
-
" })\n",
|
788 |
-
"\n",
|
789 |
-
"summary_scores = [\n",
|
790 |
-
" {\"key\": \"faithfulness\", \"score\": results_df[\"faithfulness\"].mean(), \"comment\": \"Average faithfulness score\"},\n",
|
791 |
-
" {\"key\": \"answer_relevancy\", \"score\": results_df[\"answer_relevancy\"].mean(), \"comment\": \"Average answer relevancy score\"},\n",
|
792 |
-
" {\"key\": \"context_recall\", \"score\": results_df[\"context_recall\"].mean(), \"comment\": \"Average context recall score\"},\n",
|
793 |
-
" {\"key\": \"context_precision\", \"score\": results_df[\"context_precision\"].mean(), \"comment\": \"Average context precision score\"},\n",
|
794 |
-
" {\"key\": \"answer_correctness\", \"score\": results_df[\"answer_correctness\"].mean(), \"comment\": \"Average answer correctness score\"}\n",
|
795 |
-
"]\n",
|
796 |
-
"\n",
|
797 |
-
"body = {\n",
|
798 |
-
" \"experiment_name\": \"Baseline Midterm Evaluation\",\n",
|
799 |
-
" \"experiment_description\": \"Baseline evaluation of Midterm Evaluation using Ragas metrics\",\n",
|
800 |
-
" \"dataset_name\": dataset_name,\n",
|
801 |
-
" \"dataset_description\": \"Dataset for RAGBot evaluation using Ragas metrics\",\n",
|
802 |
-
" \"experiment_start_time\": experiment_start_time.isoformat(),\n",
|
803 |
-
" \"experiment_end_time\": experiment_end_time.isoformat(),\n",
|
804 |
-
" \"experiment_metadata\": {\n",
|
805 |
-
" \"model\": \"gpt-4o-mini\",\n",
|
806 |
-
" \"retriever\": \"Qdrant with MMR\",\n",
|
807 |
-
" \"chunk_size\": \"1000 w/ 200 overlap\"\n",
|
808 |
-
" },\n",
|
809 |
-
" \"summary_experiment_scores\": summary_scores,\n",
|
810 |
-
" \"results\": results\n",
|
811 |
-
"}\n",
|
812 |
-
"\n",
|
813 |
-
"response = requests.post(\n",
|
814 |
-
" \"https://api.smith.langchain.com/api/v1/datasets/upload-experiment\",\n",
|
815 |
-
" json=body,\n",
|
816 |
-
" headers={\"x-api-key\": api_key}\n",
|
817 |
-
")\n",
|
818 |
-
"\n",
|
819 |
-
"print(response.status_code)\n",
|
820 |
-
"print(response.json())"
|
821 |
-
]
|
822 |
-
},
|
823 |
{
|
824 |
"cell_type": "code",
|
825 |
"execution_count": 27,
|
@@ -3073,7 +3140,7 @@
|
|
3073 |
"name": "python",
|
3074 |
"nbconvert_exporter": "python",
|
3075 |
"pygments_lexer": "ipython3",
|
3076 |
-
"version": "3.12.
|
3077 |
},
|
3078 |
"widgets": {
|
3079 |
"application/vnd.jupyter.widget-state+json": {
|
|
|
2 |
"cells": [
|
3 |
{
|
4 |
"cell_type": "code",
|
5 |
+
"execution_count": 1,
|
6 |
"metadata": {
|
7 |
"colab": {
|
8 |
"base_uri": "https://localhost:8080/"
|
|
|
10 |
"id": "5BN13TZlSCv4",
|
11 |
"outputId": "424a6920-0cea-4e28-dce0-3de6f0a4cc3c"
|
12 |
},
|
13 |
+
"outputs": [
|
14 |
+
{
|
15 |
+
"name": "stdout",
|
16 |
+
"output_type": "stream",
|
17 |
+
"text": [
|
18 |
+
"Requirement already satisfied: langchain in /opt/anaconda3/lib/python3.12/site-packages (0.3.0)\n",
|
19 |
+
"Requirement already satisfied: langchain_community in /opt/anaconda3/lib/python3.12/site-packages (0.2.17)\n",
|
20 |
+
"Requirement already satisfied: langchain_openai in /opt/anaconda3/lib/python3.12/site-packages (0.2.0)\n",
|
21 |
+
"Requirement already satisfied: chromadb in /opt/anaconda3/lib/python3.12/site-packages (0.5.5)\n",
|
22 |
+
"Requirement already satisfied: pypdf in /opt/anaconda3/lib/python3.12/site-packages (5.0.0)\n",
|
23 |
+
"Requirement already satisfied: langsmith in /opt/anaconda3/lib/python3.12/site-packages (0.1.125)\n",
|
24 |
+
"Requirement already satisfied: qdrant-client in /opt/anaconda3/lib/python3.12/site-packages (1.11.1)\n",
|
25 |
+
"Requirement already satisfied: ragas in /opt/anaconda3/lib/python3.12/site-packages (0.1.20)\n",
|
26 |
+
"Requirement already satisfied: pandas in /opt/anaconda3/lib/python3.12/site-packages (2.2.2)\n",
|
27 |
+
"Requirement already satisfied: PyYAML>=5.3 in /opt/anaconda3/lib/python3.12/site-packages (from langchain) (6.0.1)\n",
|
28 |
+
"Requirement already satisfied: SQLAlchemy<3,>=1.4 in /opt/anaconda3/lib/python3.12/site-packages (from langchain) (2.0.30)\n",
|
29 |
+
"Requirement already satisfied: aiohttp<4.0.0,>=3.8.3 in /opt/anaconda3/lib/python3.12/site-packages (from langchain) (3.9.5)\n",
|
30 |
+
"Requirement already satisfied: langchain-core<0.4.0,>=0.3.0 in /opt/anaconda3/lib/python3.12/site-packages (from langchain) (0.3.2)\n",
|
31 |
+
"Requirement already satisfied: langchain-text-splitters<0.4.0,>=0.3.0 in /opt/anaconda3/lib/python3.12/site-packages (from langchain) (0.3.0)\n",
|
32 |
+
"Requirement already satisfied: numpy<2.0.0,>=1.26.0 in /opt/anaconda3/lib/python3.12/site-packages (from langchain) (1.26.4)\n",
|
33 |
+
"Requirement already satisfied: pydantic<3.0.0,>=2.7.4 in /opt/anaconda3/lib/python3.12/site-packages (from langchain) (2.9.2)\n",
|
34 |
+
"Requirement already satisfied: requests<3,>=2 in /opt/anaconda3/lib/python3.12/site-packages (from langchain) (2.32.2)\n",
|
35 |
+
"Requirement already satisfied: tenacity!=8.4.0,<9.0.0,>=8.1.0 in /opt/anaconda3/lib/python3.12/site-packages (from langchain) (8.5.0)\n",
|
36 |
+
"Requirement already satisfied: dataclasses-json<0.7,>=0.5.7 in /opt/anaconda3/lib/python3.12/site-packages (from langchain_community) (0.5.14)\n",
|
37 |
+
"Collecting langchain\n",
|
38 |
+
" Using cached langchain-0.2.16-py3-none-any.whl.metadata (7.1 kB)\n",
|
39 |
+
"INFO: pip is looking at multiple versions of langchain-community to determine which version is compatible with other requirements. This could take a while.\n",
|
40 |
+
"Collecting langchain_community\n",
|
41 |
+
" Using cached langchain_community-0.3.0-py3-none-any.whl.metadata (2.8 kB)\n",
|
42 |
+
"Requirement already satisfied: pydantic-settings<3.0.0,>=2.4.0 in /opt/anaconda3/lib/python3.12/site-packages (from langchain_community) (2.5.2)\n",
|
43 |
+
"Requirement already satisfied: openai<2.0.0,>=1.40.0 in /opt/anaconda3/lib/python3.12/site-packages (from langchain_openai) (1.46.0)\n",
|
44 |
+
"Requirement already satisfied: tiktoken<1,>=0.7 in /opt/anaconda3/lib/python3.12/site-packages (from langchain_openai) (0.7.0)\n",
|
45 |
+
"Requirement already satisfied: build>=1.0.3 in /opt/anaconda3/lib/python3.12/site-packages (from chromadb) (1.2.1)\n",
|
46 |
+
"Requirement already satisfied: chroma-hnswlib==0.7.6 in /opt/anaconda3/lib/python3.12/site-packages (from chromadb) (0.7.6)\n",
|
47 |
+
"Requirement already satisfied: fastapi>=0.95.2 in /opt/anaconda3/lib/python3.12/site-packages (from chromadb) (0.110.3)\n",
|
48 |
+
"Requirement already satisfied: uvicorn>=0.18.3 in /opt/anaconda3/lib/python3.12/site-packages (from uvicorn[standard]>=0.18.3->chromadb) (0.25.0)\n",
|
49 |
+
"Requirement already satisfied: posthog>=2.4.0 in /opt/anaconda3/lib/python3.12/site-packages (from chromadb) (3.5.0)\n",
|
50 |
+
"Requirement already satisfied: typing-extensions>=4.5.0 in /opt/anaconda3/lib/python3.12/site-packages (from chromadb) (4.11.0)\n",
|
51 |
+
"Requirement already satisfied: onnxruntime>=1.14.1 in /opt/anaconda3/lib/python3.12/site-packages (from chromadb) (1.18.1)\n",
|
52 |
+
"Requirement already satisfied: opentelemetry-api>=1.2.0 in /opt/anaconda3/lib/python3.12/site-packages (from chromadb) (1.25.0)\n",
|
53 |
+
"Requirement already satisfied: opentelemetry-exporter-otlp-proto-grpc>=1.2.0 in /opt/anaconda3/lib/python3.12/site-packages (from chromadb) (1.25.0)\n",
|
54 |
+
"Requirement already satisfied: opentelemetry-instrumentation-fastapi>=0.41b0 in /opt/anaconda3/lib/python3.12/site-packages (from chromadb) (0.46b0)\n",
|
55 |
+
"Requirement already satisfied: opentelemetry-sdk>=1.2.0 in /opt/anaconda3/lib/python3.12/site-packages (from chromadb) (1.25.0)\n",
|
56 |
+
"Requirement already satisfied: tokenizers>=0.13.2 in /opt/anaconda3/lib/python3.12/site-packages (from chromadb) (0.19.1)\n",
|
57 |
+
"Requirement already satisfied: pypika>=0.48.9 in /opt/anaconda3/lib/python3.12/site-packages (from chromadb) (0.48.9)\n",
|
58 |
+
"Requirement already satisfied: tqdm>=4.65.0 in /opt/anaconda3/lib/python3.12/site-packages (from chromadb) (4.66.4)\n",
|
59 |
+
"Requirement already satisfied: overrides>=7.3.1 in /opt/anaconda3/lib/python3.12/site-packages (from chromadb) (7.4.0)\n",
|
60 |
+
"Requirement already satisfied: importlib-resources in /opt/anaconda3/lib/python3.12/site-packages (from chromadb) (6.4.0)\n",
|
61 |
+
"Requirement already satisfied: grpcio>=1.58.0 in /opt/anaconda3/lib/python3.12/site-packages (from chromadb) (1.66.1)\n",
|
62 |
+
"Requirement already satisfied: bcrypt>=4.0.1 in /opt/anaconda3/lib/python3.12/site-packages (from chromadb) (4.2.0)\n",
|
63 |
+
"Requirement already satisfied: typer>=0.9.0 in /opt/anaconda3/lib/python3.12/site-packages (from chromadb) (0.12.3)\n",
|
64 |
+
"Requirement already satisfied: kubernetes>=28.1.0 in /opt/anaconda3/lib/python3.12/site-packages (from chromadb) (30.1.0)\n",
|
65 |
+
"Requirement already satisfied: mmh3>=4.0.1 in /opt/anaconda3/lib/python3.12/site-packages (from chromadb) (4.1.0)\n",
|
66 |
+
"Requirement already satisfied: orjson>=3.9.12 in /opt/anaconda3/lib/python3.12/site-packages (from chromadb) (3.10.6)\n",
|
67 |
+
"Requirement already satisfied: httpx>=0.27.0 in /opt/anaconda3/lib/python3.12/site-packages (from chromadb) (0.27.0)\n",
|
68 |
+
"Requirement already satisfied: grpcio-tools>=1.41.0 in /opt/anaconda3/lib/python3.12/site-packages (from qdrant-client) (1.62.3)\n",
|
69 |
+
"Requirement already satisfied: portalocker<3.0.0,>=2.7.0 in /opt/anaconda3/lib/python3.12/site-packages (from qdrant-client) (2.10.1)\n",
|
70 |
+
"Requirement already satisfied: urllib3<3,>=1.26.14 in /opt/anaconda3/lib/python3.12/site-packages (from qdrant-client) (2.2.2)\n",
|
71 |
+
"Requirement already satisfied: datasets in /opt/anaconda3/lib/python3.12/site-packages (from ragas) (3.0.0)\n",
|
72 |
+
"INFO: pip is looking at multiple versions of ragas to determine which version is compatible with other requirements. This could take a while.\n",
|
73 |
+
"Collecting ragas\n",
|
74 |
+
" Downloading ragas-0.1.19-py3-none-any.whl.metadata (5.4 kB)\n",
|
75 |
+
"Requirement already satisfied: pysbd>=0.3.4 in /opt/anaconda3/lib/python3.12/site-packages (from ragas) (0.3.4)\n",
|
76 |
+
"Requirement already satisfied: nest-asyncio in /opt/anaconda3/lib/python3.12/site-packages (from ragas) (1.6.0)\n",
|
77 |
+
"Requirement already satisfied: appdirs in /opt/anaconda3/lib/python3.12/site-packages (from ragas) (1.4.4)\n",
|
78 |
+
"Requirement already satisfied: python-dateutil>=2.8.2 in /opt/anaconda3/lib/python3.12/site-packages (from pandas) (2.9.0.post0)\n",
|
79 |
+
"Requirement already satisfied: pytz>=2020.1 in /opt/anaconda3/lib/python3.12/site-packages (from pandas) (2024.1)\n",
|
80 |
+
"Requirement already satisfied: tzdata>=2022.7 in /opt/anaconda3/lib/python3.12/site-packages (from pandas) (2023.3)\n",
|
81 |
+
"Requirement already satisfied: aiosignal>=1.1.2 in /opt/anaconda3/lib/python3.12/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (1.2.0)\n",
|
82 |
+
"Requirement already satisfied: attrs>=17.3.0 in /opt/anaconda3/lib/python3.12/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (23.1.0)\n",
|
83 |
+
"Requirement already satisfied: frozenlist>=1.1.1 in /opt/anaconda3/lib/python3.12/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (1.4.0)\n",
|
84 |
+
"Requirement already satisfied: multidict<7.0,>=4.5 in /opt/anaconda3/lib/python3.12/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (6.0.4)\n",
|
85 |
+
"Requirement already satisfied: yarl<2.0,>=1.0 in /opt/anaconda3/lib/python3.12/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (1.9.3)\n",
|
86 |
+
"Requirement already satisfied: packaging>=19.1 in /opt/anaconda3/lib/python3.12/site-packages (from build>=1.0.3->chromadb) (23.2)\n",
|
87 |
+
"Requirement already satisfied: pyproject_hooks in /opt/anaconda3/lib/python3.12/site-packages (from build>=1.0.3->chromadb) (1.1.0)\n",
|
88 |
+
"Requirement already satisfied: marshmallow<4.0.0,>=3.18.0 in /opt/anaconda3/lib/python3.12/site-packages (from dataclasses-json<0.7,>=0.5.7->langchain_community) (3.21.3)\n",
|
89 |
+
"Requirement already satisfied: typing-inspect<1,>=0.4.0 in /opt/anaconda3/lib/python3.12/site-packages (from dataclasses-json<0.7,>=0.5.7->langchain_community) (0.9.0)\n",
|
90 |
+
"Requirement already satisfied: starlette<0.38.0,>=0.37.2 in /opt/anaconda3/lib/python3.12/site-packages (from fastapi>=0.95.2->chromadb) (0.37.2)\n",
|
91 |
+
"Requirement already satisfied: protobuf<5.0dev,>=4.21.6 in /opt/anaconda3/lib/python3.12/site-packages (from grpcio-tools>=1.41.0->qdrant-client) (4.25.4)\n",
|
92 |
+
"Requirement already satisfied: setuptools in /opt/anaconda3/lib/python3.12/site-packages (from grpcio-tools>=1.41.0->qdrant-client) (69.5.1)\n",
|
93 |
+
"Requirement already satisfied: anyio in /opt/anaconda3/lib/python3.12/site-packages (from httpx>=0.27.0->chromadb) (3.7.1)\n",
|
94 |
+
"Requirement already satisfied: certifi in /opt/anaconda3/lib/python3.12/site-packages (from httpx>=0.27.0->chromadb) (2024.6.2)\n",
|
95 |
+
"Requirement already satisfied: httpcore==1.* in /opt/anaconda3/lib/python3.12/site-packages (from httpx>=0.27.0->chromadb) (1.0.5)\n",
|
96 |
+
"Requirement already satisfied: idna in /opt/anaconda3/lib/python3.12/site-packages (from httpx>=0.27.0->chromadb) (3.7)\n",
|
97 |
+
"Requirement already satisfied: sniffio in /opt/anaconda3/lib/python3.12/site-packages (from httpx>=0.27.0->chromadb) (1.3.0)\n",
|
98 |
+
"Requirement already satisfied: h11<0.15,>=0.13 in /opt/anaconda3/lib/python3.12/site-packages (from httpcore==1.*->httpx>=0.27.0->chromadb) (0.14.0)\n",
|
99 |
+
"Requirement already satisfied: h2<5,>=3 in /opt/anaconda3/lib/python3.12/site-packages (from httpx[http2]>=0.20.0->qdrant-client) (4.1.0)\n",
|
100 |
+
"Requirement already satisfied: six>=1.9.0 in /opt/anaconda3/lib/python3.12/site-packages (from kubernetes>=28.1.0->chromadb) (1.16.0)\n",
|
101 |
+
"Requirement already satisfied: google-auth>=1.0.1 in /opt/anaconda3/lib/python3.12/site-packages (from kubernetes>=28.1.0->chromadb) (2.33.0)\n",
|
102 |
+
"Requirement already satisfied: websocket-client!=0.40.0,!=0.41.*,!=0.42.*,>=0.32.0 in /opt/anaconda3/lib/python3.12/site-packages (from kubernetes>=28.1.0->chromadb) (1.8.0)\n",
|
103 |
+
"Requirement already satisfied: requests-oauthlib in /opt/anaconda3/lib/python3.12/site-packages (from kubernetes>=28.1.0->chromadb) (2.0.0)\n",
|
104 |
+
"Requirement already satisfied: oauthlib>=3.2.2 in /opt/anaconda3/lib/python3.12/site-packages (from kubernetes>=28.1.0->chromadb) (3.2.2)\n",
|
105 |
+
"Requirement already satisfied: jsonpatch<2.0,>=1.33 in /opt/anaconda3/lib/python3.12/site-packages (from langchain-core<0.4.0,>=0.3.0->langchain) (1.33)\n",
|
106 |
+
"Requirement already satisfied: coloredlogs in /opt/anaconda3/lib/python3.12/site-packages (from onnxruntime>=1.14.1->chromadb) (15.0.1)\n",
|
107 |
+
"Requirement already satisfied: flatbuffers in /opt/anaconda3/lib/python3.12/site-packages (from onnxruntime>=1.14.1->chromadb) (24.3.25)\n",
|
108 |
+
"Requirement already satisfied: sympy in /opt/anaconda3/lib/python3.12/site-packages (from onnxruntime>=1.14.1->chromadb) (1.12)\n",
|
109 |
+
"Requirement already satisfied: distro<2,>=1.7.0 in /opt/anaconda3/lib/python3.12/site-packages (from openai<2.0.0,>=1.40.0->langchain_openai) (1.9.0)\n",
|
110 |
+
"Requirement already satisfied: jiter<1,>=0.4.0 in /opt/anaconda3/lib/python3.12/site-packages (from openai<2.0.0,>=1.40.0->langchain_openai) (0.5.0)\n",
|
111 |
+
"Requirement already satisfied: deprecated>=1.2.6 in /opt/anaconda3/lib/python3.12/site-packages (from opentelemetry-api>=1.2.0->chromadb) (1.2.14)\n",
|
112 |
+
"Requirement already satisfied: importlib-metadata<=7.1,>=6.0 in /opt/anaconda3/lib/python3.12/site-packages (from opentelemetry-api>=1.2.0->chromadb) (6.11.0)\n",
|
113 |
+
"Requirement already satisfied: googleapis-common-protos~=1.52 in /opt/anaconda3/lib/python3.12/site-packages (from opentelemetry-exporter-otlp-proto-grpc>=1.2.0->chromadb) (1.63.2)\n",
|
114 |
+
"Requirement already satisfied: opentelemetry-exporter-otlp-proto-common==1.25.0 in /opt/anaconda3/lib/python3.12/site-packages (from opentelemetry-exporter-otlp-proto-grpc>=1.2.0->chromadb) (1.25.0)\n",
|
115 |
+
"Requirement already satisfied: opentelemetry-proto==1.25.0 in /opt/anaconda3/lib/python3.12/site-packages (from opentelemetry-exporter-otlp-proto-grpc>=1.2.0->chromadb) (1.25.0)\n",
|
116 |
+
"Requirement already satisfied: opentelemetry-instrumentation-asgi==0.46b0 in /opt/anaconda3/lib/python3.12/site-packages (from opentelemetry-instrumentation-fastapi>=0.41b0->chromadb) (0.46b0)\n",
|
117 |
+
"Requirement already satisfied: opentelemetry-instrumentation==0.46b0 in /opt/anaconda3/lib/python3.12/site-packages (from opentelemetry-instrumentation-fastapi>=0.41b0->chromadb) (0.46b0)\n",
|
118 |
+
"Requirement already satisfied: opentelemetry-semantic-conventions==0.46b0 in /opt/anaconda3/lib/python3.12/site-packages (from opentelemetry-instrumentation-fastapi>=0.41b0->chromadb) (0.46b0)\n",
|
119 |
+
"Requirement already satisfied: opentelemetry-util-http==0.46b0 in /opt/anaconda3/lib/python3.12/site-packages (from opentelemetry-instrumentation-fastapi>=0.41b0->chromadb) (0.46b0)\n",
|
120 |
+
"Requirement already satisfied: wrapt<2.0.0,>=1.0.0 in /opt/anaconda3/lib/python3.12/site-packages (from opentelemetry-instrumentation==0.46b0->opentelemetry-instrumentation-fastapi>=0.41b0->chromadb) (1.14.1)\n",
|
121 |
+
"Requirement already satisfied: asgiref~=3.0 in /opt/anaconda3/lib/python3.12/site-packages (from opentelemetry-instrumentation-asgi==0.46b0->opentelemetry-instrumentation-fastapi>=0.41b0->chromadb) (3.8.1)\n",
|
122 |
+
"Requirement already satisfied: monotonic>=1.5 in /opt/anaconda3/lib/python3.12/site-packages (from posthog>=2.4.0->chromadb) (1.6)\n",
|
123 |
+
"Requirement already satisfied: backoff>=1.10.0 in /opt/anaconda3/lib/python3.12/site-packages (from posthog>=2.4.0->chromadb) (2.2.1)\n",
|
124 |
+
"Requirement already satisfied: annotated-types>=0.6.0 in /opt/anaconda3/lib/python3.12/site-packages (from pydantic<3.0.0,>=2.7.4->langchain) (0.6.0)\n",
|
125 |
+
"Requirement already satisfied: pydantic-core==2.23.4 in /opt/anaconda3/lib/python3.12/site-packages (from pydantic<3.0.0,>=2.7.4->langchain) (2.23.4)\n",
|
126 |
+
"Requirement already satisfied: python-dotenv>=0.21.0 in /opt/anaconda3/lib/python3.12/site-packages (from pydantic-settings<3.0.0,>=2.4.0->langchain_community) (1.0.0)\n",
|
127 |
+
"Requirement already satisfied: charset-normalizer<4,>=2 in /opt/anaconda3/lib/python3.12/site-packages (from requests<3,>=2->langchain) (3.3.2)\n",
|
128 |
+
"Requirement already satisfied: regex>=2022.1.18 in /opt/anaconda3/lib/python3.12/site-packages (from tiktoken<1,>=0.7->langchain_openai) (2023.10.3)\n",
|
129 |
+
"Requirement already satisfied: huggingface-hub<1.0,>=0.16.4 in /opt/anaconda3/lib/python3.12/site-packages (from tokenizers>=0.13.2->chromadb) (0.23.4)\n",
|
130 |
+
"Requirement already satisfied: click>=8.0.0 in /opt/anaconda3/lib/python3.12/site-packages (from typer>=0.9.0->chromadb) (8.1.7)\n",
|
131 |
+
"Requirement already satisfied: shellingham>=1.3.0 in /opt/anaconda3/lib/python3.12/site-packages (from typer>=0.9.0->chromadb) (1.5.4)\n",
|
132 |
+
"Requirement already satisfied: rich>=10.11.0 in /opt/anaconda3/lib/python3.12/site-packages (from typer>=0.9.0->chromadb) (13.3.5)\n",
|
133 |
+
"Requirement already satisfied: httptools>=0.5.0 in /opt/anaconda3/lib/python3.12/site-packages (from uvicorn[standard]>=0.18.3->chromadb) (0.6.1)\n",
|
134 |
+
"Requirement already satisfied: uvloop!=0.15.0,!=0.15.1,>=0.14.0 in /opt/anaconda3/lib/python3.12/site-packages (from uvicorn[standard]>=0.18.3->chromadb) (0.19.0)\n",
|
135 |
+
"Requirement already satisfied: watchfiles>=0.13 in /opt/anaconda3/lib/python3.12/site-packages (from uvicorn[standard]>=0.18.3->chromadb) (0.20.0)\n",
|
136 |
+
"Requirement already satisfied: websockets>=10.4 in /opt/anaconda3/lib/python3.12/site-packages (from uvicorn[standard]>=0.18.3->chromadb) (12.0)\n",
|
137 |
+
"Requirement already satisfied: filelock in /opt/anaconda3/lib/python3.12/site-packages (from datasets->ragas) (3.13.1)\n",
|
138 |
+
"Requirement already satisfied: pyarrow>=15.0.0 in /opt/anaconda3/lib/python3.12/site-packages (from datasets->ragas) (17.0.0)\n",
|
139 |
+
"Requirement already satisfied: dill<0.3.9,>=0.3.0 in /opt/anaconda3/lib/python3.12/site-packages (from datasets->ragas) (0.3.8)\n",
|
140 |
+
"Requirement already satisfied: xxhash in /opt/anaconda3/lib/python3.12/site-packages (from datasets->ragas) (3.5.0)\n",
|
141 |
+
"Requirement already satisfied: multiprocess in /opt/anaconda3/lib/python3.12/site-packages (from datasets->ragas) (0.70.16)\n",
|
142 |
+
"Requirement already satisfied: fsspec<=2024.6.1,>=2023.1.0 in /opt/anaconda3/lib/python3.12/site-packages (from fsspec[http]<=2024.6.1,>=2023.1.0->datasets->ragas) (2024.3.1)\n",
|
143 |
+
"Requirement already satisfied: cachetools<6.0,>=2.0.0 in /opt/anaconda3/lib/python3.12/site-packages (from google-auth>=1.0.1->kubernetes>=28.1.0->chromadb) (5.3.3)\n",
|
144 |
+
"Requirement already satisfied: pyasn1-modules>=0.2.1 in /opt/anaconda3/lib/python3.12/site-packages (from google-auth>=1.0.1->kubernetes>=28.1.0->chromadb) (0.2.8)\n",
|
145 |
+
"Requirement already satisfied: rsa<5,>=3.1.4 in /opt/anaconda3/lib/python3.12/site-packages (from google-auth>=1.0.1->kubernetes>=28.1.0->chromadb) (4.9)\n",
|
146 |
+
"Requirement already satisfied: hyperframe<7,>=6.0 in /opt/anaconda3/lib/python3.12/site-packages (from h2<5,>=3->httpx[http2]>=0.20.0->qdrant-client) (6.0.1)\n",
|
147 |
+
"Requirement already satisfied: hpack<5,>=4.0 in /opt/anaconda3/lib/python3.12/site-packages (from h2<5,>=3->httpx[http2]>=0.20.0->qdrant-client) (4.0.0)\n",
|
148 |
+
"Requirement already satisfied: zipp>=0.5 in /opt/anaconda3/lib/python3.12/site-packages (from importlib-metadata<=7.1,>=6.0->opentelemetry-api>=1.2.0->chromadb) (3.17.0)\n",
|
149 |
+
"Requirement already satisfied: jsonpointer>=1.9 in /opt/anaconda3/lib/python3.12/site-packages (from jsonpatch<2.0,>=1.33->langchain-core<0.4.0,>=0.3.0->langchain) (2.1)\n",
|
150 |
+
"Requirement already satisfied: markdown-it-py<3.0.0,>=2.2.0 in /opt/anaconda3/lib/python3.12/site-packages (from rich>=10.11.0->typer>=0.9.0->chromadb) (2.2.0)\n",
|
151 |
+
"Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /opt/anaconda3/lib/python3.12/site-packages (from rich>=10.11.0->typer>=0.9.0->chromadb) (2.15.1)\n",
|
152 |
+
"Requirement already satisfied: mypy-extensions>=0.3.0 in /opt/anaconda3/lib/python3.12/site-packages (from typing-inspect<1,>=0.4.0->dataclasses-json<0.7,>=0.5.7->langchain_community) (1.0.0)\n",
|
153 |
+
"Requirement already satisfied: humanfriendly>=9.1 in /opt/anaconda3/lib/python3.12/site-packages (from coloredlogs->onnxruntime>=1.14.1->chromadb) (10.0)\n",
|
154 |
+
"Requirement already satisfied: mpmath>=0.19 in /opt/anaconda3/lib/python3.12/site-packages (from sympy->onnxruntime>=1.14.1->chromadb) (1.3.0)\n",
|
155 |
+
"Requirement already satisfied: mdurl~=0.1 in /opt/anaconda3/lib/python3.12/site-packages (from markdown-it-py<3.0.0,>=2.2.0->rich>=10.11.0->typer>=0.9.0->chromadb) (0.1.0)\n",
|
156 |
+
"Requirement already satisfied: pyasn1<0.5.0,>=0.4.6 in /opt/anaconda3/lib/python3.12/site-packages (from pyasn1-modules>=0.2.1->google-auth>=1.0.1->kubernetes>=28.1.0->chromadb) (0.4.8)\n",
|
157 |
+
"Using cached langchain_community-0.3.0-py3-none-any.whl (2.3 MB)\n",
|
158 |
+
"Downloading ragas-0.1.19-py3-none-any.whl (190 kB)\n",
|
159 |
+
"Installing collected packages: langchain_community, ragas\n",
|
160 |
+
" Attempting uninstall: langchain_community\n",
|
161 |
+
" Found existing installation: langchain-community 0.2.17\n",
|
162 |
+
" Uninstalling langchain-community-0.2.17:\n",
|
163 |
+
" Successfully uninstalled langchain-community-0.2.17\n",
|
164 |
+
" Attempting uninstall: ragas\n",
|
165 |
+
" Found existing installation: ragas 0.1.20\n",
|
166 |
+
" Uninstalling ragas-0.1.20:\n",
|
167 |
+
" Successfully uninstalled ragas-0.1.20\n",
|
168 |
+
"Successfully installed langchain_community-0.3.0 ragas-0.1.19\n"
|
169 |
+
]
|
170 |
+
}
|
171 |
+
],
|
172 |
"source": [
|
173 |
"!pip install langchain langchain_community langchain_openai chromadb pypdf langsmith qdrant-client ragas pandas"
|
174 |
]
|
|
|
245 |
},
|
246 |
{
|
247 |
"cell_type": "code",
|
248 |
+
"execution_count": 4,
|
249 |
"metadata": {},
|
250 |
"outputs": [
|
251 |
{
|
|
|
272 |
},
|
273 |
{
|
274 |
"cell_type": "code",
|
275 |
+
"execution_count": 2,
|
276 |
"metadata": {},
|
277 |
"outputs": [],
|
278 |
"source": [
|
|
|
282 |
},
|
283 |
{
|
284 |
"cell_type": "code",
|
285 |
+
"execution_count": 5,
|
286 |
"metadata": {},
|
287 |
"outputs": [],
|
288 |
"source": [
|
|
|
294 |
},
|
295 |
{
|
296 |
"cell_type": "code",
|
297 |
+
"execution_count": 6,
|
298 |
"metadata": {},
|
299 |
"outputs": [],
|
300 |
"source": [
|
|
|
305 |
"baseline_docs = text_splitter.split_documents(pdf_documents)"
|
306 |
]
|
307 |
},
|
308 |
+
{
|
309 |
+
"cell_type": "code",
|
310 |
+
"execution_count": 7,
|
311 |
+
"metadata": {},
|
312 |
+
"outputs": [
|
313 |
+
{
|
314 |
+
"data": {
|
315 |
+
"text/plain": [
|
316 |
+
"524"
|
317 |
+
]
|
318 |
+
},
|
319 |
+
"execution_count": 7,
|
320 |
+
"metadata": {},
|
321 |
+
"output_type": "execute_result"
|
322 |
+
}
|
323 |
+
],
|
324 |
+
"source": [
|
325 |
+
"len(baseline_docs)"
|
326 |
+
]
|
327 |
+
},
|
328 |
{
|
329 |
"cell_type": "code",
|
330 |
"execution_count": 13,
|
|
|
887 |
"df_baseline.to_csv(\"df_baseline_metrics.csv\", index=False)"
|
888 |
]
|
889 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
890 |
{
|
891 |
"cell_type": "code",
|
892 |
"execution_count": 27,
|
|
|
3140 |
"name": "python",
|
3141 |
"nbconvert_exporter": "python",
|
3142 |
"pygments_lexer": "ipython3",
|
3143 |
+
"version": "3.12.4"
|
3144 |
},
|
3145 |
"widgets": {
|
3146 |
"application/vnd.jupyter.widget-state+json": {
|
midterm_fine_tune_embeddings_model.ipynb
ADDED
The diff for this file is too large to render.
See raw diff
|
|