xicocdi committed on
Commit
a942057
1 Parent(s): 029f20b

push evaluation notebooks

Browse files
Chunking_Strat_Eval.ipynb CHANGED
@@ -2,7 +2,7 @@
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
- "execution_count": null,
6
  "metadata": {
7
  "colab": {
8
  "base_uri": "https://localhost:8080/"
@@ -10,7 +10,165 @@
10
  "id": "5BN13TZlSCv4",
11
  "outputId": "424a6920-0cea-4e28-dce0-3de6f0a4cc3c"
12
  },
13
- "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  "source": [
15
  "!pip install langchain langchain_community langchain_openai chromadb pypdf langsmith qdrant-client ragas pandas"
16
  ]
@@ -87,7 +245,7 @@
87
  },
88
  {
89
  "cell_type": "code",
90
- "execution_count": 9,
91
  "metadata": {},
92
  "outputs": [
93
  {
@@ -114,7 +272,7 @@
114
  },
115
  {
116
  "cell_type": "code",
117
- "execution_count": 10,
118
  "metadata": {},
119
  "outputs": [],
120
  "source": [
@@ -124,7 +282,7 @@
124
  },
125
  {
126
  "cell_type": "code",
127
- "execution_count": 11,
128
  "metadata": {},
129
  "outputs": [],
130
  "source": [
@@ -136,7 +294,7 @@
136
  },
137
  {
138
  "cell_type": "code",
139
- "execution_count": 12,
140
  "metadata": {},
141
  "outputs": [],
142
  "source": [
@@ -147,6 +305,26 @@
147
  "baseline_docs = text_splitter.split_documents(pdf_documents)"
148
  ]
149
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
150
  {
151
  "cell_type": "code",
152
  "execution_count": 13,
@@ -709,117 +887,6 @@
709
  "df_baseline.to_csv(\"df_baseline_metrics.csv\", index=False)"
710
  ]
711
  },
712
- {
713
- "cell_type": "code",
714
- "execution_count": 26,
715
- "metadata": {},
716
- "outputs": [],
717
- "source": [
718
- "from datetime import datetime, timedelta"
719
- ]
720
- },
721
- {
722
- "cell_type": "code",
723
- "execution_count": 45,
724
- "metadata": {},
725
- "outputs": [],
726
- "source": [
727
- "api_key = os.environ[\"LANGCHAIN_API_KEY\"]"
728
- ]
729
- },
730
- {
731
- "cell_type": "code",
732
- "execution_count": 39,
733
- "metadata": {},
734
- "outputs": [],
735
- "source": [
736
- "import uuid\n",
737
- "import requests"
738
- ]
739
- },
740
- {
741
- "cell_type": "code",
742
- "execution_count": 46,
743
- "metadata": {},
744
- "outputs": [],
745
- "source": [
746
- "unique_dataset_id = str(uuid.uuid4())\n",
747
- "dataset_name = f\"RAGAS Midterm Eval Dataset - {unique_dataset_id[:8]}\""
748
- ]
749
- },
750
- {
751
- "cell_type": "code",
752
- "execution_count": 47,
753
- "metadata": {},
754
- "outputs": [
755
- {
756
- "name": "stdout",
757
- "output_type": "stream",
758
- "text": [
759
- "403\n",
760
- "{'detail': 'Forbidden'}\n"
761
- ]
762
- }
763
- ],
764
- "source": [
765
- "experiment_start_time = datetime.now()\n",
766
- "experiment_end_time = experiment_start_time + timedelta(minutes=30) # Adjust as needed\n",
767
- "\n",
768
- "results = []\n",
769
- "for index, row in results_df.iterrows():\n",
770
- " start_time = experiment_start_time + timedelta(seconds=index)\n",
771
- " end_time = start_time + timedelta(seconds=1)\n",
772
- " results.append({\n",
773
- " \"row_id\": str(uuid.uuid4()),\n",
774
- " \"inputs\": {\"question\": row[\"question\"]},\n",
775
- " \"expected_outputs\": {\"ground_truth\": row[\"ground_truth\"]},\n",
776
- " \"actual_outputs\": {\"answer\": row[\"answer\"]},\n",
777
- " \"evaluation_scores\": [\n",
778
- " {\"key\": \"faithfulness\", \"score\": row[\"faithfulness\"]},\n",
779
- " {\"key\": \"answer_relevancy\", \"score\": row[\"answer_relevancy\"]},\n",
780
- " {\"key\": \"context_recall\", \"score\": row[\"context_recall\"]},\n",
781
- " {\"key\": \"context_precision\", \"score\": row[\"context_precision\"]},\n",
782
- " {\"key\": \"answer_correctness\", \"score\": row[\"answer_correctness\"]}\n",
783
- " ],\n",
784
- " \"start_time\": start_time.isoformat(),\n",
785
- " \"end_time\": end_time.isoformat(),\n",
786
- " \"run_name\": f\"Baseline Run {index}\"\n",
787
- " })\n",
788
- "\n",
789
- "summary_scores = [\n",
790
- " {\"key\": \"faithfulness\", \"score\": results_df[\"faithfulness\"].mean(), \"comment\": \"Average faithfulness score\"},\n",
791
- " {\"key\": \"answer_relevancy\", \"score\": results_df[\"answer_relevancy\"].mean(), \"comment\": \"Average answer relevancy score\"},\n",
792
- " {\"key\": \"context_recall\", \"score\": results_df[\"context_recall\"].mean(), \"comment\": \"Average context recall score\"},\n",
793
- " {\"key\": \"context_precision\", \"score\": results_df[\"context_precision\"].mean(), \"comment\": \"Average context precision score\"},\n",
794
- " {\"key\": \"answer_correctness\", \"score\": results_df[\"answer_correctness\"].mean(), \"comment\": \"Average answer correctness score\"}\n",
795
- "]\n",
796
- "\n",
797
- "body = {\n",
798
- " \"experiment_name\": \"Baseline Midterm Evaluation\",\n",
799
- " \"experiment_description\": \"Baseline evaluation of Midterm Evaluation using Ragas metrics\",\n",
800
- " \"dataset_name\": dataset_name,\n",
801
- " \"dataset_description\": \"Dataset for RAGBot evaluation using Ragas metrics\",\n",
802
- " \"experiment_start_time\": experiment_start_time.isoformat(),\n",
803
- " \"experiment_end_time\": experiment_end_time.isoformat(),\n",
804
- " \"experiment_metadata\": {\n",
805
- " \"model\": \"gpt-4o-mini\",\n",
806
- " \"retriever\": \"Qdrant with MMR\",\n",
807
- " \"chunk_size\": \"1000 w/ 200 overlap\"\n",
808
- " },\n",
809
- " \"summary_experiment_scores\": summary_scores,\n",
810
- " \"results\": results\n",
811
- "}\n",
812
- "\n",
813
- "response = requests.post(\n",
814
- " \"https://api.smith.langchain.com/api/v1/datasets/upload-experiment\",\n",
815
- " json=body,\n",
816
- " headers={\"x-api-key\": api_key}\n",
817
- ")\n",
818
- "\n",
819
- "print(response.status_code)\n",
820
- "print(response.json())"
821
- ]
822
- },
823
  {
824
  "cell_type": "code",
825
  "execution_count": 27,
@@ -3073,7 +3140,7 @@
3073
  "name": "python",
3074
  "nbconvert_exporter": "python",
3075
  "pygments_lexer": "ipython3",
3076
- "version": "3.12.0"
3077
  },
3078
  "widgets": {
3079
  "application/vnd.jupyter.widget-state+json": {
 
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
+ "execution_count": 1,
6
  "metadata": {
7
  "colab": {
8
  "base_uri": "https://localhost:8080/"
 
10
  "id": "5BN13TZlSCv4",
11
  "outputId": "424a6920-0cea-4e28-dce0-3de6f0a4cc3c"
12
  },
13
+ "outputs": [
14
+ {
15
+ "name": "stdout",
16
+ "output_type": "stream",
17
+ "text": [
18
+ "Requirement already satisfied: langchain in /opt/anaconda3/lib/python3.12/site-packages (0.3.0)\n",
19
+ "Requirement already satisfied: langchain_community in /opt/anaconda3/lib/python3.12/site-packages (0.2.17)\n",
20
+ "Requirement already satisfied: langchain_openai in /opt/anaconda3/lib/python3.12/site-packages (0.2.0)\n",
21
+ "Requirement already satisfied: chromadb in /opt/anaconda3/lib/python3.12/site-packages (0.5.5)\n",
22
+ "Requirement already satisfied: pypdf in /opt/anaconda3/lib/python3.12/site-packages (5.0.0)\n",
23
+ "Requirement already satisfied: langsmith in /opt/anaconda3/lib/python3.12/site-packages (0.1.125)\n",
24
+ "Requirement already satisfied: qdrant-client in /opt/anaconda3/lib/python3.12/site-packages (1.11.1)\n",
25
+ "Requirement already satisfied: ragas in /opt/anaconda3/lib/python3.12/site-packages (0.1.20)\n",
26
+ "Requirement already satisfied: pandas in /opt/anaconda3/lib/python3.12/site-packages (2.2.2)\n",
27
+ "Requirement already satisfied: PyYAML>=5.3 in /opt/anaconda3/lib/python3.12/site-packages (from langchain) (6.0.1)\n",
28
+ "Requirement already satisfied: SQLAlchemy<3,>=1.4 in /opt/anaconda3/lib/python3.12/site-packages (from langchain) (2.0.30)\n",
29
+ "Requirement already satisfied: aiohttp<4.0.0,>=3.8.3 in /opt/anaconda3/lib/python3.12/site-packages (from langchain) (3.9.5)\n",
30
+ "Requirement already satisfied: langchain-core<0.4.0,>=0.3.0 in /opt/anaconda3/lib/python3.12/site-packages (from langchain) (0.3.2)\n",
31
+ "Requirement already satisfied: langchain-text-splitters<0.4.0,>=0.3.0 in /opt/anaconda3/lib/python3.12/site-packages (from langchain) (0.3.0)\n",
32
+ "Requirement already satisfied: numpy<2.0.0,>=1.26.0 in /opt/anaconda3/lib/python3.12/site-packages (from langchain) (1.26.4)\n",
33
+ "Requirement already satisfied: pydantic<3.0.0,>=2.7.4 in /opt/anaconda3/lib/python3.12/site-packages (from langchain) (2.9.2)\n",
34
+ "Requirement already satisfied: requests<3,>=2 in /opt/anaconda3/lib/python3.12/site-packages (from langchain) (2.32.2)\n",
35
+ "Requirement already satisfied: tenacity!=8.4.0,<9.0.0,>=8.1.0 in /opt/anaconda3/lib/python3.12/site-packages (from langchain) (8.5.0)\n",
36
+ "Requirement already satisfied: dataclasses-json<0.7,>=0.5.7 in /opt/anaconda3/lib/python3.12/site-packages (from langchain_community) (0.5.14)\n",
37
+ "Collecting langchain\n",
38
+ " Using cached langchain-0.2.16-py3-none-any.whl.metadata (7.1 kB)\n",
39
+ "INFO: pip is looking at multiple versions of langchain-community to determine which version is compatible with other requirements. This could take a while.\n",
40
+ "Collecting langchain_community\n",
41
+ " Using cached langchain_community-0.3.0-py3-none-any.whl.metadata (2.8 kB)\n",
42
+ "Requirement already satisfied: pydantic-settings<3.0.0,>=2.4.0 in /opt/anaconda3/lib/python3.12/site-packages (from langchain_community) (2.5.2)\n",
43
+ "Requirement already satisfied: openai<2.0.0,>=1.40.0 in /opt/anaconda3/lib/python3.12/site-packages (from langchain_openai) (1.46.0)\n",
44
+ "Requirement already satisfied: tiktoken<1,>=0.7 in /opt/anaconda3/lib/python3.12/site-packages (from langchain_openai) (0.7.0)\n",
45
+ "Requirement already satisfied: build>=1.0.3 in /opt/anaconda3/lib/python3.12/site-packages (from chromadb) (1.2.1)\n",
46
+ "Requirement already satisfied: chroma-hnswlib==0.7.6 in /opt/anaconda3/lib/python3.12/site-packages (from chromadb) (0.7.6)\n",
47
+ "Requirement already satisfied: fastapi>=0.95.2 in /opt/anaconda3/lib/python3.12/site-packages (from chromadb) (0.110.3)\n",
48
+ "Requirement already satisfied: uvicorn>=0.18.3 in /opt/anaconda3/lib/python3.12/site-packages (from uvicorn[standard]>=0.18.3->chromadb) (0.25.0)\n",
49
+ "Requirement already satisfied: posthog>=2.4.0 in /opt/anaconda3/lib/python3.12/site-packages (from chromadb) (3.5.0)\n",
50
+ "Requirement already satisfied: typing-extensions>=4.5.0 in /opt/anaconda3/lib/python3.12/site-packages (from chromadb) (4.11.0)\n",
51
+ "Requirement already satisfied: onnxruntime>=1.14.1 in /opt/anaconda3/lib/python3.12/site-packages (from chromadb) (1.18.1)\n",
52
+ "Requirement already satisfied: opentelemetry-api>=1.2.0 in /opt/anaconda3/lib/python3.12/site-packages (from chromadb) (1.25.0)\n",
53
+ "Requirement already satisfied: opentelemetry-exporter-otlp-proto-grpc>=1.2.0 in /opt/anaconda3/lib/python3.12/site-packages (from chromadb) (1.25.0)\n",
54
+ "Requirement already satisfied: opentelemetry-instrumentation-fastapi>=0.41b0 in /opt/anaconda3/lib/python3.12/site-packages (from chromadb) (0.46b0)\n",
55
+ "Requirement already satisfied: opentelemetry-sdk>=1.2.0 in /opt/anaconda3/lib/python3.12/site-packages (from chromadb) (1.25.0)\n",
56
+ "Requirement already satisfied: tokenizers>=0.13.2 in /opt/anaconda3/lib/python3.12/site-packages (from chromadb) (0.19.1)\n",
57
+ "Requirement already satisfied: pypika>=0.48.9 in /opt/anaconda3/lib/python3.12/site-packages (from chromadb) (0.48.9)\n",
58
+ "Requirement already satisfied: tqdm>=4.65.0 in /opt/anaconda3/lib/python3.12/site-packages (from chromadb) (4.66.4)\n",
59
+ "Requirement already satisfied: overrides>=7.3.1 in /opt/anaconda3/lib/python3.12/site-packages (from chromadb) (7.4.0)\n",
60
+ "Requirement already satisfied: importlib-resources in /opt/anaconda3/lib/python3.12/site-packages (from chromadb) (6.4.0)\n",
61
+ "Requirement already satisfied: grpcio>=1.58.0 in /opt/anaconda3/lib/python3.12/site-packages (from chromadb) (1.66.1)\n",
62
+ "Requirement already satisfied: bcrypt>=4.0.1 in /opt/anaconda3/lib/python3.12/site-packages (from chromadb) (4.2.0)\n",
63
+ "Requirement already satisfied: typer>=0.9.0 in /opt/anaconda3/lib/python3.12/site-packages (from chromadb) (0.12.3)\n",
64
+ "Requirement already satisfied: kubernetes>=28.1.0 in /opt/anaconda3/lib/python3.12/site-packages (from chromadb) (30.1.0)\n",
65
+ "Requirement already satisfied: mmh3>=4.0.1 in /opt/anaconda3/lib/python3.12/site-packages (from chromadb) (4.1.0)\n",
66
+ "Requirement already satisfied: orjson>=3.9.12 in /opt/anaconda3/lib/python3.12/site-packages (from chromadb) (3.10.6)\n",
67
+ "Requirement already satisfied: httpx>=0.27.0 in /opt/anaconda3/lib/python3.12/site-packages (from chromadb) (0.27.0)\n",
68
+ "Requirement already satisfied: grpcio-tools>=1.41.0 in /opt/anaconda3/lib/python3.12/site-packages (from qdrant-client) (1.62.3)\n",
69
+ "Requirement already satisfied: portalocker<3.0.0,>=2.7.0 in /opt/anaconda3/lib/python3.12/site-packages (from qdrant-client) (2.10.1)\n",
70
+ "Requirement already satisfied: urllib3<3,>=1.26.14 in /opt/anaconda3/lib/python3.12/site-packages (from qdrant-client) (2.2.2)\n",
71
+ "Requirement already satisfied: datasets in /opt/anaconda3/lib/python3.12/site-packages (from ragas) (3.0.0)\n",
72
+ "INFO: pip is looking at multiple versions of ragas to determine which version is compatible with other requirements. This could take a while.\n",
73
+ "Collecting ragas\n",
74
+ " Downloading ragas-0.1.19-py3-none-any.whl.metadata (5.4 kB)\n",
75
+ "Requirement already satisfied: pysbd>=0.3.4 in /opt/anaconda3/lib/python3.12/site-packages (from ragas) (0.3.4)\n",
76
+ "Requirement already satisfied: nest-asyncio in /opt/anaconda3/lib/python3.12/site-packages (from ragas) (1.6.0)\n",
77
+ "Requirement already satisfied: appdirs in /opt/anaconda3/lib/python3.12/site-packages (from ragas) (1.4.4)\n",
78
+ "Requirement already satisfied: python-dateutil>=2.8.2 in /opt/anaconda3/lib/python3.12/site-packages (from pandas) (2.9.0.post0)\n",
79
+ "Requirement already satisfied: pytz>=2020.1 in /opt/anaconda3/lib/python3.12/site-packages (from pandas) (2024.1)\n",
80
+ "Requirement already satisfied: tzdata>=2022.7 in /opt/anaconda3/lib/python3.12/site-packages (from pandas) (2023.3)\n",
81
+ "Requirement already satisfied: aiosignal>=1.1.2 in /opt/anaconda3/lib/python3.12/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (1.2.0)\n",
82
+ "Requirement already satisfied: attrs>=17.3.0 in /opt/anaconda3/lib/python3.12/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (23.1.0)\n",
83
+ "Requirement already satisfied: frozenlist>=1.1.1 in /opt/anaconda3/lib/python3.12/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (1.4.0)\n",
84
+ "Requirement already satisfied: multidict<7.0,>=4.5 in /opt/anaconda3/lib/python3.12/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (6.0.4)\n",
85
+ "Requirement already satisfied: yarl<2.0,>=1.0 in /opt/anaconda3/lib/python3.12/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (1.9.3)\n",
86
+ "Requirement already satisfied: packaging>=19.1 in /opt/anaconda3/lib/python3.12/site-packages (from build>=1.0.3->chromadb) (23.2)\n",
87
+ "Requirement already satisfied: pyproject_hooks in /opt/anaconda3/lib/python3.12/site-packages (from build>=1.0.3->chromadb) (1.1.0)\n",
88
+ "Requirement already satisfied: marshmallow<4.0.0,>=3.18.0 in /opt/anaconda3/lib/python3.12/site-packages (from dataclasses-json<0.7,>=0.5.7->langchain_community) (3.21.3)\n",
89
+ "Requirement already satisfied: typing-inspect<1,>=0.4.0 in /opt/anaconda3/lib/python3.12/site-packages (from dataclasses-json<0.7,>=0.5.7->langchain_community) (0.9.0)\n",
90
+ "Requirement already satisfied: starlette<0.38.0,>=0.37.2 in /opt/anaconda3/lib/python3.12/site-packages (from fastapi>=0.95.2->chromadb) (0.37.2)\n",
91
+ "Requirement already satisfied: protobuf<5.0dev,>=4.21.6 in /opt/anaconda3/lib/python3.12/site-packages (from grpcio-tools>=1.41.0->qdrant-client) (4.25.4)\n",
92
+ "Requirement already satisfied: setuptools in /opt/anaconda3/lib/python3.12/site-packages (from grpcio-tools>=1.41.0->qdrant-client) (69.5.1)\n",
93
+ "Requirement already satisfied: anyio in /opt/anaconda3/lib/python3.12/site-packages (from httpx>=0.27.0->chromadb) (3.7.1)\n",
94
+ "Requirement already satisfied: certifi in /opt/anaconda3/lib/python3.12/site-packages (from httpx>=0.27.0->chromadb) (2024.6.2)\n",
95
+ "Requirement already satisfied: httpcore==1.* in /opt/anaconda3/lib/python3.12/site-packages (from httpx>=0.27.0->chromadb) (1.0.5)\n",
96
+ "Requirement already satisfied: idna in /opt/anaconda3/lib/python3.12/site-packages (from httpx>=0.27.0->chromadb) (3.7)\n",
97
+ "Requirement already satisfied: sniffio in /opt/anaconda3/lib/python3.12/site-packages (from httpx>=0.27.0->chromadb) (1.3.0)\n",
98
+ "Requirement already satisfied: h11<0.15,>=0.13 in /opt/anaconda3/lib/python3.12/site-packages (from httpcore==1.*->httpx>=0.27.0->chromadb) (0.14.0)\n",
99
+ "Requirement already satisfied: h2<5,>=3 in /opt/anaconda3/lib/python3.12/site-packages (from httpx[http2]>=0.20.0->qdrant-client) (4.1.0)\n",
100
+ "Requirement already satisfied: six>=1.9.0 in /opt/anaconda3/lib/python3.12/site-packages (from kubernetes>=28.1.0->chromadb) (1.16.0)\n",
101
+ "Requirement already satisfied: google-auth>=1.0.1 in /opt/anaconda3/lib/python3.12/site-packages (from kubernetes>=28.1.0->chromadb) (2.33.0)\n",
102
+ "Requirement already satisfied: websocket-client!=0.40.0,!=0.41.*,!=0.42.*,>=0.32.0 in /opt/anaconda3/lib/python3.12/site-packages (from kubernetes>=28.1.0->chromadb) (1.8.0)\n",
103
+ "Requirement already satisfied: requests-oauthlib in /opt/anaconda3/lib/python3.12/site-packages (from kubernetes>=28.1.0->chromadb) (2.0.0)\n",
104
+ "Requirement already satisfied: oauthlib>=3.2.2 in /opt/anaconda3/lib/python3.12/site-packages (from kubernetes>=28.1.0->chromadb) (3.2.2)\n",
105
+ "Requirement already satisfied: jsonpatch<2.0,>=1.33 in /opt/anaconda3/lib/python3.12/site-packages (from langchain-core<0.4.0,>=0.3.0->langchain) (1.33)\n",
106
+ "Requirement already satisfied: coloredlogs in /opt/anaconda3/lib/python3.12/site-packages (from onnxruntime>=1.14.1->chromadb) (15.0.1)\n",
107
+ "Requirement already satisfied: flatbuffers in /opt/anaconda3/lib/python3.12/site-packages (from onnxruntime>=1.14.1->chromadb) (24.3.25)\n",
108
+ "Requirement already satisfied: sympy in /opt/anaconda3/lib/python3.12/site-packages (from onnxruntime>=1.14.1->chromadb) (1.12)\n",
109
+ "Requirement already satisfied: distro<2,>=1.7.0 in /opt/anaconda3/lib/python3.12/site-packages (from openai<2.0.0,>=1.40.0->langchain_openai) (1.9.0)\n",
110
+ "Requirement already satisfied: jiter<1,>=0.4.0 in /opt/anaconda3/lib/python3.12/site-packages (from openai<2.0.0,>=1.40.0->langchain_openai) (0.5.0)\n",
111
+ "Requirement already satisfied: deprecated>=1.2.6 in /opt/anaconda3/lib/python3.12/site-packages (from opentelemetry-api>=1.2.0->chromadb) (1.2.14)\n",
112
+ "Requirement already satisfied: importlib-metadata<=7.1,>=6.0 in /opt/anaconda3/lib/python3.12/site-packages (from opentelemetry-api>=1.2.0->chromadb) (6.11.0)\n",
113
+ "Requirement already satisfied: googleapis-common-protos~=1.52 in /opt/anaconda3/lib/python3.12/site-packages (from opentelemetry-exporter-otlp-proto-grpc>=1.2.0->chromadb) (1.63.2)\n",
114
+ "Requirement already satisfied: opentelemetry-exporter-otlp-proto-common==1.25.0 in /opt/anaconda3/lib/python3.12/site-packages (from opentelemetry-exporter-otlp-proto-grpc>=1.2.0->chromadb) (1.25.0)\n",
115
+ "Requirement already satisfied: opentelemetry-proto==1.25.0 in /opt/anaconda3/lib/python3.12/site-packages (from opentelemetry-exporter-otlp-proto-grpc>=1.2.0->chromadb) (1.25.0)\n",
116
+ "Requirement already satisfied: opentelemetry-instrumentation-asgi==0.46b0 in /opt/anaconda3/lib/python3.12/site-packages (from opentelemetry-instrumentation-fastapi>=0.41b0->chromadb) (0.46b0)\n",
117
+ "Requirement already satisfied: opentelemetry-instrumentation==0.46b0 in /opt/anaconda3/lib/python3.12/site-packages (from opentelemetry-instrumentation-fastapi>=0.41b0->chromadb) (0.46b0)\n",
118
+ "Requirement already satisfied: opentelemetry-semantic-conventions==0.46b0 in /opt/anaconda3/lib/python3.12/site-packages (from opentelemetry-instrumentation-fastapi>=0.41b0->chromadb) (0.46b0)\n",
119
+ "Requirement already satisfied: opentelemetry-util-http==0.46b0 in /opt/anaconda3/lib/python3.12/site-packages (from opentelemetry-instrumentation-fastapi>=0.41b0->chromadb) (0.46b0)\n",
120
+ "Requirement already satisfied: wrapt<2.0.0,>=1.0.0 in /opt/anaconda3/lib/python3.12/site-packages (from opentelemetry-instrumentation==0.46b0->opentelemetry-instrumentation-fastapi>=0.41b0->chromadb) (1.14.1)\n",
121
+ "Requirement already satisfied: asgiref~=3.0 in /opt/anaconda3/lib/python3.12/site-packages (from opentelemetry-instrumentation-asgi==0.46b0->opentelemetry-instrumentation-fastapi>=0.41b0->chromadb) (3.8.1)\n",
122
+ "Requirement already satisfied: monotonic>=1.5 in /opt/anaconda3/lib/python3.12/site-packages (from posthog>=2.4.0->chromadb) (1.6)\n",
123
+ "Requirement already satisfied: backoff>=1.10.0 in /opt/anaconda3/lib/python3.12/site-packages (from posthog>=2.4.0->chromadb) (2.2.1)\n",
124
+ "Requirement already satisfied: annotated-types>=0.6.0 in /opt/anaconda3/lib/python3.12/site-packages (from pydantic<3.0.0,>=2.7.4->langchain) (0.6.0)\n",
125
+ "Requirement already satisfied: pydantic-core==2.23.4 in /opt/anaconda3/lib/python3.12/site-packages (from pydantic<3.0.0,>=2.7.4->langchain) (2.23.4)\n",
126
+ "Requirement already satisfied: python-dotenv>=0.21.0 in /opt/anaconda3/lib/python3.12/site-packages (from pydantic-settings<3.0.0,>=2.4.0->langchain_community) (1.0.0)\n",
127
+ "Requirement already satisfied: charset-normalizer<4,>=2 in /opt/anaconda3/lib/python3.12/site-packages (from requests<3,>=2->langchain) (3.3.2)\n",
128
+ "Requirement already satisfied: regex>=2022.1.18 in /opt/anaconda3/lib/python3.12/site-packages (from tiktoken<1,>=0.7->langchain_openai) (2023.10.3)\n",
129
+ "Requirement already satisfied: huggingface-hub<1.0,>=0.16.4 in /opt/anaconda3/lib/python3.12/site-packages (from tokenizers>=0.13.2->chromadb) (0.23.4)\n",
130
+ "Requirement already satisfied: click>=8.0.0 in /opt/anaconda3/lib/python3.12/site-packages (from typer>=0.9.0->chromadb) (8.1.7)\n",
131
+ "Requirement already satisfied: shellingham>=1.3.0 in /opt/anaconda3/lib/python3.12/site-packages (from typer>=0.9.0->chromadb) (1.5.4)\n",
132
+ "Requirement already satisfied: rich>=10.11.0 in /opt/anaconda3/lib/python3.12/site-packages (from typer>=0.9.0->chromadb) (13.3.5)\n",
133
+ "Requirement already satisfied: httptools>=0.5.0 in /opt/anaconda3/lib/python3.12/site-packages (from uvicorn[standard]>=0.18.3->chromadb) (0.6.1)\n",
134
+ "Requirement already satisfied: uvloop!=0.15.0,!=0.15.1,>=0.14.0 in /opt/anaconda3/lib/python3.12/site-packages (from uvicorn[standard]>=0.18.3->chromadb) (0.19.0)\n",
135
+ "Requirement already satisfied: watchfiles>=0.13 in /opt/anaconda3/lib/python3.12/site-packages (from uvicorn[standard]>=0.18.3->chromadb) (0.20.0)\n",
136
+ "Requirement already satisfied: websockets>=10.4 in /opt/anaconda3/lib/python3.12/site-packages (from uvicorn[standard]>=0.18.3->chromadb) (12.0)\n",
137
+ "Requirement already satisfied: filelock in /opt/anaconda3/lib/python3.12/site-packages (from datasets->ragas) (3.13.1)\n",
138
+ "Requirement already satisfied: pyarrow>=15.0.0 in /opt/anaconda3/lib/python3.12/site-packages (from datasets->ragas) (17.0.0)\n",
139
+ "Requirement already satisfied: dill<0.3.9,>=0.3.0 in /opt/anaconda3/lib/python3.12/site-packages (from datasets->ragas) (0.3.8)\n",
140
+ "Requirement already satisfied: xxhash in /opt/anaconda3/lib/python3.12/site-packages (from datasets->ragas) (3.5.0)\n",
141
+ "Requirement already satisfied: multiprocess in /opt/anaconda3/lib/python3.12/site-packages (from datasets->ragas) (0.70.16)\n",
142
+ "Requirement already satisfied: fsspec<=2024.6.1,>=2023.1.0 in /opt/anaconda3/lib/python3.12/site-packages (from fsspec[http]<=2024.6.1,>=2023.1.0->datasets->ragas) (2024.3.1)\n",
143
+ "Requirement already satisfied: cachetools<6.0,>=2.0.0 in /opt/anaconda3/lib/python3.12/site-packages (from google-auth>=1.0.1->kubernetes>=28.1.0->chromadb) (5.3.3)\n",
144
+ "Requirement already satisfied: pyasn1-modules>=0.2.1 in /opt/anaconda3/lib/python3.12/site-packages (from google-auth>=1.0.1->kubernetes>=28.1.0->chromadb) (0.2.8)\n",
145
+ "Requirement already satisfied: rsa<5,>=3.1.4 in /opt/anaconda3/lib/python3.12/site-packages (from google-auth>=1.0.1->kubernetes>=28.1.0->chromadb) (4.9)\n",
146
+ "Requirement already satisfied: hyperframe<7,>=6.0 in /opt/anaconda3/lib/python3.12/site-packages (from h2<5,>=3->httpx[http2]>=0.20.0->qdrant-client) (6.0.1)\n",
147
+ "Requirement already satisfied: hpack<5,>=4.0 in /opt/anaconda3/lib/python3.12/site-packages (from h2<5,>=3->httpx[http2]>=0.20.0->qdrant-client) (4.0.0)\n",
148
+ "Requirement already satisfied: zipp>=0.5 in /opt/anaconda3/lib/python3.12/site-packages (from importlib-metadata<=7.1,>=6.0->opentelemetry-api>=1.2.0->chromadb) (3.17.0)\n",
149
+ "Requirement already satisfied: jsonpointer>=1.9 in /opt/anaconda3/lib/python3.12/site-packages (from jsonpatch<2.0,>=1.33->langchain-core<0.4.0,>=0.3.0->langchain) (2.1)\n",
150
+ "Requirement already satisfied: markdown-it-py<3.0.0,>=2.2.0 in /opt/anaconda3/lib/python3.12/site-packages (from rich>=10.11.0->typer>=0.9.0->chromadb) (2.2.0)\n",
151
+ "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /opt/anaconda3/lib/python3.12/site-packages (from rich>=10.11.0->typer>=0.9.0->chromadb) (2.15.1)\n",
152
+ "Requirement already satisfied: mypy-extensions>=0.3.0 in /opt/anaconda3/lib/python3.12/site-packages (from typing-inspect<1,>=0.4.0->dataclasses-json<0.7,>=0.5.7->langchain_community) (1.0.0)\n",
153
+ "Requirement already satisfied: humanfriendly>=9.1 in /opt/anaconda3/lib/python3.12/site-packages (from coloredlogs->onnxruntime>=1.14.1->chromadb) (10.0)\n",
154
+ "Requirement already satisfied: mpmath>=0.19 in /opt/anaconda3/lib/python3.12/site-packages (from sympy->onnxruntime>=1.14.1->chromadb) (1.3.0)\n",
155
+ "Requirement already satisfied: mdurl~=0.1 in /opt/anaconda3/lib/python3.12/site-packages (from markdown-it-py<3.0.0,>=2.2.0->rich>=10.11.0->typer>=0.9.0->chromadb) (0.1.0)\n",
156
+ "Requirement already satisfied: pyasn1<0.5.0,>=0.4.6 in /opt/anaconda3/lib/python3.12/site-packages (from pyasn1-modules>=0.2.1->google-auth>=1.0.1->kubernetes>=28.1.0->chromadb) (0.4.8)\n",
157
+ "Using cached langchain_community-0.3.0-py3-none-any.whl (2.3 MB)\n",
158
+ "Downloading ragas-0.1.19-py3-none-any.whl (190 kB)\n",
159
+ "Installing collected packages: langchain_community, ragas\n",
160
+ " Attempting uninstall: langchain_community\n",
161
+ " Found existing installation: langchain-community 0.2.17\n",
162
+ " Uninstalling langchain-community-0.2.17:\n",
163
+ " Successfully uninstalled langchain-community-0.2.17\n",
164
+ " Attempting uninstall: ragas\n",
165
+ " Found existing installation: ragas 0.1.20\n",
166
+ " Uninstalling ragas-0.1.20:\n",
167
+ " Successfully uninstalled ragas-0.1.20\n",
168
+ "Successfully installed langchain_community-0.3.0 ragas-0.1.19\n"
169
+ ]
170
+ }
171
+ ],
172
  "source": [
173
  "!pip install langchain langchain_community langchain_openai chromadb pypdf langsmith qdrant-client ragas pandas"
174
  ]
 
245
  },
246
  {
247
  "cell_type": "code",
248
+ "execution_count": 4,
249
  "metadata": {},
250
  "outputs": [
251
  {
 
272
  },
273
  {
274
  "cell_type": "code",
275
+ "execution_count": 2,
276
  "metadata": {},
277
  "outputs": [],
278
  "source": [
 
282
  },
283
  {
284
  "cell_type": "code",
285
+ "execution_count": 5,
286
  "metadata": {},
287
  "outputs": [],
288
  "source": [
 
294
  },
295
  {
296
  "cell_type": "code",
297
+ "execution_count": 6,
298
  "metadata": {},
299
  "outputs": [],
300
  "source": [
 
305
  "baseline_docs = text_splitter.split_documents(pdf_documents)"
306
  ]
307
  },
308
+ {
309
+ "cell_type": "code",
310
+ "execution_count": 7,
311
+ "metadata": {},
312
+ "outputs": [
313
+ {
314
+ "data": {
315
+ "text/plain": [
316
+ "524"
317
+ ]
318
+ },
319
+ "execution_count": 7,
320
+ "metadata": {},
321
+ "output_type": "execute_result"
322
+ }
323
+ ],
324
+ "source": [
325
+ "len(baseline_docs)"
326
+ ]
327
+ },
328
  {
329
  "cell_type": "code",
330
  "execution_count": 13,
 
887
  "df_baseline.to_csv(\"df_baseline_metrics.csv\", index=False)"
888
  ]
889
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
890
  {
891
  "cell_type": "code",
892
  "execution_count": 27,
 
3140
  "name": "python",
3141
  "nbconvert_exporter": "python",
3142
  "pygments_lexer": "ipython3",
3143
+ "version": "3.12.4"
3144
  },
3145
  "widgets": {
3146
  "application/vnd.jupyter.widget-state+json": {
midterm_fine_tune_embeddings_model.ipynb ADDED
The diff for this file is too large to render. See raw diff