File size: 26,080 Bytes
919910a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/adityasugandhi/Documents/GitHub/LLM_Playground/.newenv/lib/python3.9/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
      "  from .autonotebook import tqdm as notebook_tqdm\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "/Users/adityasugandhi/Documents/GitHub/LLM_Playground\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Batches: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 1/1 [00:03<00:00,  3.22s/it]\n",
      "/Users/adityasugandhi/.cache/chroma/onnx_models/all-MiniLM-L6-v2/onnx.tar.gz: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 79.3M/79.3M [00:11<00:00, 7.14MiB/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'retriever': {'documents': [Document(id=fee80856fdb487fb694c739e089614d733502a7bd6d8b192f29ed6dad2088f44, content: 'Vishwam Shah is a highly motivated and skilled Computer Science professional currently pursuing a Ma...', meta: {'file_path': '/Users/adityasugandhi/Documents/GitHub/LLM_Playground/data/mf.txt', 'source_id': '99393e97120fcb9e88daa2d490060e9a91385ae63c7890d12b351978c02d3d93'}, score: 1.0066444873809814, embedding: vector of size 384), Document(id=e700bf2b5df175311a60ca00ffb6ed77b65b09c4221a2466b68e4802d90a831a, content: 'VISHWAM SHAH\n",
      "Tallahassee, FL |[email protected] |+1 (850) 666 - 0095 |https://www.linkedin.com/...', meta: {'file_path': '/Users/adityasugandhi/Documents/GitHub/LLM_Playground/data/Resume_Vishwam_Shah_Back_end.pdf', 'source_id': 'd23089ee94ea955eb9ef0045999019220184668c96631b25686fc002722e8753'}, score: 1.5628944635391235, embedding: vector of size 384), Document(id=299afa7bfc84e7700fd38b178933ab2bf3a67b09298662651b173af03fde7968, content: ' The\n",
      "β€œECMWF Parameter ID” column is a ECMWF’s numeric label, and can be used to construct the URL fo...', meta: {'file_path': '/Users/adityasugandhi/Documents/GitHub/LLM_Playground/data/2212.12794.pdf', 'source_id': '314ee646f1f3143cad0677f2cdf057f1d625e5f2a1891449011557e1f75249d5'}, score: 1.6514018774032593, embedding: vector of size 384)]}}\n"
     ]
    }
   ],
   "source": [
    "from pathlib import Path\n",
    "import os\n",
    "from haystack import Pipeline\n",
    "from haystack.components.embedders import SentenceTransformersDocumentEmbedder,SentenceTransformersTextEmbedder\n",
    "from haystack.components.converters import PyPDFToDocument, TextFileToDocument\n",
    "from haystack.components.preprocessors import DocumentCleaner, DocumentSplitter\n",
    "from haystack.components.routers import FileTypeRouter\n",
    "from haystack.components.joiners import DocumentJoiner\n",
    "from haystack.components.writers import DocumentWriter\n",
    "from haystack_integrations.document_stores.chroma import ChromaDocumentStore\n",
    "from haystack_integrations.components.retrievers.chroma import ChromaQueryTextRetriever\n",
    "\n",
    "HERE = Path(os.getcwd())\n",
    "print(HERE)\n",
    "\n",
    "data_path = HERE / \"data\"\n",
    "file_paths = [str(data_path / name) for name in os.listdir(data_path)]\n",
    "\n",
    "chroma_store = ChromaDocumentStore()\n",
    "\n",
    "pipeline = Pipeline()\n",
    "pipeline.add_component(\"FileTypeRouter\", FileTypeRouter(mime_types=[\"text/plain\", \"application/pdf\"]))\n",
    "pipeline.add_component(\"TextFileConverter\", TextFileToDocument())\n",
    "pipeline.add_component(\"PdfFileConverter\", PyPDFToDocument())\n",
    "\n",
    "pipeline.add_component(\"Joiner\", DocumentJoiner())\n",
    "pipeline.add_component(\"Cleaner\", DocumentCleaner())\n",
    "pipeline.add_component(\"Splitter\", DocumentSplitter(split_by=\"sentence\", split_length=250, split_overlap=30))\n",
    "# pipeline.add_component(\"TextEmbedder\", SentenceTransformersTextEmbedder())\n",
    "pipeline.add_component(\"Embedder\", SentenceTransformersDocumentEmbedder(model=\"sentence-transformers/all-MiniLM-L6-v2\"))\n",
    "\n",
    "pipeline.add_component(\"Writer\", DocumentWriter(document_store=chroma_store))\n",
    "\n",
    "pipeline.connect(\"FileTypeRouter.text/plain\", \"TextFileConverter.sources\")\n",
    "pipeline.connect(\"FileTypeRouter.application/pdf\", \"PdfFileConverter.sources\")\n",
    "pipeline.connect(\"TextFileConverter.documents\", \"Joiner.documents\")\n",
    "pipeline.connect(\"PdfFileConverter.documents\", \"Joiner.documents\")\n",
    "pipeline.connect(\"Joiner.documents\", \"Cleaner.documents\")\n",
    "pipeline.connect(\"Cleaner.documents\", \"Splitter.documents\")\n",
    "pipeline.connect(\"Splitter.documents\", \"Embedder.documents\")\n",
    "# pipeline.connect(\"TextEmbedder.embeddings\", \"Embedder.documents\")\n",
    "pipeline.connect(\"Embedder.documents\", \"Writer.documents\")\n",
    "\n",
    "pipeline.run(\n",
    "    {\"FileTypeRouter\": {\"sources\": file_paths}},\n",
    ")\n",
    "\n",
    "# Querying pipeline\n",
    "querying = Pipeline()\n",
    "querying.add_component(\"retriever\", ChromaQueryTextRetriever(chroma_store))\n",
    "results = querying.run({\"retriever\": {\"query\": \"Vishwam\", \"top_k\": 3}})\n",
    "print(results)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#Information Retriver"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'retriever': {'documents': [Document(id=ce02ebe3fa97972f0c76b2c175f658873b2d0e19987e9cbc38dcacb25b8ebdba, content: 'Aditya Sugandhi's journey as a Software Engineer is characterized by a deep commitment to excellence...', meta: {'file_path': '/unity/f2/asugandhi/Downloads/LLM_Playground/data/Aditya_train.txt', 'source_id': '228fb178549cb032d67e0b2da301131f48d7c88c814b6d6920c92727b1c8f5fd'}, score: 1.1221085786819458, embedding: vector of size 384), Document(id=11f7061bb8c56ae79965f1ba0d1a0283188dc031309394e1a03470d5d72207a9, content: 'Aditya Sugandhi is a seasoned Software Engineer with a rich background and diverse skill set, encomp...', meta: {'file_path': '/unity/f2/asugandhi/Downloads/LLM_Playground/data/Aditya_test.txt', 'source_id': 'c85a2287836cae980897693decb5e9d07e80f60b7c96b4e542ef3057e11fc228'}, score: 1.2236461639404297, embedding: vector of size 384), Document(id=a6ad41c3febd74d1f6825aac59c2d6dd7589ae8088bb3b449ea239c97d6f1b1c, content: ' . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 18\n",
      "1.2 HRES . . . . . . . . . . . . . ....', meta: {'file_path': '/unity/f2/asugandhi/Downloads/LLM_Playground/data/2212.12794.pdf', 'source_id': 'aa504618a25e65b870dde2fe288f395a44ff6a05c640fa7a2e6c5a5d3a9a44ef'}, score: 1.6584246158599854, embedding: vector of size 384)]}}\n"
     ]
    }
   ],
   "source": [
    "# # Querying pipeline\n",
    "# querying = Pipeline()\n",
    "# querying.add_component(\"retriever\", ChromaQueryTextRetriever(chroma_store))\n",
    "# results = querying.run({\"retriever\": {\"query\": \"Aditya\", \"top_k\": 3}})\n",
    "# print(results)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "ename": "AttributeError",
     "evalue": "'str' object has no attribute 'resolve_value'",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mAttributeError\u001b[0m                            Traceback (most recent call last)",
      "Cell \u001b[0;32mIn[28], line 29\u001b[0m\n\u001b[1;32m     25\u001b[0m api_key \u001b[38;5;241m=\u001b[39m os\u001b[38;5;241m.\u001b[39menviron\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mOPENAI_API_KEY\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m     27\u001b[0m \u001b[38;5;66;03m#ExtractiveReader to extract answers from the relevant context\u001b[39;00m\n\u001b[1;32m     28\u001b[0m \u001b[38;5;66;03m# api_key = Secret.from_token(\"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\")\u001b[39;00m\n\u001b[0;32m---> 29\u001b[0m llm \u001b[38;5;241m=\u001b[39m \u001b[43mOpenAIGenerator\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmodel\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mgpt-3.5-turbo-0125\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43mapi_key\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mapi_key\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m     30\u001b[0m reader \u001b[38;5;241m=\u001b[39m ExtractiveReader(model\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdeepset/roberta-base-squad2-distilled\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m     32\u001b[0m extractive_qa_pipeline \u001b[38;5;241m=\u001b[39m Pipeline()\n",
      "File \u001b[0;32m/conda/asugandhi/miniconda3/envs/RAGAPP/lib/python3.10/site-packages/haystack/core/component/component.py:122\u001b[0m, in \u001b[0;36mComponentMeta.__call__\u001b[0;34m(cls, *args, **kwargs)\u001b[0m\n\u001b[1;32m    117\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m    118\u001b[0m \u001b[38;5;124;03mThis method is called when clients instantiate a Component and\u001b[39;00m\n\u001b[1;32m    119\u001b[0m \u001b[38;5;124;03mruns before __new__ and __init__.\u001b[39;00m\n\u001b[1;32m    120\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m    121\u001b[0m \u001b[38;5;66;03m# This will call __new__ then __init__, giving us back the Component instance\u001b[39;00m\n\u001b[0;32m--> 122\u001b[0m instance \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[38;5;21;43m__call__\u001b[39;49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    124\u001b[0m \u001b[38;5;66;03m# Before returning, we have the chance to modify the newly created\u001b[39;00m\n\u001b[1;32m    125\u001b[0m \u001b[38;5;66;03m# Component instance, so we take the chance and set up the I/O sockets\u001b[39;00m\n\u001b[1;32m    126\u001b[0m \n\u001b[1;32m    127\u001b[0m \u001b[38;5;66;03m# If `component.set_output_types()` was called in the component constructor,\u001b[39;00m\n\u001b[1;32m    128\u001b[0m \u001b[38;5;66;03m# `__haystack_output__` is already populated, no need to do anything.\u001b[39;00m\n\u001b[1;32m    129\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28mhasattr\u001b[39m(instance, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m__haystack_output__\u001b[39m\u001b[38;5;124m\"\u001b[39m):\n\u001b[1;32m    130\u001b[0m     \u001b[38;5;66;03m# If that's not the case, we need to populate `__haystack_output__`\u001b[39;00m\n\u001b[1;32m    131\u001b[0m     \u001b[38;5;66;03m#\u001b[39;00m\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m    134\u001b[0m     \u001b[38;5;66;03m# We deepcopy the content of the cache to transfer ownership from the class method\u001b[39;00m\n\u001b[1;32m    135\u001b[0m     \u001b[38;5;66;03m# to the actual instance, so that different instances of the same class won't share this data.\u001b[39;00m\n",
      "File \u001b[0;32m/conda/asugandhi/miniconda3/envs/RAGAPP/lib/python3.10/site-packages/haystack/components/generators/openai.py:103\u001b[0m, in \u001b[0;36mOpenAIGenerator.__init__\u001b[0;34m(self, api_key, model, streaming_callback, api_base_url, organization, system_prompt, generation_kwargs)\u001b[0m\n\u001b[1;32m    101\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mapi_base_url \u001b[38;5;241m=\u001b[39m api_base_url\n\u001b[1;32m    102\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39morganization \u001b[38;5;241m=\u001b[39m organization\n\u001b[0;32m--> 103\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mclient \u001b[38;5;241m=\u001b[39m OpenAI(api_key\u001b[38;5;241m=\u001b[39m\u001b[43mapi_key\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mresolve_value\u001b[49m(), organization\u001b[38;5;241m=\u001b[39morganization, base_url\u001b[38;5;241m=\u001b[39mapi_base_url)\n",
      "\u001b[0;31mAttributeError\u001b[0m: 'str' object has no attribute 'resolve_value'"
     ]
    }
   ],
   "source": [
    "from dotenv import load_dotenv\n",
    "\n",
    "load_dotenv() \n",
    "from haystack import Pipeline\n",
    "from haystack.utils import Secret\n",
    "from haystack_integrations.components.retrievers.chroma import ChromaQueryTextRetriever\n",
    "from haystack.components.readers import ExtractiveReader\n",
    "from haystack.components.generators import GPTGenerator\n",
    "from haystack.components.builders.prompt_builder import PromptBuilder\n",
    "from haystack.components.generators import OpenAIGenerator\n",
    "\n",
    "template = \"\"\"\n",
    "  `    Answer the question using the provided context based on Aditya.\n",
    "\n",
    "      Context:\n",
    "      {% for context in answers %}\n",
    "      {{ context }}\n",
    "      {% endfor %}\n",
    "      Question: {{question}}\n",
    "      Answer:\n",
    "      \"\"\"\n",
    "\n",
    "prompt_builder = PromptBuilder(template=template)\n",
    "retriever = ChromaQueryTextRetriever(document_store = chroma_store)\n",
    "api_key = os.environ.get(\"OPENAI_API_KEY\")\n",
    "\n",
    "#ExtractiveReader to extract answers from the relevant context\n",
    "api_key = Secret.from_token(api_key)\n",
    "llm = OpenAIGenerator(model=\"gpt-3.5-turbo-0125\",api_key=api_key)\n",
    "reader = ExtractiveReader(model=\"deepset/roberta-base-squad2-distilled\")\n",
    "\n",
    "extractive_qa_pipeline = Pipeline()\n",
    "extractive_qa_pipeline.add_component(\"retriever\", retriever)\n",
    "extractive_qa_pipeline.add_component('reader', reader)\n",
    "extractive_qa_pipeline.add_component(instance=prompt_builder,   name=\"prompt_builder\")\n",
    "extractive_qa_pipeline.add_component(\"llm\", llm)\n",
    "\n",
    "extractive_qa_pipeline.connect(\"retriever.documents\", \"reader.documents\")\n",
    "extractive_qa_pipeline.connect(\"reader.answers\", \"prompt_builder.answers\")\n",
    "extractive_qa_pipeline.connect(\"prompt_builder\", \"llm\")\n",
    "\n",
    "\n",
    "query = \"what is Aditya Pursuing ?\"\n",
    "print(query)\n",
    "# Define the input data for the pipeline components\n",
    "input_data = {\n",
    "    \"retriever\": {\"query\": query, \"top_k\": 2},\n",
    "    \"reader\": {\"query\": query, \"top_k\": 2},\n",
    "    \"prompt_builder\": {\"question\": query},\n",
    "     # Use 'max_tokens' instead of 'max_new_tokens'\n",
    "}\n",
    "\n",
    "# Run the pipeline with the updated input data\n",
    "results = extractive_qa_pipeline.run(input_data)\n",
    "print(results)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "ename": "TypeError",
     "evalue": "isinstance() arg 2 must be a type, a tuple of types, or a union",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mTypeError\u001b[0m                                 Traceback (most recent call last)",
      "Cell \u001b[0;32mIn[5], line 9\u001b[0m\n\u001b[1;32m      7\u001b[0m             \u001b[38;5;28;01mreturn\u001b[39;00m obj\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__dict__\u001b[39m\n\u001b[1;32m      8\u001b[0m         \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28msuper\u001b[39m()\u001b[38;5;241m.\u001b[39mdefault(obj)\n\u001b[0;32m----> 9\u001b[0m json_results \u001b[38;5;241m=\u001b[39m \u001b[43mjson\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdumps\u001b[49m\u001b[43m(\u001b[49m\u001b[43mresults\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mindent\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m2\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mcls\u001b[39;49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mExtractedAnswerEncoder\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m     11\u001b[0m \u001b[38;5;28mprint\u001b[39m(json_results)\n",
      "File \u001b[0;32m/conda/asugandhi/miniconda3/envs/RAGAPP/lib/python3.10/json/__init__.py:238\u001b[0m, in \u001b[0;36mdumps\u001b[0;34m(obj, skipkeys, ensure_ascii, check_circular, allow_nan, cls, indent, separators, default, sort_keys, **kw)\u001b[0m\n\u001b[1;32m    232\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mcls\u001b[39m \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m    233\u001b[0m     \u001b[38;5;28mcls\u001b[39m \u001b[38;5;241m=\u001b[39m JSONEncoder\n\u001b[1;32m    234\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mcls\u001b[39;49m\u001b[43m(\u001b[49m\n\u001b[1;32m    235\u001b[0m \u001b[43m    \u001b[49m\u001b[43mskipkeys\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mskipkeys\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mensure_ascii\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mensure_ascii\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    236\u001b[0m \u001b[43m    \u001b[49m\u001b[43mcheck_circular\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcheck_circular\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mallow_nan\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mallow_nan\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mindent\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mindent\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    237\u001b[0m \u001b[43m    \u001b[49m\u001b[43mseparators\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mseparators\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdefault\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdefault\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msort_keys\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msort_keys\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m--> 238\u001b[0m \u001b[43m    \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkw\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mencode\u001b[49m\u001b[43m(\u001b[49m\u001b[43mobj\u001b[49m\u001b[43m)\u001b[49m\n",
      "File \u001b[0;32m/conda/asugandhi/miniconda3/envs/RAGAPP/lib/python3.10/json/encoder.py:201\u001b[0m, in \u001b[0;36mJSONEncoder.encode\u001b[0;34m(self, o)\u001b[0m\n\u001b[1;32m    199\u001b[0m chunks \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39miterencode(o, _one_shot\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[1;32m    200\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(chunks, (\u001b[38;5;28mlist\u001b[39m, \u001b[38;5;28mtuple\u001b[39m)):\n\u001b[0;32m--> 201\u001b[0m     chunks \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mlist\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mchunks\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    202\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;241m.\u001b[39mjoin(chunks)\n",
      "File \u001b[0;32m/conda/asugandhi/miniconda3/envs/RAGAPP/lib/python3.10/json/encoder.py:431\u001b[0m, in \u001b[0;36m_make_iterencode.<locals>._iterencode\u001b[0;34m(o, _current_indent_level)\u001b[0m\n\u001b[1;32m    429\u001b[0m     \u001b[38;5;28;01myield from\u001b[39;00m _iterencode_list(o, _current_indent_level)\n\u001b[1;32m    430\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(o, \u001b[38;5;28mdict\u001b[39m):\n\u001b[0;32m--> 431\u001b[0m     \u001b[38;5;28;01myield from\u001b[39;00m _iterencode_dict(o, _current_indent_level)\n\u001b[1;32m    432\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m    433\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m markers \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n",
      "File \u001b[0;32m/conda/asugandhi/miniconda3/envs/RAGAPP/lib/python3.10/json/encoder.py:405\u001b[0m, in \u001b[0;36m_make_iterencode.<locals>._iterencode_dict\u001b[0;34m(dct, _current_indent_level)\u001b[0m\n\u001b[1;32m    403\u001b[0m         \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m    404\u001b[0m             chunks \u001b[38;5;241m=\u001b[39m _iterencode(value, _current_indent_level)\n\u001b[0;32m--> 405\u001b[0m         \u001b[38;5;28;01myield from\u001b[39;00m chunks\n\u001b[1;32m    406\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m newline_indent \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m    407\u001b[0m     _current_indent_level \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;241m1\u001b[39m\n",
      "File \u001b[0;32m/conda/asugandhi/miniconda3/envs/RAGAPP/lib/python3.10/json/encoder.py:405\u001b[0m, in \u001b[0;36m_make_iterencode.<locals>._iterencode_dict\u001b[0;34m(dct, _current_indent_level)\u001b[0m\n\u001b[1;32m    403\u001b[0m         \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m    404\u001b[0m             chunks \u001b[38;5;241m=\u001b[39m _iterencode(value, _current_indent_level)\n\u001b[0;32m--> 405\u001b[0m         \u001b[38;5;28;01myield from\u001b[39;00m chunks\n\u001b[1;32m    406\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m newline_indent \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m    407\u001b[0m     _current_indent_level \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;241m1\u001b[39m\n",
      "File \u001b[0;32m/conda/asugandhi/miniconda3/envs/RAGAPP/lib/python3.10/json/encoder.py:325\u001b[0m, in \u001b[0;36m_make_iterencode.<locals>._iterencode_list\u001b[0;34m(lst, _current_indent_level)\u001b[0m\n\u001b[1;32m    323\u001b[0m         \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m    324\u001b[0m             chunks \u001b[38;5;241m=\u001b[39m _iterencode(value, _current_indent_level)\n\u001b[0;32m--> 325\u001b[0m         \u001b[38;5;28;01myield from\u001b[39;00m chunks\n\u001b[1;32m    326\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m newline_indent \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m    327\u001b[0m     _current_indent_level \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;241m1\u001b[39m\n",
      "File \u001b[0;32m/conda/asugandhi/miniconda3/envs/RAGAPP/lib/python3.10/json/encoder.py:438\u001b[0m, in \u001b[0;36m_make_iterencode.<locals>._iterencode\u001b[0;34m(o, _current_indent_level)\u001b[0m\n\u001b[1;32m    436\u001b[0m         \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCircular reference detected\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m    437\u001b[0m     markers[markerid] \u001b[38;5;241m=\u001b[39m o\n\u001b[0;32m--> 438\u001b[0m o \u001b[38;5;241m=\u001b[39m \u001b[43m_default\u001b[49m\u001b[43m(\u001b[49m\u001b[43mo\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    439\u001b[0m \u001b[38;5;28;01myield from\u001b[39;00m _iterencode(o, _current_indent_level)\n\u001b[1;32m    440\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m markers \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n",
      "Cell \u001b[0;32mIn[5], line 5\u001b[0m, in \u001b[0;36mExtractedAnswerEncoder.default\u001b[0;34m(self, obj)\u001b[0m\n\u001b[1;32m      4\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mdefault\u001b[39m(\u001b[38;5;28mself\u001b[39m, obj):\n\u001b[0;32m----> 5\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28;43misinstance\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mobj\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mresults\u001b[49m\u001b[43m)\u001b[49m:\n\u001b[1;32m      6\u001b[0m         \u001b[38;5;66;03m# Convert ExtractedAnswer to a dictionary\u001b[39;00m\n\u001b[1;32m      7\u001b[0m         \u001b[38;5;28;01mreturn\u001b[39;00m obj\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__dict__\u001b[39m\n\u001b[1;32m      8\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28msuper\u001b[39m()\u001b[38;5;241m.\u001b[39mdefault(obj)\n",
      "\u001b[0;31mTypeError\u001b[0m: isinstance() arg 2 must be a type, a tuple of types, or a union"
     ]
    }
   ],
   "source": [
    "import json\n",
    "\n",
    "class ExtractedAnswerEncoder(json.JSONEncoder):\n",
    "    def default(self, obj):\n",
    "        if isinstance(obj, results):\n",
    "            # Convert ExtractedAnswer to a dictionary\n",
    "            return obj.__dict__\n",
    "        return super().default(obj)\n",
    "json_results = json.dumps(results, indent=2, cls=ExtractedAnswerEncoder)\n",
    "\n",
    "print(json_results)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "p"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "RAGAPP",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}