Spaces:
Runtime error
Runtime error
File size: 29,335 Bytes
919910a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 |
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"bert_load_from_file: gguf version = 2\n",
"bert_load_from_file: gguf alignment = 32\n",
"bert_load_from_file: gguf data offset = 695552\n",
"bert_load_from_file: model name = BERT\n",
"bert_load_from_file: model architecture = bert\n",
"bert_load_from_file: model file type = 1\n",
"bert_load_from_file: bert tokenizer vocab = 30522\n",
"[0.01552767027169466, 0.08103805035352707, -0.12307794392108917, 0.09815496951341629, 0.023653453215956688, -0.06102974712848663, 0.07934562116861343, 0.02745242230594158, -0.028132867068052292, 0.03221212700009346, 0.12919503450393677, 0.0025996030308306217, -0.04139482602477074, -0.06577245146036148, -0.014648980461061, 0.015588296577334404, -0.08434717357158661, -0.07182654738426208, 0.014775916934013367, -0.07444048672914505, 0.0590442530810833, 0.04814479872584343, 0.06639457494020462, 0.008800982497632504, -0.017847837880253792, -0.020949387922883034, -0.026810096576809883, 0.026885343715548515, -0.0764176994562149, -0.057069629430770874, 0.039454489946365356, 0.06288687884807587, 0.036681558936834335, 0.03875448554754257, 0.09926188737154007, 0.07691209763288498, -0.0007747725467197597, -0.05224066600203514, -0.06268111616373062, -0.00026997251552529633, 0.06668399274349213, -0.10031015425920486, -0.00970512256026268, -0.01601257175207138, -0.03624574467539787, -0.10884801298379898, -0.027961881831288338, -0.02198118157684803, 0.011900517158210278, -0.005993946921080351, -0.08890494704246521, -0.01797824539244175, -0.040237877517938614, -0.049093399196863174, -0.019428042694926262, -0.005168401636183262, 0.032794076949357986, -0.03235733509063721, -0.0705694779753685, -0.0941174328327179, -0.051176246255636215, 0.08234924077987671, -0.020688237622380257, 0.026870127767324448, -0.031070750206708908, 0.021878499537706375, -0.06237325817346573, 0.07108485698699951, 0.0030630987603217363, -0.06985890865325928, -0.05954312905669212, -0.05837850645184517, -0.09073222428560257, 0.005469962954521179, -0.021687401458621025, 0.0314265601336956, -0.025661440566182137, -0.0495171844959259, 0.0394166000187397, -0.029094435274600983, -0.018130596727132797, -0.04031619802117348, 0.08927112817764282, 0.00014257561997510493, -0.026646623387932777, 0.06340110301971436, 0.07394086569547653, 0.014260515570640564, -0.023962723091244698, -0.06585869938135147, 
0.04496406018733978, 0.04277855530381203, 0.008617856539785862, 0.0665624663233757, 0.026723850518465042, 0.01059289276599884, 0.011615158058702946, -0.04054207354784012, -0.04994109272956848, 0.10845799744129181, 0.036834508180618286, 0.045918650925159454, -0.05060620605945587, 0.11201019585132599, -0.11668886244297028, -0.01581607758998871, 0.0960628017783165, -0.0488315187394619, 0.024895356968045235, -0.04963228479027748, -0.03182365745306015, -0.004189752042293549, -0.022618744522333145, -0.020297333598136902, 0.010558796115219593, -0.03451183810830116, -0.08592583984136581, 0.07002798467874527, -0.0014977692626416683, -0.020605681464076042, 0.0009889955399557948, -0.06769613176584244, -0.016587721183896065, -0.03945926949381828, 0.027652334421873093, -0.0037252188194543123, 4.02796795242466e-05, 2.496357863577944e-34, -0.019553543999791145, -0.006931365933269262, 0.05519813671708107, 0.030014386400580406, -0.027222076430916786, -0.0040949187241494656, 0.028509650379419327, 0.0003461719024926424, -0.07768791913986206, 0.026781603693962097, -0.021593185141682625, -0.043786026537418365, 0.03954899311065674, -0.029267827048897743, 0.03505752608180046, 0.005345764569938183, -0.01677117310464382, 0.08446278423070908, 0.05020565167069435, 0.041258785873651505, 0.03950535133481026, 0.05992049351334572, 0.004634900484234095, -0.0946483463048935, -0.028090720996260643, -0.03398402780294418, -0.02709619328379631, -0.04133094474673271, -0.005644459743052721, 0.032718855887651443, 0.010113613680005074, -0.02065439336001873, -0.016786033287644386, 0.03233509510755539, -0.06616782397031784, 0.029395416378974915, -0.00663745915517211, -0.06478383392095566, -0.09521140158176422, -0.010280981659889221, -0.03638819605112076, -0.007304533384740353, 0.13017326593399048, -0.06668204814195633, -0.012214419431984425, 0.09507791697978973, -0.0009454676182940602, 0.045288313180208206, 0.061766546219587326, 0.06407830119132996, -0.06472055613994598, 0.02868455834686756, 
0.014445719309151173, 0.03761356323957443, 0.04157082363963127, 0.007912926375865936, -0.028237026184797287, -0.048911020159721375, 0.05634745582938194, 0.0031706185545772314, 0.024482648819684982, -0.0926365926861763, -0.028224240988492966, 0.01816745474934578, -0.0009234159952029586, -0.06061384454369545, 0.02713773585855961, -0.0657828152179718, 0.06030780076980591, 0.05763610824942589, -0.0024990146048367023, -0.031143246218562126, 0.014573169872164726, 0.05780758708715439, -0.005530690308660269, -0.024387281388044357, 0.025631394237279892, 0.04571927711367607, -0.07182186841964722, 0.02106345444917679, 0.047523558139801025, -0.025845326483249664, 0.04639439284801483, -0.0461527556180954, 0.06309600919485092, 0.002871520584449172, -0.019818803295493126, -0.01131194643676281, 0.04196448624134064, -0.017453346401453018, -0.043370626866817474, 0.06779050827026367, -0.11423997581005096, -0.007464131806045771, 0.07379034906625748, -1.0159212682046505e-33, 0.04116467386484146, -0.02187393046915531, -0.06464317440986633, -0.04831999912858009, 0.054312679916620255, -0.04359174892306328, 0.10390615463256836, -0.008244805969297886, 0.02429776079952717, 0.08679671585559845, 0.03324231505393982, -0.04018168896436691, 0.023248450830578804, -0.11267966777086258, 0.027334723621606827, -0.018510276451706886, -0.015763893723487854, -0.06620948016643524, -0.029428796842694283, 0.024292776361107826, -0.0836699977517128, 0.06186313182115555, 0.00979425199329853, 0.0149845527485013, 0.02952435240149498, -0.01609259471297264, 0.06341543793678284, 0.025381680577993393, -0.07650972157716751, -0.08898097276687622, 0.0543917752802372, 0.029732191935181618, -0.12705901265144348, 0.11817684024572372, 0.05331788584589958, -0.03143112361431122, 0.0274629145860672, 0.007251844275742769, -0.031150249764323235, 0.0817786380648613, 0.01751711592078209, 0.07238985598087311, -0.006944955326616764, -0.0723976194858551, 0.034229815006256104, -0.003155543003231287, 0.011516829021275043, 
-0.06810746341943741, 0.09528303891420364, -0.03101549670100212, 0.04598725214600563, -0.032259490340948105, 0.07952931523323059, 0.011015753261744976, 0.07233146578073502, 0.04757140204310417, 0.07436589896678925, 0.03568919375538826, -0.05899377539753914, -0.07132003456354141, 0.02570781111717224, 0.05620163306593895, 0.029458558186888695, 0.07280883193016052, 0.014483439736068249, -0.09305085241794586, 0.04503859579563141, -0.07544805109500885, 0.04793871194124222, -0.0066075995564460754, -0.027827860787510872, -0.07631555944681168, -0.05412726849317551, 0.056384310126304626, 0.056813593953847885, 0.06885606050491333, -0.001682625850662589, -0.021189114078879356, -0.004618695937097073, -0.04061309993267059, 0.10019382834434509, -0.030752010643482208, 0.036137741059064865, 0.035284142941236496, 0.022952962666749954, 0.0072324820794165134, 0.0515342652797699, 0.020784474909305573, 0.005023692734539509, 0.019894951954483986, 0.05247249826788902, 0.020828237757086754, -0.010321374982595444, 0.0026851524598896503, 0.0014503364218398929, -1.771797109029194e-08, -0.07890938222408295, -0.10603849589824677, -0.04075992852449417, 0.07047312706708908, -0.053525179624557495, 0.028504792600870132, -0.01275587547570467, -0.04736935719847679, -0.044071078300476074, -0.016645105555653572, -0.04981076717376709, -0.010642158798873425, 0.017387278378009796, 0.015506042167544365, -0.02702799066901207, -0.06912237405776978, -0.006346073932945728, 0.048564061522483826, 0.019542649388313293, -0.10184305161237717, -0.02131459303200245, 0.002071274910122156, 0.06019570678472519, -0.04933277890086174, -0.023822331801056862, 0.061753757297992706, 0.03395755961537361, 0.035142987966537476, 0.04514467716217041, -0.04209870100021362, 0.051735058426856995, -0.010264404118061066, 0.010600893758237362, -0.04388001188635826, 0.048436664044857025, 0.09170644730329514, 0.0874226912856102, 0.02946961112320423, -0.0049003129824995995, 0.03189241513609886, -0.05068569630384445, 0.04898029565811157, 
0.06254067271947861, -0.021246548742055893, 0.041442159563302994, -0.04294992610812187, -0.11569153517484665, -0.029132820665836334, 0.027501607313752174, -0.11903877556324005, -0.0024651181884109974, -0.019488628953695297, 0.032330770045518875, 0.014155727811157703, -0.019860858097672462, -0.03563971444964409, 0.03158700466156006, 0.04575197398662567, -0.04244818910956383, 0.007442069705575705, 0.12420977652072906, -0.0006733344052918255, 0.0338529571890831, -0.03671126440167427]\n"
]
}
],
"source": [
"from gpt4all import GPT4All, Embed4All\n",
"\n",
"# Embed the file's *contents* — passing the bare filename string would\n",
"# produce a vector for the text 'Aditya_test.txt', not for the document.\n",
"with open('Aditya_test.txt') as f:\n",
"    text = f.read()\n",
"embedder = Embed4All()\n",
"output = embedder.embed(text)\n",
"print(output)"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"import langchain_community as lcc\n",
"# ChatOllama is the class instantiated below; the previous import brought in\n",
"# ChatHuggingFace instead, leaving ChatOllama undefined (NameError on fresh run).\n",
"from langchain_community.chat_models import ChatOllama\n",
"\n",
"local_llm = 'NousResearch/Yarn-Mistral-7b-128k'\n",
"llm = ChatOllama(model=local_llm, temperature=0)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"bert_load_from_file: gguf version = 2\n",
"bert_load_from_file: gguf alignment = 32\n",
"bert_load_from_file: gguf data offset = 695552\n",
"bert_load_from_file: model name = BERT\n",
"bert_load_from_file: model architecture = bert\n",
"bert_load_from_file: model file type = 1\n",
"bert_load_from_file: bert tokenizer vocab = 30522\n"
]
}
],
"source": [
"from langchain_community.embeddings import GPT4AllEmbeddings\n",
"\n",
"embedder = GPT4AllEmbeddings()"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {},
"outputs": [
{
"ename": "AttributeError",
"evalue": "'dict' object has no attribute 'page_content'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[37], line 13\u001b[0m\n\u001b[1;32m 10\u001b[0m adjusted_documents \u001b[38;5;241m=\u001b[39m [{\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mpage_content\u001b[39m\u001b[38;5;124m'\u001b[39m: doc[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mtext\u001b[39m\u001b[38;5;124m'\u001b[39m], \u001b[38;5;124m'\u001b[39m\u001b[38;5;124membedding\u001b[39m\u001b[38;5;124m'\u001b[39m: doc[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124membedding\u001b[39m\u001b[38;5;124m'\u001b[39m]} \u001b[38;5;28;01mfor\u001b[39;00m doc \u001b[38;5;129;01min\u001b[39;00m documents]\n\u001b[1;32m 12\u001b[0m \u001b[38;5;66;03m# Then, attempt to create the vector store with the adjusted document format\u001b[39;00m\n\u001b[0;32m---> 13\u001b[0m vectorstore \u001b[38;5;241m=\u001b[39m \u001b[43mChroma\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfrom_documents\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 14\u001b[0m \u001b[43m \u001b[49m\u001b[43mdocuments\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43madjusted_documents\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 15\u001b[0m \u001b[43m \u001b[49m\u001b[43mcollection_name\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mrag-chroma\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 16\u001b[0m \u001b[43m \u001b[49m\u001b[43membedding\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43membedder\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 17\u001b[0m \u001b[43m)\u001b[49m\n\u001b[1;32m 18\u001b[0m retriever \u001b[38;5;241m=\u001b[39m vectorstore\u001b[38;5;241m.\u001b[39mas_retriever()\n",
"File \u001b[0;32m~/.local/share/virtualenvs/LLM_Playground-SHCTkmIS/lib/python3.11/site-packages/langchain_community/vectorstores/chroma.py:776\u001b[0m, in \u001b[0;36mChroma.from_documents\u001b[0;34m(cls, documents, embedding, ids, collection_name, persist_directory, client_settings, client, collection_metadata, **kwargs)\u001b[0m\n\u001b[1;32m 745\u001b[0m \u001b[38;5;129m@classmethod\u001b[39m\n\u001b[1;32m 746\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mfrom_documents\u001b[39m(\n\u001b[1;32m 747\u001b[0m \u001b[38;5;28mcls\u001b[39m: Type[Chroma],\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 756\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs: Any,\n\u001b[1;32m 757\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m Chroma:\n\u001b[1;32m 758\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Create a Chroma vectorstore from a list of documents.\u001b[39;00m\n\u001b[1;32m 759\u001b[0m \n\u001b[1;32m 760\u001b[0m \u001b[38;5;124;03m If a persist_directory is specified, the collection will be persisted there.\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 774\u001b[0m \u001b[38;5;124;03m Chroma: Chroma vectorstore.\u001b[39;00m\n\u001b[1;32m 775\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m--> 776\u001b[0m texts \u001b[38;5;241m=\u001b[39m \u001b[43m[\u001b[49m\u001b[43mdoc\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpage_content\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mfor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mdoc\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01min\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mdocuments\u001b[49m\u001b[43m]\u001b[49m\n\u001b[1;32m 777\u001b[0m metadatas \u001b[38;5;241m=\u001b[39m [doc\u001b[38;5;241m.\u001b[39mmetadata \u001b[38;5;28;01mfor\u001b[39;00m doc \u001b[38;5;129;01min\u001b[39;00m documents]\n\u001b[1;32m 778\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m 
\u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39mfrom_texts(\n\u001b[1;32m 779\u001b[0m texts\u001b[38;5;241m=\u001b[39mtexts,\n\u001b[1;32m 780\u001b[0m embedding\u001b[38;5;241m=\u001b[39membedding,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 788\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs,\n\u001b[1;32m 789\u001b[0m )\n",
"File \u001b[0;32m~/.local/share/virtualenvs/LLM_Playground-SHCTkmIS/lib/python3.11/site-packages/langchain_community/vectorstores/chroma.py:776\u001b[0m, in \u001b[0;36m<listcomp>\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m 745\u001b[0m \u001b[38;5;129m@classmethod\u001b[39m\n\u001b[1;32m 746\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mfrom_documents\u001b[39m(\n\u001b[1;32m 747\u001b[0m \u001b[38;5;28mcls\u001b[39m: Type[Chroma],\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 756\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs: Any,\n\u001b[1;32m 757\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m Chroma:\n\u001b[1;32m 758\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Create a Chroma vectorstore from a list of documents.\u001b[39;00m\n\u001b[1;32m 759\u001b[0m \n\u001b[1;32m 760\u001b[0m \u001b[38;5;124;03m If a persist_directory is specified, the collection will be persisted there.\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 774\u001b[0m \u001b[38;5;124;03m Chroma: Chroma vectorstore.\u001b[39;00m\n\u001b[1;32m 775\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m--> 776\u001b[0m texts \u001b[38;5;241m=\u001b[39m [\u001b[43mdoc\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpage_content\u001b[49m \u001b[38;5;28;01mfor\u001b[39;00m doc \u001b[38;5;129;01min\u001b[39;00m documents]\n\u001b[1;32m 777\u001b[0m metadatas \u001b[38;5;241m=\u001b[39m [doc\u001b[38;5;241m.\u001b[39mmetadata \u001b[38;5;28;01mfor\u001b[39;00m doc \u001b[38;5;129;01min\u001b[39;00m documents]\n\u001b[1;32m 778\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39mfrom_texts(\n\u001b[1;32m 779\u001b[0m texts\u001b[38;5;241m=\u001b[39mtexts,\n\u001b[1;32m 780\u001b[0m embedding\u001b[38;5;241m=\u001b[39membedding,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 788\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs,\n\u001b[1;32m 
789\u001b[0m )\n",
"\u001b[0;31mAttributeError\u001b[0m: 'dict' object has no attribute 'page_content'"
]
}
],
"source": [
"from langchain_community.vectorstores import Chroma\n",
"\n",
"# Example of preparing 'documents' variable (assuming each document is a string in a list)\n",
"# Here you would convert each text document into an embedding and prepare it as needed\n",
"\n",
"# Assuming 'embedder.embed(doc_text)' returns a numeric vector for each document\n",
"documents = [{'text': doc_text, 'embedding': embedder.embed(doc_text)} for doc_text in documents_list]\n",
"\n",
"# Chroma.from_documents accesses doc.page_content as an *attribute*, so it\n",
"# needs Document objects, not plain dicts — hence the AttributeError above.\n",
"from langchain_core.documents import Document\n",
"adjusted_documents = [Document(page_content=doc['text']) for doc in documents]\n",
"\n",
"# Then, attempt to create the vector store with the adjusted document format\n",
"vectorstore = Chroma.from_documents(\n",
" documents=adjusted_documents,\n",
" collection_name=\"rag-chroma\",\n",
" embedding=embedder,\n",
")\n",
"retriever = vectorstore.as_retriever()\n"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"# TextLoader has no 'load_documents' classmethod and takes a file path, not a\n",
"# query — use the vector-store retriever built earlier to fetch relevant docs.\n",
"query = \"who is Aditya\"\n",
"documents = retriever.get_relevant_documents(query)\n"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [
{
"ename": "ImportError",
"evalue": "cannot import name 'Rag' from 'langchain.llms' (/Users/adityasugandhi/.local/share/virtualenvs/LLM_Playground-SHCTkmIS/lib/python3.11/site-packages/langchain/llms/__init__.py)",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mImportError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[27], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mlangchain\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mllms\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Rag\n\u001b[1;32m 3\u001b[0m \u001b[38;5;66;03m# Initialize RAG model (ensure you have a compatible model loaded)\u001b[39;00m\n\u001b[1;32m 4\u001b[0m rag_model \u001b[38;5;241m=\u001b[39m Rag()\n",
"\u001b[0;31mImportError\u001b[0m: cannot import name 'Rag' from 'langchain.llms' (/Users/adityasugandhi/.local/share/virtualenvs/LLM_Playground-SHCTkmIS/lib/python3.11/site-packages/langchain/llms/__init__.py)"
]
}
],
"source": [
"# NOTE(review): neither langchain nor langchain_community ships a 'Rag' LLM\n",
"# class (the recorded ImportError confirms this). The RAG implementation used\n",
"# elsewhere in this notebook comes from Hugging Face transformers.\n",
"from transformers import RagTokenForGeneration\n",
"\n",
"# Initialize RAG model (ensure you have a compatible model loaded)\n",
"rag_model = RagTokenForGeneration.from_pretrained(\"facebook/rag-token-base\")\n",
"\n",
"# Example function to generate answers using RAG and the retrieved documents\n",
"def generate_answer(rag_model, query, documents):\n",
" # Convert documents to a format suitable for the model, if necessary\n",
" context = ' '.join(documents) # Simplified; you might need a more sophisticated approach\n",
" \n",
" # Generate an answer using the RAG model\n",
" answer = rag_model.generate(query, context, \n",
" generation_kwargs={\"max_length\": 256, \"temperature\": 0.7})\n",
" return answer\n",
"\n",
"# Generate an answer for the query using retrieved documents as context\n",
"answer = generate_answer(rag_model, query, documents)\n",
"print(\"Generated Answer:\", answer)\n"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"His previous role as a Software Engineer at Aspire Systems in Chennai, India, showcases Aditya's versatility in both backend and frontend development. Leading the redesign of a Life Insurance Company's architecture, he prioritized low latency and high throughput, emphasizing a customer-centric approach. Aditya engineered 20 SOAP APIs for responsive patient data management, collaborated on front-end enhancements, and implemented secure payment gateways and Single Sign-On for authentication. His contribution to debugging strategies, real-time log analysis with Splunk, and CI/CD pipelines with Jenkins further underscore his commitment to optimizing system performance.\n"
]
}
],
"source": [
"# Example structure for fine-tuning (high-level and simplified)\n",
"from langchain.training import train_model\n",
"\n",
"# Define your training dataset\n",
"training_data = [(\"Question 1\", \"Answer 1\"), (\"Question 2\", \"Answer 2\"), ...]\n",
"\n",
"# Train (fine-tune) the model\n",
"train_model(rag_model, training_data, epochs=5, learning_rate=1e-5)\n"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/adityasugandhi/.local/share/virtualenvs/LLM_Playground-SHCTkmIS/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
" from .autonotebook import tqdm as notebook_tqdm\n",
"None of PyTorch, TensorFlow >= 2.0, or Flax have been found. Models won't be available and only tokenizers, configuration and file/data utilities can be used.\n"
]
}
],
"source": [
"import torch  # needed for torch.cat / torch.long used below, never imported before\n",
"from transformers import RagTokenizer, RagRetriever, RagTokenForGeneration\n",
"\n",
"tokenizer = RagTokenizer.from_pretrained(\"facebook/rag-token-base\")\n",
"retriever = RagRetriever.from_pretrained(\"facebook/rag-token-base\")\n",
"generator = RagTokenForGeneration.from_pretrained(\"facebook/rag-token-base\")\n",
"\n",
"\n",
"def generate_answer(tokenizer, retriever, generator, query, documents):\n",
" inputs = tokenizer(query, documents, return_tensors=\"pt\", padding=\"max_length\", max_length=256, truncation=True)\n",
" input_ids = inputs[\"input_ids\"]\n",
" attention_mask = inputs[\"attention_mask\"]\n",
" doc_scores = retriever(input_ids, attention_mask)\n",
" context_input_ids = input_ids.new_full((input_ids.shape[0], 1), tokenizer.context_id, dtype=torch.long)\n",
" context_attention_mask = input_ids.new_full(context_input_ids.shape, 1)\n",
" generator_input_ids = torch.cat([context_input_ids, input_ids], dim=1)\n",
" generator_attention_mask = torch.cat([context_attention_mask, attention_mask], dim=1)\n",
" outputs = generator.generate(generator_input_ids, attention_mask=generator_attention_mask, doc_scores=doc_scores)\n",
" return tokenizer.batch_decode(outputs, skip_special_tokens=True)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"ename": "ModuleNotFoundError",
"evalue": "No module named 'haystack.indexing'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[4], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mos\u001b[39;00m\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mtimeit\u001b[39;00m\n\u001b[0;32m----> 3\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mhaystack\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mindexing\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcleaning\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m clean_wiki_text\n\u001b[1;32m 4\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mhaystack\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mindexing\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mio\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m open_file, fetch_archive_from_http\n\u001b[1;32m 5\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mhaystack\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mindexing\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mutils\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m convert_files_to_dicts, fetch_archive_from_http\n",
"\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'haystack.indexing'"
]
}
],
"source": [
"import os\n",
"import timeit\n",
"# from haystack.indexing.cleaning import clean_wiki_text\n",
"# from haystack.indexing.io import open_file, fetch_archive_from_http\n",
"# from haystack.indexing.utils import convert_files_to_dicts, fetch_archive_from_http\n",
"# NOTE(review): verify these names against the installed haystack version —\n",
"# 'clean_preprocessor' does not appear to be a haystack.preprocessor.cleaning API.\n",
"from haystack.preprocessor.cleaning import clean_whitespace, clean_html, clean_preprocessor, clean_wiki_text\n",
"from haystack.preprocessor.utils import convert_files_to_dicts, fetch_archive_from_http\n",
"from haystack.preprocessor import PreProcessor\n",
"from haystack.document_store import InMemoryDocumentStore, WeaviateDocumentStore\n",
"from haystack.retriever.dense import EmbeddingRetriever\n",
"from haystack.utils import print_answers\n",
"\n",
"def run_ingest():\n",
"    # Update DATA_PATH to include \"Aditya_train.txt\"\n",
"    # NOTE(review): 'cfg' is referenced throughout this function but never\n",
"    # imported anywhere in the notebook — add the config import before running.\n",
" data_file = \"Aditya_train.txt\"\n",
" DATA_PATH = os.path.join(cfg.DATA_PATH, data_file)\n",
" \n",
" # Ensure the file exists\n",
" if os.path.isfile(DATA_PATH):\n",
" start = timeit.default_timer()\n",
"\n",
" vector_store = WeaviateDocumentStore(host=cfg.WEAVIATE_HOST,\n",
" port=cfg.WEAVIATE_PORT,\n",
" embedding_dim=cfg.WEAVIATE_EMBEDDING_DIM)\n",
"\n",
"        # Convert text files to dictionaries\n",
"        # NOTE(review): convert_files_to_dicts expects a *directory* in dir_path,\n",
"        # but DATA_PATH above is a single file path — likely should pass cfg.DATA_PATH.\n",
" raw_docs = convert_files_to_dicts(dir_path=DATA_PATH, clean_func=clean_wiki_text, split_paragraphs=True)\n",
"\n",
" # Convert to desired format\n",
" final_doc = []\n",
" for doc in raw_docs:\n",
" new_doc = {\n",
" 'content': doc['text'],\n",
" 'meta': {'name': doc['name']}\n",
" }\n",
" final_doc.append(new_doc)\n",
"\n",
" preprocessor = PreProcessor(\n",
" clean_empty_lines=True,\n",
" clean_whitespace=False,\n",
" clean_header_footer=False,\n",
" split_by=\"word\",\n",
" language=\"en\",\n",
" split_length=cfg.PRE_PROCESSOR_SPLIT_LENGTH,\n",
" split_overlap=cfg.PRE_PROCESSOR_SPLIT_OVERLAP,\n",
" split_respect_sentence_boundary=True,\n",
" )\n",
"\n",
" preprocessed_docs = preprocessor.process(final_doc)\n",
" vector_store.write_documents(preprocessed_docs)\n",
"\n",
" retriever = EmbeddingRetriever(\n",
" document_store=vector_store,\n",
" embedding_model=cfg.EMBEDDINGS\n",
" )\n",
" vector_store.update_embeddings(retriever)\n",
"\n",
" end = timeit.default_timer()\n",
" print(f\"Time to prepare embeddings: {end - start}\")\n",
" else:\n",
" print(f\"File {data_file} not found in the specified DATA_PATH.\")\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Langchain",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.0"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
|