from PyPDF2 import PdfReader from langchain.text_splitter import RecursiveCharacterTextSplitter from langchain_qdrant import Qdrant from langchain.chains import ConversationalRetrievalChain from langchain.memory import ConversationBufferMemory from langchain_google_genai import ChatGoogleGenerativeAI from src.embeddings import get_embeddings def get_pdf_text(pdf_docs): text="" for pdf in pdf_docs: pdf_reader= PdfReader(pdf) for page in pdf_reader.pages: text+= page.extract_text() print("Extracted the text.......") return text def get_text_chunks(text,chunk_size,chunk_overlap): text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap) chunks = text_splitter.split_text(text) print("Chunking Done.......") return chunks def get_vector_store(chunks,target_collection,url,api_key): vector_store = Qdrant.from_texts( chunks, embedding = get_embeddings(), url=url, api_key=api_key, prefer_grpc=False, collection_name=target_collection, timeout=75 ) print("Vector store successfully created..........") print(f"vector store = {vector_store}") return vector_store def get_conversational_chain(vector_store,google_api_key): llm=ChatGoogleGenerativeAI(model="gemini-1.5-pro",google_api_key = google_api_key) memory = ConversationBufferMemory(memory_key = "chat_history", return_messages=True) conversation_chain = ConversationalRetrievalChain.from_llm(llm=llm, retriever=vector_store.as_retriever(), memory=memory) return conversation_chain