|
from PyPDF2 import PdfReader |
|
from langchain.text_splitter import RecursiveCharacterTextSplitter |
|
from langchain_qdrant import Qdrant |
|
from langchain.chains import ConversationalRetrievalChain |
|
from langchain.memory import ConversationBufferMemory |
|
from langchain_google_genai import ChatGoogleGenerativeAI |
|
from src.embeddings import get_embeddings |
|
|
|
|
|
def get_pdf_text(pdf_docs):
    """Extract and concatenate the text of every page in a list of PDFs.

    Args:
        pdf_docs: Iterable of file paths or file-like objects that
            PyPDF2's ``PdfReader`` can open.

    Returns:
        str: The text of all pages of all PDFs, concatenated in order.
    """

    text = ""

    for pdf in pdf_docs:
        pdf_reader = PdfReader(pdf)
        for page in pdf_reader.pages:
            # extract_text() can return None for pages with no extractable
            # text (e.g. scanned images); guard so ``+=`` never raises
            # TypeError and such pages simply contribute nothing.
            text += page.extract_text() or ""

    print("Extracted the text.......")

    return text
|
|
|
def get_text_chunks(text, chunk_size, chunk_overlap):
    """Split a long text into overlapping chunks for embedding.

    Args:
        text: The raw text to split.
        chunk_size: Maximum size of each chunk, in characters.
        chunk_overlap: Number of characters shared between adjacent chunks.

    Returns:
        list[str]: The resulting text chunks.
    """

    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )

    pieces = splitter.split_text(text)

    print("Chunking Done.......")

    return pieces
|
|
|
def get_vector_store(chunks, target_collection, url, api_key):
    """Embed text chunks and upload them to a Qdrant collection.

    Args:
        chunks: List of text chunks to embed and index.
        target_collection: Name of the Qdrant collection to write into.
        url: URL of the Qdrant server.
        api_key: API key used to authenticate against Qdrant.

    Returns:
        The ``Qdrant`` vector store wrapping the populated collection.
    """

    # Embedding model comes from the project-local helper.
    embedder = get_embeddings()

    vector_store = Qdrant.from_texts(
        chunks,
        embedding=embedder,
        collection_name=target_collection,
        url=url,
        api_key=api_key,
        prefer_grpc=False,  # use HTTP rather than gRPC transport
        timeout=75,
    )

    print("Vector store successfully created..........")
    print(f"vector store = {vector_store}")

    return vector_store
|
|
|
def get_conversational_chain(vector_store, google_api_key):
    """Build a conversational retrieval chain over a vector store.

    Args:
        vector_store: A LangChain vector store; queried via ``as_retriever()``.
        google_api_key: API key for the Google Generative AI service.

    Returns:
        A ``ConversationalRetrievalChain`` backed by Gemini 1.5 Pro with
        buffered chat history.
    """

    # Gemini 1.5 Pro as the chat model.
    llm = ChatGoogleGenerativeAI(model="gemini-1.5-pro", google_api_key=google_api_key)

    # Full in-memory chat history, exposed to the chain as "chat_history".
    chat_memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

    retriever = vector_store.as_retriever()

    return ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=retriever,
        memory=chat_memory,
    )