import os from langchain.vectorstores import FAISS from langchain.document_loaders import PyPDFLoader from langchain.chains.question_answering import load_qa_chain from langchain.prompts import PromptTemplate from langchain.memory import ConversationBufferMemory from langchain.embeddings import HuggingFaceEmbeddings from langchain.chains import RetrievalQA from langchain.document_loaders import UnstructuredFileLoader from langchain.text_splitter import RecursiveCharacterTextSplitter from langchain.chains import RetrievalQAWithSourcesChain from huggingface_hub import notebook_login from transformers import pipeline from transformers import AutoTokenizer, AutoModelForCausalLM from langchain import HuggingFacePipeline from langchain.text_splitter import CharacterTextSplitter import textwrap import sys import torch os.environ['HuggingFaceHub_API_Token']= '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' loader = UnstructuredFileLoader('Highway Traffic Act, R.S.O. 1990, c. H.8.pdf') documents = loader.load() text_splitter=CharacterTextSplitter(separator='\n',chunk_size=1500,chunk_overlap=300) text_chunks=text_splitter.split_documents(documents) embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2',model_kwargs={'device': 'cuda'}) vectorstore=FAISS.from_documents(text_chunks, embeddings) notebook_login() os.environ['HuggingFaceHub_API_Token']= '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' tokenizer = AutoTokenizer.from_pretrained("NousResearch/Llama-2-7b-hf") model = AutoModelForCausalLM.from_pretrained("NousResearch/Llama-2-7b-hf", device_map='auto',torch_dtype=torch.float16,load_in_4bit=True, token=True ) pipe = pipeline("text-generation",model=model,tokenizer= tokenizer,torch_dtype=torch.bfloat16,device_map="auto",max_new_tokens = 1024,do_sample=True,top_k=10,num_return_sequences=1, eos_token_id=tokenizer.eos_token_id) llm=HuggingFacePipeline(pipeline=pipe, model_kwargs={'temperature':0.5}) chain = RetrievalQA.from_chain_type(llm=llm, chain_type = "stuff",return_source_documents=True, retriever=vectorstore.as_retriever()) query = "Can goat and paint be transported in same truck ?" result=chain({"query": query}, return_only_outputs=True) wrapped_text = textwrap.fill(result['result'], width=500) wrapped_text