|
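"""Chat-with-your-PDFs demo: a Gradio UI over a LangChain RetrievalQA chain.

Uploaded PDFs are split into chunks, embedded with a sentence-transformers
model, indexed in FAISS, and queried with a 4-bit quantized DeciLM-6b-instruct.
"""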
import time

import gradio as gr
import torch
from pypdf import PdfReader
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline, BitsAndBytesConfig

from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms.huggingface_pipeline import HuggingFacePipeline
from langchain.memory import ConversationBufferMemory
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
|
CHUNK_SIZE = 1000  # characters per chunk for the text splitter
|
|
|
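# Embedding model used to vectorize the PDF chunks; runs on the GPU
# (change "device" to "cpu" if CUDA is unavailable).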
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-mpnet-base-v2",
    model_kwargs={"device": "cuda"},
)
|
|
|
|
|
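# 4-bit quantization via bitsandbytes, so the 6B-parameter model should fit
# on a single consumer GPU; bfloat16 is used as the compute dtype.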
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,  # required for bnb_4bit_compute_dtype to take effect
    bnb_4bit_compute_dtype=torch.bfloat16,
)
|
|
|
|
|
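# LLM setup: DeciLM-6b-instruct, quantized, wrapped as a LangChain LLM.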
def load_llm():
    """Load DeciLM-6b-instruct as a 4-bit quantized LangChain LLM."""
    model_id = "Deci/DeciLM-6b-instruct"
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        trust_remote_code=True,
        device_map="auto",
        quantization_config=quant_config,
    )
    # Beam search is already deterministic; do_sample=False makes that explicit
    # (the original temperature=0 is not a valid sampling temperature).
    pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        do_sample=False,
        num_beams=5,
        no_repeat_ngram_size=4,
        early_stopping=True,
        max_new_tokens=50,
    )

    llm = HuggingFacePipeline(pipeline=pipe)

    return llm
|
|
|
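# Gradio event handlers: each function below is wired to a UI event.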
def add_text(history, text):
    """Append the user's question to the chat history as a new (question, answer) pair."""
    if not text:
        raise gr.Error('Enter text')
    # Use a mutable list pair so the answer can be streamed into it later.
    history = history + [[text, '']]

    return history
|
|
|
def upload_file(file):
    """Echo the uploaded file(s) back to the gr.File display."""
    return file
|
|
|
def process_file(files):
    """Extract text from the uploaded PDFs and build a RetrievalQA chain over them."""
    # Concatenate the text of every page of every uploaded PDF.
    pdf_text = ""
    for file in files:
        pdf = PdfReader(file.name)
        for page in pdf.pages:
            pdf_text += page.extract_text() or ""  # extract_text() can return None

    # Split into overlapping chunks and index them in a FAISS vector store.
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=CHUNK_SIZE, chunk_overlap=200)
    splits = text_splitter.create_documents([pdf_text])
    vectorstore_db = FAISS.from_documents(splits, embeddings)

    custom_prompt_template = """Given the uploaded files, generate a precise answer to the question asked by the user.
If you don't know the answer, just say that you don't know; don't try to make up an answer.
Context = {context}
History = {history}
Question = {question}
Helpful Answer:
"""
    prompt = PromptTemplate(template=custom_prompt_template,
                            input_variables=["question", "context", "history"])

    # "stuff" chain: retrieved chunks fill {context}, while the conversation
    # memory fills {history} across turns.
    qa_chain_with_memory = RetrievalQA.from_chain_type(
        llm=load_llm(),
        chain_type='stuff',
        return_source_documents=True,
        retriever=vectorstore_db.as_retriever(),
        chain_type_kwargs={
            "verbose": True,
            "prompt": prompt,
            "memory": ConversationBufferMemory(
                input_key="question",
                memory_key="history",
                return_messages=True,
            ),
        },
    )

    return qa_chain_with_memory
|
|
|
|
|
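# Note: process_file() rebuilds the FAISS index and reloads the LLM on every
# question. That keeps the demo simple; cache the chain for real use.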
def generate_bot_response(history, query, btn):
    """Run the QA chain on the query and stream the answer into the chat."""
    if not btn:
        raise gr.Error(message='Upload a PDF')

    qa_chain_with_memory = process_file(btn)

    bot_response = qa_chain_with_memory({"query": query})

    # Stream the answer character by character for a typing effect.
    for char in bot_response['result']:
        history[-1][-1] += char
        time.sleep(0.05)
        yield history, ''
|
|
|
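# UI: chatbot and PDF upload in the top row, question box and Ask button below.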
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Row():
            chatbot = gr.Chatbot(label="DeciLM-6b-instruct bot", value=[], elem_id='chatbot')
        with gr.Row():
            file_output = gr.File(label="Your PDFs")
            with gr.Column():
                btn = gr.UploadButton("Upload PDF(s)", file_types=[".pdf"], file_count="multiple")

    with gr.Column():
        with gr.Column():
            txt = gr.Text(show_label=False, placeholder="Enter question")
        with gr.Column():
            submit_btn = gr.Button('Ask')
|
    # Show uploaded PDFs in the file display as soon as they land.
    btn.upload(fn=upload_file, inputs=[btn], outputs=[file_output])

    # Ask flow: append the question to the chat, then stream the answer,
    # then refresh the file display.
    submit_btn.click(
        fn=add_text,
        inputs=[chatbot, txt],
        outputs=[chatbot],
        queue=False,
    ).success(
        fn=generate_bot_response,
        inputs=[chatbot, txt, btn],
        outputs=[chatbot, txt],
    ).success(
        fn=upload_file,
        inputs=[btn],
        outputs=[file_output],
    )

if __name__ == "__main__":
    demo.queue()  # generate_bot_response is a generator; streaming needs the queue
    demo.launch()