# import bs4
from langchain_community.document_loaders import WebBaseLoader, PyPDFLoader
from langchain_community.vectorstores import FAISS
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings, HuggingFaceEndpoint
import gradio as gr

repo_id = "HuggingFaceH4/zephyr-7b-beta"

# Remote LLM served via the Hugging Face Inference API.
# Expects a valid HUGGINGFACEHUB_API_TOKEN (or HF_TOKEN) in the environment.
llm = HuggingFaceEndpoint(
    repo_id=repo_id,
    max_new_tokens=128,  # cap the length of each generated answer
    temperature=0.1,
)


def web_load(path):
    """Load a web page into LangChain Documents."""
    loader = WebBaseLoader(
        web_paths=(path,),
        # Uncomment (together with `import bs4` above) to keep only the main
        # article content of a blog post:
        # bs_kwargs=dict(
        #     parse_only=bs4.SoupStrainer(
        #         class_=("post-content", "post-title", "post-header")
        #     )
        # ),
    )
    docs = loader.load()
    return docs


def pdf_load(path):
    """Load a PDF and split it into one Document per page."""
    loader = PyPDFLoader(path)
    pages = loader.load_and_split()
    return pages


def vector_store(path):
    """Build a FAISS vector store from a PDF file or a website URL."""
    if path.endswith(".pdf"):
        docs = pdf_load(path)
    elif path.startswith(("http", "www")):
        docs = web_load(path)
    else:
        raise gr.Error("Unsupported source: provide a website URL or a PDF file.")
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    splits = text_splitter.split_documents(docs)
    vectorstore = FAISS.from_documents(
        documents=splits,
        embedding=HuggingFaceEmbeddings(model_name="BAAI/bge-base-en-v1.5"),
    )
    return vectorstore, "Setup done! You may proceed to the Chatbot tab."


def invoke(user_input, retriever):
    """Run the RAG chain: retrieve context, fill the prompt, and query the LLM."""
    prompt_template = """
<|system|>
Answer the question based on your knowledge. Use the following context to help:
{context}

<|user|>
{question}

<|assistant|>
"""
    prompt = PromptTemplate(
        input_variables=["context", "question"],
        template=prompt_template,
    )
    # The retriever fills {context} with the retrieved documents; the user's
    # question is passed through unchanged to {question}.
    rag_chain = (
        {"context": retriever, "question": RunnablePassthrough()}
        | prompt
        | llm
        | StrOutputParser()
    )
    ans = rag_chain.invoke(user_input)
    return ans


def rag_chatbot(vectorstore, user_input, chat_history):
    """Gradio callback: answer the user's message and append it to the chat history."""
    retriever = vectorstore.as_retriever()
    answer = invoke(user_input, retriever)
    chat_history.append((user_input, answer))
    return "", chat_history


def source(radio, source1, source2):
    """Pick the active source path depending on the selected source type."""
    if radio == "website":
        return source1
    elif radio == "PDF":
        return source2


with gr.Blocks() as demo:
    vectorstore = gr.State()

    with gr.Tab("Setup"):
        gr.Markdown("Input a website URL or upload a PDF file")
        with gr.Row():
            source1 = gr.Textbox(label="Input website")
            source2 = gr.Files(label="Upload a PDF file", file_count="single", file_types=[".pdf"])
        radio = gr.Radio(["website", "PDF"], label="Select type of source", interactive=True)
        path = gr.Textbox(label="Path of source", visible=True, interactive=False)
        # Keep the resolved source path in sync with whichever input changed last.
        radio.change(fn=source, inputs=[radio, source1, source2], outputs=path)
        source1.change(fn=source, inputs=[radio, source1, source2], outputs=path)
        source2.change(fn=source, inputs=[radio, source1, source2], outputs=path)
        done = gr.Textbox(label="Progress", interactive=False)
        setup_btn = gr.Button("Initialize vectorstore")
        setup_btn.click(fn=vector_store, inputs=[path], outputs=[vectorstore, done])

    with gr.Tab("Chatbot"):
        chatbot = gr.Chatbot()
        msg = gr.Textbox()
        with gr.Row():
            clear = gr.ClearButton(
                [msg, chatbot],
                icon="https://img.icons8.com/?size=100&id=Xnx8cxDef16O&format=png&color=000000",
            )
            send_btn = gr.Button(
                "Send",
                variant="primary",
                icon="https://img.icons8.com/?size=100&id=g8ltXTwIfJ1n&format=png&color=000000",
            )
        # Pressing Enter in the textbox or clicking Send both trigger the RAG chatbot.
        msg.submit(fn=rag_chatbot, inputs=[vectorstore, msg, chatbot], outputs=[msg, chatbot])
        send_btn.click(fn=rag_chatbot, inputs=[vectorstore, msg, chatbot], outputs=[msg, chatbot])

if __name__ == "__main__":
    demo.launch()