# "Ask your PDF" — Streamlit + LangChain + HuggingFace Hub demo app.
# (The "Spaces / Sleeping" lines here were HuggingFace Space UI artifacts,
# not part of the program.)
import os

import streamlit as st
from PyPDF2 import PdfReader
from langchain import HuggingFaceHub, LLMChain
from langchain.callbacks import get_openai_callback
from langchain.chains.question_answering import load_qa_chain
from langchain.embeddings import (
    HuggingFaceHubEmbeddings,
    HuggingFaceInferenceAPIEmbeddings,
)
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS

# SECURITY: the HuggingFace API token was previously hardcoded in this file
# (a leaked credential). Read it from the environment instead; export
# HF_TOKEN before launching the app. The leaked key should be revoked.
token = os.environ["HF_TOKEN"]

# Hub feature-extraction embeddings client (defined but not used by main();
# kept for backward compatibility with any external importer).
repo_id = "sentence-transformers/all-mpnet-base-v2"
hf = HuggingFaceHubEmbeddings(
    repo_id=repo_id,
    task="feature-extraction",
    huggingfacehub_api_token=token,
)

# Inference-API embeddings client actually used to build the FAISS index.
embeddings = HuggingFaceInferenceAPIEmbeddings(
    api_key=token, model_name="sentence-transformers/all-MiniLM-l6-v2"
)
def main():
    """Streamlit entry point: upload a PDF, index its text in FAISS, and
    answer user questions with a HuggingFace-hosted LLM (RAG "stuff" chain).

    Side effects: renders Streamlit widgets; performs network calls to the
    HuggingFace Inference API for embeddings and generation.
    """
    st.set_page_config(page_title="Ask your PDF")
    st.header("Ask your PDF 💬")

    # Upload file.
    pdf = st.file_uploader("Upload your PDF", type="pdf")

    # Extract the text.
    if pdf is not None:
        pdf_reader = PdfReader(pdf)
        text = ""
        for page in pdf_reader.pages:
            # extract_text() returns None for pages with no extractable text
            # (e.g. scanned images); guard so concatenation never raises.
            text += page.extract_text() or ""

        # Split into overlapping chunks sized for the model context window.
        text_splitter = CharacterTextSplitter(
            separator="\n",
            chunk_size=1000,
            chunk_overlap=200,
            length_function=len,
        )
        chunks = text_splitter.split_text(text)

        # Build the vector index with the module-level embeddings client.
        knowledge_base = FAISS.from_texts(chunks, embeddings)

        # Show user input.
        user_question = st.text_input("Ask a question about your PDF:")
        if user_question:
            docs = knowledge_base.similarity_search(user_question)
            hub_llm = HuggingFaceHub(
                repo_id='HuggingFaceH4/zephyr-7b-beta',
                model_kwargs={'temperature': 0.01, "max_length": 2048},
                huggingfacehub_api_token=token,
            )
            chain = load_qa_chain(hub_llm, chain_type="stuff")
            # NOTE(review): get_openai_callback only meters OpenAI usage and
            # reports zeros for a HuggingFaceHub LLM; kept for behavior
            # parity — consider removing it.
            with get_openai_callback() as cb:
                response = chain.run(input_documents=docs, question=user_question)
                print(cb)

            st.write(response)


if __name__ == '__main__':
    main()