import os
import streamlit as st
from pathlib import Path

# Loader for plain-text files
from langchain.document_loaders import TextLoader
# Text splitter
from langchain.text_splitter import CharacterTextSplitter
# Hugging Face models & embeddings
from langchain.embeddings import HuggingFaceEmbeddings
from langchain import HuggingFaceHub
# Vectorstores: https://python.langchain.com/en/latest/modules/indexes/vectorstores.html
# FAISS (Facebook AI Similarity Search) vector store
from langchain.vectorstores import FAISS
# Question-answering chain
from langchain.chains.question_answering import load_qa_chain
# Vectorstore index creation (used for the PDF path)
from langchain.indexes import VectorstoreIndexCreator
from langchain.chains import RetrievalQA
from langchain.document_loaders import UnstructuredPDFLoader

os.environ["HUGGINGFACEHUB_API_TOKEN"] = st.secrets["hf_api_key"]
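# Note: st.secrets["hf_api_key"] is read from the app's Streamlit secrets
# (e.g. .streamlit/secrets.toml locally, or the Space secrets on Hugging Face);
# the key name "hf_api_key" is this app's own convention and must be set there.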


def init():
    global embeddings, llm, chain
    # Embeddings used to index the uploaded document
    embeddings = HuggingFaceEmbeddings()
    # LLM served via the Hugging Face Hub inference API
    llm = HuggingFaceHub(repo_id="declare-lab/flan-alpaca-large",
                         model_kwargs={"temperature": 0, "max_length": 512})
    # "stuff" chain: retrieved documents are stuffed into a single prompt
    chain = load_qa_chain(llm, chain_type="stuff")


def pdf_file(txtFileObj):
    st.subheader('Uploaded PDF File:')
    st.write(txtFileObj.name)

    # Persist the upload to disk so the loader can read it by path
    with open(txtFileObj.name, "wb") as f:
        f.write(txtFileObj.getbuffer())

    loaders = [UnstructuredPDFLoader(txtFileObj.name)]
    index = VectorstoreIndexCreator(
        embedding=embeddings,
        text_splitter=CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)).from_loaders(loaders)
    chain = RetrievalQA.from_chain_type(llm=llm,
                                        chain_type="stuff",
                                        retriever=index.vectorstore.as_retriever(),
                                        input_key="question")

    st.subheader('Enter query')
    query = st.text_input('Ask anything about the Document you uploaded')
    if query:
        answer = chain.run(question=query)
        st.subheader('Answer')
        st.write(answer)


def text_file(txtFileObj):
    st.subheader('Uploaded Text File:')
    st.write(txtFileObj.name)

    # Persist the upload to disk so the loader can read it by path
    with open(txtFileObj.name, "wb") as f:
        f.write(txtFileObj.getbuffer())

    loader = TextLoader(txtFileObj.name)
    documents = loader.load()

    # Split the document into chunks before embedding
    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=10)
    docs = text_splitter.split_documents(documents)
    db = FAISS.from_documents(docs, embeddings)

    st.subheader('Enter query')
    query = st.text_input('Ask anything about the Document you uploaded')
    if query:
        docs = db.similarity_search(query)
        answer = chain.run(input_documents=docs, question=query)
        st.subheader('Answer')
        st.write(answer)
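
# Note: the FAISS index built in text_file() lives only in memory and is rebuilt on
# every Streamlit rerun; for larger documents it could be cached (e.g. with
# st.cache_resource), but no caching is applied here.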


st.title('Document Q&A - Ask anything in your Document')
st.subheader('This application can be used to upload text (.txt) and PDF (.pdf) files and ask questions about their contents.')

init()

st.sidebar.subheader('Upload document')
uploaded_file = st.sidebar.file_uploader("Upload File", type=['txt', 'pdf'])

if uploaded_file and Path(uploaded_file.name).suffix == '.txt':
    st.sidebar.info(uploaded_file.name)
    text_file(uploaded_file)

if uploaded_file and Path(uploaded_file.name).suffix == '.pdf':
    pdf_file(uploaded_file)

with st.sidebar.expander('File'):
    if uploaded_file:
        st.info(uploaded_file.name)
    if os.path.exists('/content/'):
        # Only relevant when running in a Colab-style environment
        st.info(os.listdir('/content/'))
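
# Assumed environment: the imports above map to standard PyPI packages, roughly
#   streamlit, langchain, huggingface_hub, sentence-transformers, faiss-cpu, unstructured[pdf]
# (exact versions are an assumption). The app would then be started with
#   streamlit run app.py   # file name assumed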