import os
import streamlit as st
from pathlib import Path

# Loader for plain-text files
from langchain.document_loaders import TextLoader
# Text splitter
from langchain.text_splitter import CharacterTextSplitter
# Hugging Face models & embeddings
from langchain.embeddings import HuggingFaceEmbeddings
from langchain import HuggingFaceHub
# Vectorstores: https://python.langchain.com/en/latest/modules/indexes/vectorstores.html
# FAISS (Facebook AI Similarity Search) vector store
from langchain.vectorstores import FAISS
# Question-answering chain
from langchain.chains.question_answering import load_qa_chain
# Vectorstore index creation (used for the PDF path)
from langchain.indexes import VectorstoreIndexCreator
from langchain.chains import RetrievalQA
from langchain.document_loaders import UnstructuredPDFLoader

os.environ["HUGGINGFACEHUB_API_TOKEN"] = st.secrets["hf_api_key"]
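# Note: st.secrets["hf_api_key"] is read from the app's Streamlit secrets
# (e.g. .streamlit/secrets.toml locally, or the Space secrets on Hugging Face);
# the key name "hf_api_key" is this app's own convention and must be set there.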


def init():
    global embeddings, llm, chain
    # Embeddings used to index the uploaded document
    embeddings = HuggingFaceEmbeddings()
    # LLM served via the Hugging Face Hub inference API
    llm = HuggingFaceHub(repo_id="declare-lab/flan-alpaca-large",
                         model_kwargs={"temperature": 0, "max_length": 512})
    # "stuff" chain: retrieved documents are stuffed into a single prompt
    chain = load_qa_chain(llm, chain_type="stuff")


def pdf_file(txtFileObj):
    st.subheader('Uploaded PDF File:')
    st.write(txtFileObj.name)

    # Persist the upload to disk so the loader can read it by path
    with open(txtFileObj.name, "wb") as f:
        f.write(txtFileObj.getbuffer())

    loaders = [UnstructuredPDFLoader(txtFileObj.name)]
    index = VectorstoreIndexCreator(
        embedding=embeddings,
        text_splitter=CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)).from_loaders(loaders)
    chain = RetrievalQA.from_chain_type(llm=llm,
                                        chain_type="stuff",
                                        retriever=index.vectorstore.as_retriever(),
                                        input_key="question")

    st.subheader('Enter query')
    query = st.text_input('Ask anything about the Document you uploaded')
    if query:
        answer = chain.run(question=query)
        st.subheader('Answer')
        st.write(answer)


def text_file(txtFileObj):
    st.subheader('Uploaded Text File:')
    st.write(txtFileObj.name)

    # Persist the upload to disk so the loader can read it by path
    with open(txtFileObj.name, "wb") as f:
        f.write(txtFileObj.getbuffer())

    loader = TextLoader(txtFileObj.name)
    documents = loader.load()

    # Split the document into chunks before embedding
    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=10)
    docs = text_splitter.split_documents(documents)
    db = FAISS.from_documents(docs, embeddings)

    st.subheader('Enter query')
    query = st.text_input('Ask anything about the Document you uploaded')
    if query:
        docs = db.similarity_search(query)
        answer = chain.run(input_documents=docs, question=query)
        st.subheader('Answer')
        st.write(answer)
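
# Note: the FAISS index built in text_file() lives only in memory and is rebuilt on
# every Streamlit rerun; for larger documents it could be cached (e.g. with
# st.cache_resource), but no caching is applied here.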


st.title('Document Q&A - Ask anything in your Document')
st.subheader('This application can be used to upload text (.txt) and PDF (.pdf) files and ask questions about their contents.')

init()

st.sidebar.subheader('Upload document')
uploaded_file = st.sidebar.file_uploader("Upload File", type=['txt', 'pdf'])

if uploaded_file and Path(uploaded_file.name).suffix == '.txt':
    st.sidebar.info(uploaded_file.name)
    text_file(uploaded_file)

if uploaded_file and Path(uploaded_file.name).suffix == '.pdf':
    pdf_file(uploaded_file)

with st.sidebar.expander('File'):
    if uploaded_file:
        st.info(uploaded_file.name)
    if os.path.exists('/content/'):
        # Only relevant when running in a Colab-style environment
        st.info(os.listdir('/content/'))
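
# Assumed environment: the imports above map to standard PyPI packages, roughly
#   streamlit, langchain, huggingface_hub, sentence-transformers, faiss-cpu, unstructured[pdf]
# (exact versions are an assumption). The app would then be started with
#   streamlit run app.py   # file name assumed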