Spaces:

raseel-zymr
/

Document-QandA

Sleeping

App Files Files Community

raseel-zymr committed on Jun 20, 2023

Commit

5aee298

•

1 Parent(s): f70522c

Added logic to upload a file, ask questions, and get answers

Browse files

Files changed (2) hide show

.gitignore +1 -0
app.py +49 -30

.gitignore CHANGED Viewed

	@@ -1 +1,2 @@
1	__pycache__


1	__pycache__
2	+ .streamlit

app.py CHANGED Viewed

@@ -1,5 +1,6 @@
 import os
 import streamlit as st
 #for textfiles
 from langchain.document_loaders import TextLoader
@@ -20,49 +21,67 @@ os.environ["HUGGINGFACEHUB_API_TOKEN"] = st.secrets["hf_api_key"]
 st.title('Document Q&A - Ask anything in your Document')
 st.sidebar.subheader('Upload document')
 uploaded_file = st.sidebar.file_uploader("Upload File",type=['txt','pdf'])
 # url2 = "https://github.com/fabiomatricardi/cdQnA/raw/main/KS-all-info_rev1.txt"
 # res = requests.get(url2)
 # with open("KS-all-info_rev1.txt", "w") as f:
 #   f.write(res.text)
-st.subheader('Enter query')
-query = st.text_input('Ask anything about the Document you uploaded')
-st.subheader('Answer')
-st.write('Answer from document')
-# # Document Loader
-# loader = TextLoader('./KS-all-info_rev1.txt')
-# documents = loader.load()
-# import textwrap
-# def wrap_text_preserve_newlines(text, width=110):
-#     # Split the input text into lines based on newline characters
-#     lines = text.split('\n')
-#     # Wrap each line individually
-#     wrapped_lines = [textwrap.fill(line, width=width) for line in lines]
-#     # Join the wrapped lines back together using newline characters
-#     wrapped_text = '\n'.join(wrapped_lines)
-#     return wrapped_text
-# # Text Splitter
-# text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=10)
-# docs = text_splitter.split_documents(documents)
-# # Embeddings
-# embeddings = HuggingFaceEmbeddings()
-# #Create the vectorized db
-# db = FAISS.from_documents(docs, embeddings)
-# llm=HuggingFaceHub(repo_id="google/flan-t5-xl", model_kwargs={"temperature":0, "max_length":512})
-# llm2=HuggingFaceHub(repo_id="declare-lab/flan-alpaca-large", model_kwargs={"temperature":0, "max_length":512})
-# chain = load_qa_chain(llm2, chain_type="stuff")
-# # Sample question
-# # query = "What the actual issues and drawbacks ?"
-# # docs = db.similarity_search(query)
-# # chain.run(input_documents=docs, question=query)
 # # PDFs

 import os
 import streamlit as st
+from io import StringIO
 #for textfiles
 from langchain.document_loaders import TextLoader
 st.title('Document Q&A - Ask anything in your Document')
 st.sidebar.subheader('Upload document')
 uploaded_file = st.sidebar.file_uploader("Upload File",type=['txt','pdf'])
+with st.sidebar.expander('File'):
+    if(uploaded_file):
+    	st.info(uploaded_file.name)
 # url2 = "https://github.com/fabiomatricardi/cdQnA/raw/main/KS-all-info_rev1.txt"
 # res = requests.get(url2)
 # with open("KS-all-info_rev1.txt", "w") as f:
 #   f.write(res.text)
+if (uploaded_file):
+	st.subheader('Enter query')
+	query = st.text_input('Ask anything about the Document you uploaded')
+	stringio = StringIO(uploaded_file.getvalue().decode("utf-8"))
+	with open(uploaded_file.name, "w") as f:
+  		f.write(stringio.read())
+if(uploaded_file):
+	loader = TextLoader(uploaded_file.name)
+	documents = loader.load()
+# Document Loader
+#loader = TextLoader('./KS-all-info_rev1.txt')
+# loader = TextLoader(os.path.join("./", uploaded_file.name))
+	# import textwrap
+	# def wrap_text_preserve_newlines(text, width=110):
+    # 	# Split the input text into lines based on newline characters
+    # 	lines = text.split('\n')
+    # 	# Wrap each line individually
+    # 	wrapped_lines = [textwrap.fill(line, width=width) for line in lines]
+    # 	# Join the wrapped lines back together using newline characters
+    # 	wrapped_text = '\n'.join(wrapped_lines)
+    # 	return wrapped_text
+	# Text Splitter
+	text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=10)
+	docs = text_splitter.split_documents(documents)
+	# Embeddings
+	embeddings = HuggingFaceEmbeddings()
+	#Create the vectorized db
+	db = FAISS.from_documents(docs, embeddings)
+	#llm=HuggingFaceHub(repo_id="google/flan-t5-xl", model_kwargs={"temperature":0, "max_length":512})
+	llm2=HuggingFaceHub(repo_id="declare-lab/flan-alpaca-large", model_kwargs={"temperature":0, "max_length":512})
+	chain = load_qa_chain(llm2, chain_type="stuff")
+	# Sample question
+	#query = "What the actual issues and drawbacks ?"
+	docs = db.similarity_search(query)
+	answer = chain.run(input_documents=docs, question=query)
+	st.subheader('Answer')
+	st.write(answer)
 # # PDFs