File size: 2,379 Bytes
892f4c0 13977d9 79ab819 892f4c0 79ab819 13977d9 892f4c0 13977d9 892f4c0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 |
import streamlit as st
import os
from langchain.chains import RetrievalQA, ConversationalRetrievalChain
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.memory import ConversationBufferMemory
import re
def _first_line(text):
    """Return *text* up to, but not including, its first newline."""
    return text.partition("\n")[0]


def _build_conversation_chain(text_data):
    """Index *text_data* in FAISS and return a conversational retrieval chain.

    The text is split into overlapping chunks, embedded with OpenAI
    embeddings, and wired to a chat model with a buffer memory so follow-up
    questions can refer to earlier turns.
    """
    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    # create_documents() wraps raw strings in Document objects.
    # split_documents() expects existing Documents and fails on a plain str.
    documents = text_splitter.create_documents([text_data])

    vectorstore = FAISS.from_documents(documents, embedding=OpenAIEmbeddings())

    llm = ChatOpenAI(temperature=0.3, model_name="gpt-4-turbo")
    memory = ConversationBufferMemory(
        memory_key='chat_history', return_messages=True, output_key='answer')
    return ConversationalRetrievalChain.from_llm(
        llm=llm,
        chain_type="stuff",
        retriever=vectorstore.as_retriever(),
        memory=memory,
        return_source_documents=True,
    )


def _session_qa_state(text_data, api_key):
    """Return this session's Q&A state, rebuilding it when the inputs change.

    The state is a dict with the keys ``source``, ``chain``, ``last_query``
    and ``last_result``. It lives in ``st.session_state`` so the vector
    index and the chat memory survive Streamlit's script reruns.
    """
    source = (api_key, text_data)
    state = st.session_state["qa_state"] if "qa_state" in st.session_state else None
    if state is None or state["source"] != source:
        state = {
            "source": source,
            "chain": _build_conversation_chain(text_data),
            "last_query": None,
            "last_result": None,
        }
        st.session_state["qa_state"] = state
    return state


def main():
    """Run the Streamlit document Q&A app.

    Flow: collect an OpenAI API key, accept a ``.txt`` upload, build (or
    reuse) a retrieval chain over the document, answer the user's question,
    and optionally list the first line of each retrieved source chunk.
    """
    st.title('Document-Based Q&A System')

    api_key = st.text_input("Enter your OpenAI API key:", type="password")
    if api_key:
        # NOTE(review): os.environ is process-wide, so on a multi-user
        # deployment this key is visible to every session of the process.
        os.environ["OPENAI_API_KEY"] = api_key
        st.success("API Key has been set!")

    uploaded_file = st.file_uploader("Upload your document", type=['txt'])
    if uploaded_file is None:
        return

    # A key may also come from the environment (e.g. a deployment secret),
    # so check the environment rather than only the text box.
    active_key = os.environ.get("OPENAI_API_KEY")
    if not active_key:
        st.warning("Enter your OpenAI API key to process the document.")
        return

    try:
        text_data = uploaded_file.getvalue().decode("utf-8")
    except UnicodeDecodeError:
        st.error("The uploaded file is not valid UTF-8 text.")
        return
    if not text_data.strip():
        # An empty document yields no chunks to index.
        st.warning("The uploaded document is empty.")
        return

    qa_state = _session_qa_state(text_data, active_key)

    query = st.text_input("Ask a question about the document:")
    if not query:
        return

    # Streamlit reruns the script on every widget interaction. Only call the
    # chain for a new question, so toggling the checkbox neither re-bills the
    # request nor duplicates the turn in the chat memory.
    if qa_state["last_query"] != query:
        qa_state["last_result"] = qa_state["chain"]({"question": query})
        qa_state["last_query"] = query
    result = qa_state["last_result"]

    st.write("Answer:", result["answer"])

    # Optionally display the first line of each retrieved source chunk.
    if st.checkbox("Show source text snippets"):
        st.write("Source documents:")
        for source_doc in result["source_documents"]:
            st.write(_first_line(source_doc.page_content))
# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
# Initialize the Streamlit app
# st.title('Document-Based Q&A System')
|