File size: 4,537 Bytes
6728994
 
 
 
 
 
 
 
 
 
 
 
 
 
600695a
6728994
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
import os
import streamlit as st
import fitz
from PIL import Image
import tempfile
from langchain_community.document_loaders import PyPDFLoader, TextLoader, Docx2txtLoader
from langchain.chains.question_answering import load_qa_chain
from langchain_openai import OpenAI
from docx import Document
import io



# The OpenAI client reads its key from the OPENAI_API_KEY environment variable.
# SECURITY: never hard-code the key in this file -- anything committed here is
# effectively public. Export it in the shell / deployment secrets instead, and
# revoke any key that has ever appeared in source control.
if not os.environ.get("OPENAI_API_KEY"):
    st.warning(
        "OPENAI_API_KEY is not set. Document preview still works, but "
        "questions cannot be answered until the key is configured."
    )

# Initialize conversation history list (persisted across Streamlit reruns)
if "conversation_history" not in st.session_state:
    st.session_state.conversation_history = []

# Function to load document and perform question answering (cached)
from docx import Document

@st.cache_data
def process_document(uploaded_file, query):
    """Answer ``query`` against an uploaded PDF or Word document.

    The upload is written to a temporary file because the document loaders
    used here are constructed from a filesystem path. The temporary file is
    always removed before returning, including on errors.

    Args:
        uploaded_file: Uploaded file object exposing ``name`` and
            ``getvalue()``.
        query: Question to ask about the document. A blank question skips
            the language-model call entirely.

    Returns:
        A ``(answer, document_text)`` tuple. ``answer`` is ``""`` for a blank
        question or an unsupported extension. ``document_text`` is the
        newline-joined paragraph text for ``.docx`` uploads and ``None``
        otherwise.
    """
    file_extension = os.path.splitext(uploaded_file.name)[1].lower()

    # Reject unsupported formats before creating any temporary file so there
    # is nothing to clean up on this path.
    if file_extension not in (".pdf", ".docx"):
        st.error("Unsupported file format. Please upload a text file (.txt), a PDF file (.pdf), or a Word document (.docx).")
        return "", None

    # getvalue() returns the whole upload regardless of the current stream
    # position, unlike read(), which yields nothing once the stream has
    # already been consumed.
    with tempfile.NamedTemporaryFile(delete=False, suffix=file_extension) as tmp_file:
        tmp_file.write(uploaded_file.getvalue())
    # The handle is closed (and therefore flushed) here, before anything
    # re-opens the file by name.
    tmp_path = tmp_file.name

    try:
        if file_extension == ".pdf":
            loader = PyPDFLoader(tmp_path)
            document_text = None
        else:
            loader = Docx2txtLoader(tmp_path)
            document = Document(tmp_path)
            document_text = "\n".join(
                paragraph.text for paragraph in document.paragraphs
            )

        # No question means the caller only wants the extracted text; do not
        # spend a model call on an empty prompt.
        if not query or not query.strip():
            return "", document_text

        documents = loader.load()

        # Load QA chain and perform question answering
        chain = load_qa_chain(llm=OpenAI(), verbose=True)
        response = chain.invoke({"input_documents": documents, "question": query})

        return response["output_text"], document_text
    finally:
        # Remove the temporary file even if loading or the chain raised.
        os.unlink(tmp_path)





# Record one question/answer exchange in the session's conversation history
def update_conversation(query, response):
    """Append a ``{"question", "answer"}`` entry to the session history.

    Args:
        query: The question text that was asked.
        response: The answer text that was produced for it.
    """
    exchange = {"question": query, "answer": response}
    history = st.session_state.conversation_history
    history.append(exchange)

# Function to convert PDF pages to images
def pdf_to_images(pdf_bytes):
    """Render every page of an in-memory PDF to a PIL image.

    Args:
        pdf_bytes: Raw bytes of a PDF document.

    Returns:
        A list with one RGB ``PIL.Image.Image`` per page, in page order.
    """
    # The context manager closes the document once rendering is done; the
    # original left it open for the life of the process.
    with fitz.open(stream=pdf_bytes, filetype="pdf") as doc:
        images = []
        for page in doc:
            # alpha=False keeps the sample buffer at 3 bytes per pixel, which
            # is what the "RGB" mode passed to Image.frombytes expects.
            pixmap = page.get_pixmap(alpha=False)
            images.append(
                Image.frombytes("RGB", (pixmap.width, pixmap.height), pixmap.samples)
            )
    return images

# Streamlit UI
def main():
    """Render the page: document preview, question form, and chat history.

    The sidebar holds the uploader, the question box, and the "Ask" button.
    The main area shows the uploaded document (text for ``.docx``, a page
    image with a download button for ``.pdf``) followed by the conversation
    history stored in ``st.session_state``.
    """
    # Set sidebar title
    st.sidebar.title("7steps.AI")
    st.sidebar.markdown("---")

    # File uploader for document in sidebar
    uploaded_file = st.sidebar.file_uploader("Upload a document", type=["pdf", "docx"])

    # Display document content or images
    if uploaded_file is not None:
        st.title("Document Content")
        file_extension = os.path.splitext(uploaded_file.name)[1].lower()
        if file_extension == ".docx":
            # Extract the text directly from the uploaded bytes. Going through
            # process_document() here would run the question-answering chain
            # with an empty question just to obtain the paragraph text.
            document = Document(io.BytesIO(uploaded_file.getvalue()))
            document_text = "\n".join(
                paragraph.text for paragraph in document.paragraphs
            )
            st.text_area("Document Text", value=document_text, height=300)
        elif file_extension == ".pdf":
            images = pdf_to_images(uploaded_file.getvalue())
            if images:
                page_number = st.number_input("Page Number", value=1, min_value=1, max_value=len(images))
                # page_number is 1-based in the UI; the list is 0-based.
                current_page = images[page_number - 1]
                st.image(current_page, caption=f"Page {page_number}", use_column_width=True)

                # Download button for the currently displayed page
                img_bytes = io.BytesIO()
                current_page.save(img_bytes, format='PNG')
                st.download_button("Download Image", img_bytes.getvalue(), f'Page_{page_number}.png')

    # Text box for new question in sidebar
    query = st.sidebar.text_input("Enter your question:")

    # "Ask" button in sidebar
    if st.sidebar.button("Ask"):
        if uploaded_file is None:
            st.sidebar.write("Please upload a document first.")
        elif not query.strip():
            # Do not send an empty prompt to the model.
            st.sidebar.write("Please enter a question.")
        else:
            # Process document and record the exchange when an answer came back
            response, _ = process_document(uploaded_file, query)
            if response:
                update_conversation(query, response)

    # Display conversation history
    st.title("Conversation History")
    for item in st.session_state.conversation_history:
        st.write("You:", item["question"])
        st.write("AI:", item["answer"])

# Entry point: build the page only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()