amornpan committed on
Commit
5324297
1 Parent(s): 96c3305

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +108 -136
app.py CHANGED
@@ -1,148 +1,120 @@
1
  import streamlit as st
2
  import requests
3
- from datetime import datetime
4
  import base64
5
  import os
6
  import re
7
 
8
- if "dialog_done" not in st.session_state:
9
- st.session_state.dialog_done = False
10
-
11
- if not st.session_state.dialog_done:
12
- st.title("เริ่มต้นคำถาม")
13
- st.write("คำถาม: ใครคือคนที่หน้าตาดีที่สุด?")
14
-
15
- answer = st.text_input("กรุณาตอบคำถามนี้:")
16
- if st.button("ส่งคำตอบ"):
17
- if answer == "พี่ก้องคนหล่อ":
18
- st.session_state.dialog_done = True
19
- st.success("คำตอบถูกต้อง! กดปุ่ม 'เข้าสู่หน้าหลัก' เพื่อดำเนินการต่อ")
20
- else:
21
- st.error("คำตอบไม่ถูกต้อง กรุณาลองใหม่อีกครั้ง.")
22
-
23
- if st.session_state.dialog_done:
24
- if st.button("เข้าสู่หน้าหลัก"):
25
- st.experimental_rerun()
26
-
27
- else:
28
- st.title("AI สนับสนุนความรู้ด้าน PDPA")
29
- st.write("เราสอบถาม AI สืบค้น และสรุป")
30
-
31
- system_prompt = "คุณเป็นผู้ช่วยที่มีความรู้ด้านกฎหมาย PDPA และสามารถให้คำตอบที่เกี่ยวข้องเฉพาะตาม context ที่ได้รับ"
32
-
33
- def clean_text_for_search(text):
34
- text = re.sub(r'P-\d+\s*$', '', text, flags=re.MULTILINE)
35
- text = re.sub(r'Confidential.*$', '', text, flags=re.MULTILINE)
36
- text = ' '.join(text.split())
37
- return text
38
-
39
- def create_highlighted_pdf(pdf_path, search_text, page_number):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
  try:
41
- search_text = clean_text_for_search(search_text)
42
- doc = fitz.open(pdf_path)
43
- page = doc[int(page_number) - 1]
44
- words = [word for word in search_text.split() if word]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
 
46
- for word in words:
47
- if len(word) > 3:
48
- text_instances = page.search_for(word)
49
- for inst in text_instances:
50
- highlight = page.add_highlight_annot(inst)
51
- highlight.set_colors(stroke=(1, 1, 0))
52
- highlight.update()
53
 
54
- new_doc = fitz.open()
55
- new_doc.insert_pdf(doc, from_page=int(page_number) - 1, to_page=int(page_number) - 1)
56
- pdf_bytes = new_doc.write()
57
 
58
- doc.close()
59
- new_doc.close()
 
 
60
 
61
- return pdf_bytes
 
 
 
 
62
 
63
- except Exception as e:
64
- st.error(f"Error in create_highlighted_pdf: {str(e)}")
65
- return None
66
-
67
- def format_file_size(size_in_bytes):
68
- for unit in ['B', 'KB', 'MB', 'GB']:
69
- if size_in_bytes < 1024:
70
- return f"{size_in_bytes:.2f} {unit}"
71
- size_in_bytes /= 1024
72
- return f"{size_in_bytes:.2f} GB"
73
-
74
- def display_search_result(result, index):
75
- with st.expander(f"🔍 Search Result #{index + 1} (Score: {result['score']:.4f})"):
76
- st.markdown("#### 📄 Document Information")
77
- col1, col2 = st.columns(2)
78
-
79
- with col1:
80
- st.markdown("**File Details:**")
81
- st.write(f"• File Name: {result['metadata']['file_name']}")
82
- st.write(f"• Page: {result['metadata']['page_label']}")
83
- st.write(f"• Type: {result['metadata']['file_type']}")
84
- st.write(f"• Size: {format_file_size(result['metadata']['file_size'])}")
85
-
86
- with col2:
87
- st.markdown("**Dates:**")
88
- st.write(f"• Created: {result['metadata']['creation_date']}")
89
- st.write(f"• Modified: {result['metadata']['last_modified_date']}")
90
-
91
- st.markdown("#### 📝 Content")
92
- st.markdown(f"```\n{result['text']}\n```")
93
-
94
- try:
95
- pdf_path = result['file_path']
96
- if os.path.exists(pdf_path):
97
- st.markdown("#### 📄 PDF Preview (with highlighted text)")
98
-
99
- highlighted_pdf = create_highlighted_pdf(
100
- pdf_path,
101
- result['text'],
102
- result['metadata']['page_label']
103
- )
104
-
105
- if highlighted_pdf:
106
- base64_pdf = base64.b64encode(highlighted_pdf).decode('utf-8')
107
- pdf_display = f'''
108
- <iframe
109
- src="data:application/pdf;base64,{base64_pdf}"
110
- width="100%"
111
- height="800px"
112
- type="application/pdf"
113
- style="border: 1px solid #ccc; border-radius: 5px;"
114
- ></iframe>
115
- '''
116
- st.markdown(pdf_display, unsafe_allow_html=True)
117
- else:
118
- st.error("Failed to create highlighted PDF")
119
- except Exception as e:
120
- st.error(f"Error displaying PDF: {str(e)}")
121
-
122
- if "chat_history" not in st.session_state:
123
- st.session_state.chat_history = []
124
-
125
- with st.form(key="input_form"):
126
- user_input = st.text_input("You:", key="input")
127
- submit_button = st.form_submit_button("Send")
128
-
129
- if submit_button:
130
- if user_input:
131
- if st.session_state.chat_history:
132
- st.session_state.chat_history.insert(0, ("###", "###"))
133
- st.session_state.chat_history.insert(0, ("You", user_input))
134
-
135
- try:
136
- response = requests.post("http://113.53.253.50:8002/search", json={"query": user_input})
137
- response.raise_for_status()
138
- data = response.json()
139
- search_results = data["results"]
140
-
141
- st.markdown("### 🔎 Search Results")
142
- for idx, result in enumerate(search_results):
143
- display_search_result(result, idx)
144
-
145
- response_text = "\n\n---\n\n".join([f"Text: {result['text']}" for result in search_results])
146
-
147
- except requests.RequestException as e:
148
- st.error(f"Error: {str(e)}")
 
1
  import streamlit as st
2
  import requests
 
3
  import base64
4
  import os
5
  import re
6
 
7
# Page header rendered at the top of the Streamlit app.
st.title("AI สนับสนุนความรู้ด้าน PDPA")
st.write("เราสอบถาม AI สืบค้น และสรุป")

# Role instruction for the assistant: PDPA legal knowledge, answers limited
# to the supplied context.
# NOTE(review): no other statement visible in this file reads this value.
system_prompt = (
    "คุณเป็นผู้ช่วยที่มีความรู้ด้านกฎหมาย PDPA และสามารถให้คำตอบที่เกี่ยวข้องเฉพาะตาม context ที่ได้รับ"
)
11
+
12
# Matches a trailing "P-<digits>" marker (plus any trailing whitespace) at the
# end of a line. Presumably a page label left over from PDF text extraction
# -- confirm against the indexed documents.
_TRAILING_PAGE_MARKER_RE = re.compile(r'P-\d+\s*$', flags=re.MULTILINE)


def clean_text_for_search(text):
    """Normalise a text snippet before it is split into search words.

    Removes every end-of-line ``P-<digits>`` marker and collapses all runs
    of whitespace (including newlines) into single spaces.

    Args:
        text: The raw snippet. ``None`` and the empty string are accepted
            and yield ``""`` instead of raising ``TypeError``.

    Returns:
        The cleaned, single-line string.
    """
    if not text:
        return ""
    without_markers = _TRAILING_PAGE_MARKER_RE.sub('', text)
    return ' '.join(without_markers.split())
14
+
15
def create_highlighted_pdf(pdf_path, search_text, page_number):
    """Build a one-page PDF with the words of ``search_text`` highlighted.

    The requested page of ``pdf_path`` is opened, every word of the cleaned
    search text that is longer than three characters is highlighted in
    yellow, and that single page is copied into a fresh in-memory document.
    The source file on disk is not saved or modified by this function.

    Args:
        pdf_path: Path of the source PDF.
        search_text: Text whose words should be highlighted.
        page_number: 1-based page number; anything ``int()`` accepts.

    Returns:
        The serialised single-page PDF as bytes, or ``None`` if anything
        failed. Failures are reported through ``st.error`` rather than
        raised.
    """
    try:
        # PyMuPDF is imported under the name ``fitz``. The module-level
        # imports of this file do not include it, so import it here; an
        # ImportError is reported by the handler below like any other error.
        import fitz

        page_index = int(page_number) - 1
        if page_index < 0:
            # Page numbers are 1-based; reject anything lower instead of
            # passing a negative index on to the document.
            raise ValueError(f"invalid page number: {page_number!r}")

        cleaned_text = clean_text_for_search(search_text)
        # dict.fromkeys drops repeated words while keeping their order, so
        # the same word is not searched (and annotated) more than once.
        words = [word for word in dict.fromkeys(cleaned_text.split()) if len(word) > 3]

        doc = fitz.open(pdf_path)
        try:
            page = doc[page_index]
            for word in words:
                for inst in page.search_for(word):
                    highlight = page.add_highlight_annot(inst)
                    highlight.set_colors(stroke=(1, 1, 0))  # yellow
                    highlight.update()

            new_doc = fitz.open()
            try:
                new_doc.insert_pdf(doc, from_page=page_index, to_page=page_index)
                # Newer PyMuPDF releases name the serialiser ``tobytes``;
                # fall back to the older ``write`` when it is absent.
                serialise = getattr(new_doc, "tobytes", None) or new_doc.write
                return serialise()
            finally:
                new_doc.close()
        finally:
            # Close the source document even when highlighting fails.
            doc.close()
    except Exception as e:
        st.error(f"Error in create_highlighted_pdf: {str(e)}")
        return None
41
+
42
def format_file_size(size_in_bytes):
    """Format a byte count as a human-readable string with two decimals.

    The value is divided by 1024 until it drops below 1024 or the largest
    unit is reached.

    Args:
        size_in_bytes: Size in bytes (int or float).

    Returns:
        A string such as ``"1.50 KB"``. Values of 1024 GB and above are
        reported in TB; the previous code divided once more after the GB
        step but still labelled the result "GB".
    """
    units = ('B', 'KB', 'MB', 'GB', 'TB')
    size = size_in_bytes
    for unit in units[:-1]:
        if size < 1024:
            return f"{size:.2f} {unit}"
        size /= 1024
    # At this point ``size`` is expressed in the largest unit.
    return f"{size:.2f} {units[-1]}"
48
+
49
def display_search_result(result, index):
    """Render one search hit inside a collapsible Streamlit expander.

    Shows the file details and dates from ``result['metadata']``, the
    matched text in a fenced block, and, when ``result['file_path']``
    exists on disk, an embedded preview of the highlighted PDF page.

    Args:
        result: Mapping with the keys ``score``, ``text``, ``file_path``
            and ``metadata`` (the latter holding ``file_name``,
            ``page_label``, ``file_type``, ``file_size``,
            ``creation_date`` and ``last_modified_date``).
        index: Zero-based position of the hit; displayed as ``index + 1``.
    """
    expander_title = f"🔍 Search Result #{index + 1} (Score: {result['score']:.4f})"
    with st.expander(expander_title):
        st.markdown("#### 📄 Document Information")
        details_col, dates_col = st.columns(2)

        with details_col:
            st.markdown("**File Details:**")
            meta = result['metadata']
            for label, key in (
                ("File Name", 'file_name'),
                ("Page", 'page_label'),
                ("Type", 'file_type'),
            ):
                st.write(f"• {label}: {meta[key]}")
            st.write(f"• Size: {format_file_size(meta['file_size'])}")

        with dates_col:
            st.markdown("**Dates:**")
            st.write(f"• Created: {meta['creation_date']}")
            st.write(f"• Modified: {meta['last_modified_date']}")

        st.markdown("#### 📝 Content")
        st.markdown(f"```\n{result['text']}\n```")

        # Any failure in the preview is reported in place so that the
        # details rendered above stay visible.
        try:
            source_pdf = result['file_path']
            if os.path.exists(source_pdf):
                st.markdown("#### 📄 PDF Preview (with highlighted text)")

                page_bytes = create_highlighted_pdf(
                    source_pdf,
                    result['text'],
                    meta['page_label'],
                )

                if not page_bytes:
                    st.error("Failed to create highlighted PDF")
                else:
                    # Embed the page inline as a base64 data URI.
                    base64_pdf = base64.b64encode(page_bytes).decode('utf-8')
                    iframe_html = f'''
                        <iframe
                            src="data:application/pdf;base64,{base64_pdf}"
                            width="100%"
                            height="800px"
                            type="application/pdf"
                            style="border: 1px solid #ccc; border-radius: 5px;"
                        ></iframe>
                    '''
                    st.markdown(iframe_html, unsafe_allow_html=True)
        except Exception as e:
            st.error(f"Error displaying PDF: {str(e)}")
96
 
97
# Endpoint of the search backend.
# NOTE(review): plain HTTP to a fixed IP address -- consider moving this to
# configuration and using HTTPS.
SEARCH_API_URL = "http://113.53.253.50:8002/search"
# Upper bound in seconds for the search call; without a timeout,
# requests.post can block the app indefinitely. The value is a guess --
# tune it to the backend's real latency.
SEARCH_TIMEOUT_SECONDS = 30

# The history survives Streamlit reruns by living in session state.
if "chat_history" not in st.session_state:
    st.session_state.chat_history = []

with st.form(key="input_form"):
    user_input = st.text_input("You:", key="input")
    submit_button = st.form_submit_button("Send")

if submit_button and user_input:
    # Newest entries go first; a ("###", "###") marker separates this
    # query from the previous ones.
    if st.session_state.chat_history:
        st.session_state.chat_history.insert(0, ("###", "###"))
    st.session_state.chat_history.insert(0, ("You", user_input))

    try:
        response = requests.post(
            SEARCH_API_URL,
            json={"query": user_input},
            timeout=SEARCH_TIMEOUT_SECONDS,
        )
        response.raise_for_status()
        search_results = response.json()["results"]
    except requests.RequestException as e:
        st.error(f"Error: {str(e)}")
    except (ValueError, KeyError, TypeError) as e:
        # A body that is not JSON, or JSON without a "results" entry.
        st.error(f"Error: unexpected response from search service ({e!r})")
    else:
        st.markdown("### 🔎 Search Results")
        for idx, result in enumerate(search_results):
            display_search_result(result, idx)