capradeepgujaran commited on
Commit
ae25925
•
1 Parent(s): f9ae432

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -126
app.py CHANGED
@@ -15,132 +15,33 @@ import logging
15
  from openai_tts_tool import generate_audio_and_text
16
  import tempfile
17
 
18
- # Set up logging configuration
19
- logging.basicConfig(level=logging.INFO, format='%(asctime)s | %(levelname)s | %(message)s')
20
 
21
- # Initialize global variables
22
- vector_index = None
23
- query_log = []
24
- sentence_model = SentenceTransformer('all-MiniLM-L6-v2')
25
-
26
- # Get available languages for OCR
27
- try:
28
- langs = os.popen('tesseract --list-langs').read().split('\n')[1:-1]
29
- except:
30
- langs = ['eng'] # Fallback to English if tesseract isn't properly configured
31
-
32
- def create_temp_dir():
33
- """Create temporary directory if it doesn't exist"""
34
  temp_dir = os.path.join(os.getcwd(), 'temp')
35
  if not os.path.exists(temp_dir):
36
  os.makedirs(temp_dir)
37
- return temp_dir
38
-
39
- def preprocess_image(image_path):
40
- img = cv2.imread(image_path)
41
- gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
42
- gray = cv2.equalizeHist(gray)
43
- gray = cv2.GaussianBlur(gray, (5, 5), 0)
44
- processed_image = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
45
- cv2.THRESH_BINARY, 11, 2)
46
- temp_dir = create_temp_dir()
47
- temp_filename = os.path.join(temp_dir, "processed_image.png")
48
- cv2.imwrite(temp_filename, processed_image)
49
- return temp_filename
50
-
51
- def extract_text_from_image(image_path, lang='eng'):
52
- processed_image_path = preprocess_image(image_path)
53
- text = pytesseract.image_to_string(Image.open(processed_image_path), lang=lang)
54
- try:
55
- os.remove(processed_image_path)
56
- except:
57
- pass
58
- return text
59
-
60
- def extract_text_from_pdf(pdf_path, lang='eng'):
61
- text = ""
62
- temp_dir = create_temp_dir()
63
- try:
64
- with open(pdf_path, 'rb') as file:
65
- pdf_reader = PyPDF2.PdfReader(file)
66
- for page_num in range(len(pdf_reader.pages)):
67
- page = pdf_reader.pages[page_num]
68
- page_text = page.extract_text()
69
- if page_text.strip():
70
- text += page_text
71
- else:
72
- images = convert_from_path(pdf_path, first_page=page_num + 1, last_page=page_num + 1)
73
- for image in images:
74
- temp_image_path = os.path.join(temp_dir, f'temp_image_{page_num}.png')
75
- image.save(temp_image_path, 'PNG')
76
- text += extract_text_from_image(temp_image_path, lang=lang)
77
- text += f"\n[OCR applied on page {page_num + 1}]\n"
78
- try:
79
- os.remove(temp_image_path)
80
- except:
81
- pass
82
- except Exception as e:
83
- return f"Error processing PDF: {str(e)}"
84
- return text
85
-
86
- def extract_text(file_path, lang='eng'):
87
- file_ext = file_path.lower().split('.')[-1]
88
- if file_ext in ['pdf']:
89
- return extract_text_from_pdf(file_path, lang)
90
- elif file_ext in ['png', 'jpg', 'jpeg']:
91
- return extract_text_from_image(file_path, lang)
92
- else:
93
- return f"Unsupported file type: {file_ext}"
94
-
95
- def process_upload(api_key, files, lang):
96
- global vector_index
97
-
98
- if not api_key:
99
- return "Please provide a valid OpenAI API Key."
100
-
101
- if not files:
102
- return "No files uploaded."
103
-
104
- documents = []
105
- error_messages = []
106
- image_heavy_docs = []
107
-
108
- for file_path in files:
109
- try:
110
- text = extract_text(file_path, lang)
111
- if "This document consists of" in text and "page(s) of images" in text:
112
- image_heavy_docs.append(os.path.basename(file_path))
113
- documents.append(Document(text=text))
114
- except Exception as e:
115
- error_message = f"Error processing file {os.path.basename(file_path)}: {str(e)}"
116
- logging.error(error_message)
117
- error_messages.append(error_message)
118
-
119
- if documents:
120
- try:
121
- embed_model = OpenAIEmbedding(model="text-embedding-3-large", api_key=api_key)
122
- vector_index = VectorStoreIndex.from_documents(documents, embed_model=embed_model)
123
-
124
- success_message = f"Successfully indexed {len(documents)} files."
125
- if image_heavy_docs:
126
- success_message += f"\nNote: The following documents consist mainly of images and may require manual review: {', '.join(image_heavy_docs)}"
127
- if error_messages:
128
- success_message += f"\nErrors: {'; '.join(error_messages)}"
129
-
130
- return success_message
131
- except Exception as e:
132
- return f"Error creating index: {str(e)}"
133
- else:
134
- return f"No valid documents were indexed. Errors: {'; '.join(error_messages)}"
135
 
136
  def query_app(query, model_name, use_similarity_check, api_key):
137
  global vector_index, query_log
138
 
139
  if vector_index is None:
140
- return "No documents indexed yet. Please upload documents first."
141
 
142
  if not api_key:
143
- return "Please provide a valid OpenAI API Key."
144
 
145
  try:
146
  llm = OpenAI(model=model_name, api_key=api_key)
@@ -149,22 +50,25 @@ def query_app(query, model_name, use_similarity_check, api_key):
149
  response = query_engine.query(query)
150
 
151
  generated_response = response.response
152
- return generated_response
 
153
 
154
  except Exception as e:
155
  logging.error(f"Error during query processing: {e}")
156
- return f"Error during query processing: {str(e)}"
157
 
158
  def create_gradio_interface():
159
  with gr.Blocks(title="Document Processing and TTS App") as demo:
160
  gr.Markdown("# πŸ“„ Document Processing, Text & Audio Generation App")
161
 
 
 
 
 
 
 
 
162
  with gr.Tab("πŸ“€ Upload Documents"):
163
- api_key_input = gr.Textbox(
164
- label="Enter OpenAI API Key",
165
- placeholder="Paste your OpenAI API Key here",
166
- type="password"
167
- )
168
  file_upload = gr.File(label="Upload Files", file_count="multiple", type="filepath")
169
  lang_dropdown = gr.Dropdown(choices=langs, label="Select OCR Language", value='eng')
170
  upload_button = gr.Button("Upload and Index")
@@ -213,8 +117,10 @@ def create_gradio_interface():
213
  )
214
  additional_prompt = gr.Textbox(label="Additional Prompt (Optional)")
215
  generate_button = gr.Button("Generate")
216
- audio_output = gr.Audio(label="Generated Audio")
217
- summary_output = gr.Textbox(label="Generated Summary Text")
 
 
218
 
219
  # Wire up the components
220
  upload_button.click(
@@ -223,14 +129,21 @@ def create_gradio_interface():
223
  outputs=[upload_status]
224
  )
225
 
 
226
  query_button.click(
227
  fn=query_app,
228
  inputs=[query_input, model_dropdown, similarity_checkbox, api_key_input],
229
- outputs=[answer_output]
230
  )
231
 
 
 
 
 
 
 
232
  generate_button.click(
233
- fn=generate_audio_and_text,
234
  inputs=[
235
  api_key_input, text_input, model_dropdown, voice_type,
236
  voice_speed, language, output_option, summary_length,
 
15
  from openai_tts_tool import generate_audio_and_text
16
  import tempfile
17
 
18
+ # [Previous imports and initialization code remains the same...]
 
19
 
20
+ def create_summary_file(summary_text):
21
+ """Create a downloadable file from the summary text"""
22
+ if not summary_text:
23
+ return None
24
+
 
 
 
 
 
 
 
 
25
  temp_dir = os.path.join(os.getcwd(), 'temp')
26
  if not os.path.exists(temp_dir):
27
  os.makedirs(temp_dir)
28
+
29
+ # Create a unique filename
30
+ summary_file = os.path.join(temp_dir, f"summary_{hash(summary_text)}.txt")
31
+
32
+ with open(summary_file, 'w', encoding='utf-8') as f:
33
+ f.write(summary_text)
34
+
35
+ return summary_file
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
 
37
  def query_app(query, model_name, use_similarity_check, api_key):
38
  global vector_index, query_log
39
 
40
  if vector_index is None:
41
+ return "No documents indexed yet. Please upload documents first.", None
42
 
43
  if not api_key:
44
+ return "Please provide a valid OpenAI API Key.", None
45
 
46
  try:
47
  llm = OpenAI(model=model_name, api_key=api_key)
 
50
  response = query_engine.query(query)
51
 
52
  generated_response = response.response
53
+ # Return both the response and the same response (to update the text generation input)
54
+ return generated_response, generated_response
55
 
56
  except Exception as e:
57
  logging.error(f"Error during query processing: {e}")
58
+ return f"Error during query processing: {str(e)}", None
59
 
60
  def create_gradio_interface():
61
  with gr.Blocks(title="Document Processing and TTS App") as demo:
62
  gr.Markdown("# πŸ“„ Document Processing, Text & Audio Generation App")
63
 
64
+ # Store API key at the top level to share across tabs
65
+ api_key_input = gr.Textbox(
66
+ label="Enter OpenAI API Key",
67
+ placeholder="Paste your OpenAI API Key here",
68
+ type="password"
69
+ )
70
+
71
  with gr.Tab("πŸ“€ Upload Documents"):
 
 
 
 
 
72
  file_upload = gr.File(label="Upload Files", file_count="multiple", type="filepath")
73
  lang_dropdown = gr.Dropdown(choices=langs, label="Select OCR Language", value='eng')
74
  upload_button = gr.Button("Upload and Index")
 
117
  )
118
  additional_prompt = gr.Textbox(label="Additional Prompt (Optional)")
119
  generate_button = gr.Button("Generate")
120
+
121
+ with gr.Row():
122
+ audio_output = gr.Audio(label="Generated Audio")
123
+ summary_output = gr.File(label="Generated Summary Text")
124
 
125
  # Wire up the components
126
  upload_button.click(
 
129
  outputs=[upload_status]
130
  )
131
 
132
+ # Modified to update both answer output and text generation input
133
  query_button.click(
134
  fn=query_app,
135
  inputs=[query_input, model_dropdown, similarity_checkbox, api_key_input],
136
+ outputs=[answer_output, text_input] # Now updates both outputs
137
  )
138
 
139
+ # Modified to handle file output
140
+ def process_generation(*args):
141
+ audio_file, summary_text = generate_audio_and_text(*args)
142
+ summary_file = create_summary_file(summary_text) if summary_text else None
143
+ return audio_file, summary_file
144
+
145
  generate_button.click(
146
+ fn=process_generation,
147
  inputs=[
148
  api_key_input, text_input, model_dropdown, voice_type,
149
  voice_speed, language, output_option, summary_length,