capradeepgujaran committed on
Commit
d9cfca5
•
1 Parent(s): e6032f2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +170 -77
app.py CHANGED
@@ -23,41 +23,11 @@ vector_index = None
23
  query_log = []
24
  sentence_model = SentenceTransformer('all-MiniLM-L6-v2')
25
 
26
# Fallback list of common Tesseract OCR language codes, used when the
# installed languages cannot be queried.
DEFAULT_LANGS = [
    'eng',      # English
    'fra',      # French
    'deu',      # German
    'spa',      # Spanish
    'ita',      # Italian
    'por',      # Portuguese
    'nld',      # Dutch
    'pol',      # Polish
    'tur',      # Turkish
    'rus',      # Russian
    'ara',      # Arabic
    'hin',      # Hindi
    'jpn',      # Japanese
    'kor',      # Korean
    'chi_sim',  # Simplified Chinese
    'chi_tra',  # Traditional Chinese
]


def get_available_languages():
    """Return the OCR languages reported by Tesseract, with a fallback.

    Runs ``tesseract --list-langs`` and parses its standard output: the
    first line is treated as a header and skipped, blank lines are ignored.

    Returns:
        A sorted list of language codes when the command reports any,
        otherwise a copy of ``DEFAULT_LANGS``.
    """
    try:
        # Use the pipe as a context manager so it is closed deterministically.
        with os.popen('tesseract --list-langs') as pipe:
            output = pipe.read()
        # Filter blank lines instead of blindly dropping the last entry, so a
        # missing trailing newline does not lose the final language.
        langs = [line.strip() for line in output.split('\n')[1:] if line.strip()]
        if langs:
            return sorted(langs)
    except Exception as e:
        logging.warning(f"Could not get Tesseract languages: {e}")

    # Fallback to default languages (copy so callers cannot mutate the constant)
    return list(DEFAULT_LANGS)


# Get available languages once at startup
AVAILABLE_LANGUAGES = get_available_languages()
61
 
62
  def create_temp_dir():
63
  """Create temporary directory if it doesn't exist"""
@@ -66,30 +36,111 @@ def create_temp_dir():
66
  os.makedirs(temp_dir)
67
  return temp_dir
68
 
69
- # [Previous helper functions remain the same...]
 
 
 
 
 
 
 
 
 
 
70
 
71
def create_summary_file(summary_text):
    """Write the summary text to a UTF-8 file and return its path.

    Args:
        summary_text: The summary to save. Falsy values (``None``, ``""``)
            mean there is nothing to save.

    Returns:
        The path of the written ``.txt`` file inside the temp directory, or
        ``None`` when no summary text was given.
    """
    if not summary_text:
        return None

    # Local import keeps this block self-contained.
    import hashlib

    temp_dir = create_temp_dir()
    # The built-in hash() is salted per process and may be negative, so file
    # names would differ between restarts; a content digest is stable.
    digest = hashlib.sha256(summary_text.encode('utf-8')).hexdigest()[:16]
    summary_file = os.path.join(temp_dir, f"summary_{digest}.txt")

    with open(summary_file, 'w', encoding='utf-8') as f:
        f.write(summary_text)

    return summary_file
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
83
 
84
  def query_app(query, model_name, use_similarity_check, api_key):
85
- """Process a query and return both the answer and the text for generation"""
86
  global vector_index, query_log
87
 
88
  if vector_index is None:
89
- return "No documents indexed yet. Please upload documents first.", None
90
 
91
  if not api_key:
92
- return "Please provide a valid OpenAI API Key.", None
93
 
94
  try:
95
  llm = OpenAI(model=model_name, api_key=api_key)
@@ -98,31 +149,24 @@ def query_app(query, model_name, use_similarity_check, api_key):
98
  response = query_engine.query(query)
99
 
100
  generated_response = response.response
101
- return generated_response, generated_response
102
 
103
  except Exception as e:
104
  logging.error(f"Error during query processing: {e}")
105
- return f"Error during query processing: {str(e)}", None
106
 
107
  def create_gradio_interface():
108
  with gr.Blocks(title="Document Processing and TTS App") as demo:
109
  gr.Markdown("# πŸ“„ Document Processing, Text & Audio Generation App")
110
 
111
- # Store API key at the top level to share across tabs
112
- api_key_input = gr.Textbox(
113
- label="Enter OpenAI API Key",
114
- placeholder="Paste your OpenAI API Key here",
115
- type="password"
116
- )
117
-
118
  with gr.Tab("πŸ“€ Upload Documents"):
119
- file_upload = gr.File(label="Upload Files", file_count="multiple", type="filepath")
120
- lang_dropdown = gr.Dropdown(
121
- choices=AVAILABLE_LANGUAGES,
122
- label="Select OCR Language",
123
- value='eng',
124
- info="Select the primary language of your documents"
125
  )
 
 
126
  upload_button = gr.Button("Upload and Index")
127
  upload_status = gr.Textbox(label="Status", interactive=False)
128
 
@@ -169,10 +213,8 @@ def create_gradio_interface():
169
  )
170
  additional_prompt = gr.Textbox(label="Additional Prompt (Optional)")
171
  generate_button = gr.Button("Generate")
172
-
173
- with gr.Row():
174
- audio_output = gr.Audio(label="Generated Audio")
175
- summary_output = gr.File(label="Generated Summary Text")
176
 
177
  # Wire up the components
178
  upload_button.click(
@@ -184,16 +226,11 @@ def create_gradio_interface():
184
  query_button.click(
185
  fn=query_app,
186
  inputs=[query_input, model_dropdown, similarity_checkbox, api_key_input],
187
- outputs=[answer_output, text_input]
188
  )
189
 
190
- def process_generation(*args):
191
- audio_file, summary_text = generate_audio_and_text(*args)
192
- summary_file = create_summary_file(summary_text) if summary_text else None
193
- return audio_file, summary_file
194
-
195
  generate_button.click(
196
- fn=process_generation,
197
  inputs=[
198
  api_key_input, text_input, model_dropdown, voice_type,
199
  voice_speed, language, output_option, summary_length,
@@ -208,4 +245,60 @@ if __name__ == "__main__":
208
  demo = create_gradio_interface()
209
  demo.launch()
210
  else:
211
- demo = create_gradio_interface()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
  query_log = []
24
  sentence_model = SentenceTransformer('all-MiniLM-L6-v2')
25
 
26
# Get available languages for OCR.
# os.popen does not raise when the tesseract command is missing or fails; it
# simply yields empty output, so the fallback must also cover an empty result.
try:
    with os.popen('tesseract --list-langs') as pipe:
        # The first output line is a header; blank lines are ignored.
        langs = [line.strip() for line in pipe.read().split('\n')[1:] if line.strip()]
except Exception:
    langs = []
if not langs:
    langs = ['eng']  # Fallback to English if tesseract isn't properly configured
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
 
32
  def create_temp_dir():
33
  """Create temporary directory if it doesn't exist"""
 
36
  os.makedirs(temp_dir)
37
  return temp_dir
38
 
39
def preprocess_image(image_path):
    """Prepare an image for OCR and return the path of the processed copy.

    The image is converted to grayscale, histogram-equalized, blurred with a
    5x5 Gaussian kernel and binarized with adaptive Gaussian thresholding
    (block size 11, constant 2). The result is written as a PNG into the
    directory returned by ``create_temp_dir()``.

    Args:
        image_path: Path of the image file to process.

    Returns:
        Path of the processed PNG. The caller is responsible for deleting it.

    Raises:
        ValueError: If the image cannot be read or the result cannot be written.
    """
    # Local import keeps this block self-contained.
    import uuid

    img = cv2.imread(image_path)
    # cv2.imread signals failure by returning None instead of raising.
    if img is None:
        raise ValueError(f"Could not read image: {image_path}")

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    gray = cv2.equalizeHist(gray)
    gray = cv2.GaussianBlur(gray, (5, 5), 0)
    processed_image = cv2.adaptiveThreshold(
        gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2
    )

    temp_dir = create_temp_dir()
    # A unique name per call keeps concurrent requests from overwriting (and
    # then deleting) each other's intermediate image.
    temp_filename = os.path.join(temp_dir, f"processed_image_{uuid.uuid4().hex}.png")
    if not cv2.imwrite(temp_filename, processed_image):
        raise ValueError(f"Could not write processed image: {temp_filename}")
    return temp_filename
50
 
51
def extract_text_from_image(image_path, lang='eng'):
    """Run OCR on an image file and return the recognized text.

    The image is first passed through ``preprocess_image``; the intermediate
    file it produces is removed afterwards, whether or not OCR succeeds.

    Args:
        image_path: Path of the image to read.
        lang: Tesseract language code passed to ``pytesseract``.

    Returns:
        The text returned by ``pytesseract.image_to_string``.
    """
    processed_image_path = preprocess_image(image_path)
    try:
        # Open via a context manager so the image file handle is released
        # before the file is deleted below.
        with Image.open(processed_image_path) as processed_image:
            return pytesseract.image_to_string(processed_image, lang=lang)
    finally:
        # Best-effort cleanup: a leftover temp file must not mask the OCR
        # result or the original error.
        try:
            os.remove(processed_image_path)
        except OSError as e:
            logging.warning(f"Could not remove temporary image {processed_image_path}: {e}")
59
+
60
def extract_text_from_pdf(pdf_path, lang='eng'):
    """Extract text from a PDF, falling back to OCR for pages without text.

    Each page's embedded text is used when it is non-blank. Otherwise the
    page is rendered with ``convert_from_path`` and passed through
    ``extract_text_from_image``, followed by an
    ``[OCR applied on page N]`` marker.

    Args:
        pdf_path: Path of the PDF file.
        lang: Tesseract language code used for the OCR fallback.

    Returns:
        The concatenated text of all pages, or a string starting with
        ``"Error processing PDF: "`` if anything fails.
    """
    parts = []
    temp_dir = create_temp_dir()
    try:
        with open(pdf_path, 'rb') as file:
            pdf_reader = PyPDF2.PdfReader(file)
            for page_num, page in enumerate(pdf_reader.pages):
                # Guard against a missing extraction result before stripping.
                page_text = page.extract_text() or ""
                if page_text.strip():
                    parts.append(page_text)
                    continue

                images = convert_from_path(pdf_path, first_page=page_num + 1, last_page=page_num + 1)
                for image_num, image in enumerate(images):
                    temp_image_path = os.path.join(temp_dir, f'temp_image_{page_num}_{image_num}.png')
                    try:
                        image.save(temp_image_path, 'PNG')
                        parts.append(extract_text_from_image(temp_image_path, lang=lang))
                        parts.append(f"\n[OCR applied on page {page_num + 1}]\n")
                    finally:
                        # Best-effort cleanup, also when saving or OCR fails.
                        try:
                            os.remove(temp_image_path)
                        except OSError:
                            pass
    except Exception as e:
        logging.error(f"Error processing PDF {pdf_path}: {e}")
        return f"Error processing PDF: {str(e)}"
    return "".join(parts)
85
+
86
def extract_text(file_path, lang='eng'):
    """Extract text from a file, dispatching on its extension.

    Args:
        file_path: Path of the file. The extension is matched
            case-insensitively.
        lang: Tesseract language code forwarded to the extractor.

    Returns:
        The extracted text for ``pdf``, ``png``, ``jpg`` and ``jpeg`` files;
        otherwise a string starting with ``"Unsupported file type: "``.
    """
    # splitext only looks at the final path component, so a file without an
    # extension (or one inside a dotted directory) is not mistaken for having
    # the whole path as its "extension".
    file_ext = os.path.splitext(file_path)[1].lower().lstrip('.')
    if file_ext == 'pdf':
        return extract_text_from_pdf(file_path, lang)
    if file_ext in ('png', 'jpg', 'jpeg'):
        return extract_text_from_image(file_path, lang)
    return f"Unsupported file type: {file_ext or '(no extension)'}"
94
+
95
def process_upload(api_key, files, lang):
    """Extract text from the uploaded files and build the global vector index.

    Args:
        api_key: OpenAI API key used for the embedding model.
        files: Iterable of file paths to process.
        lang: OCR language code forwarded to ``extract_text``.

    Returns:
        A status message: a success summary (with any per-file errors
        appended), or an explanation of why nothing was indexed.

    Side effects:
        Replaces the module-level ``vector_index`` when indexing succeeds.
    """
    global vector_index

    if not api_key:
        return "Please provide a valid OpenAI API Key."

    if not files:
        return "No files uploaded."

    documents = []
    error_messages = []
    image_heavy_docs = []
    # extract_text reports failures as plain strings with these prefixes;
    # such results must be surfaced as errors, not indexed as content.
    failure_prefixes = ("Error processing PDF:", "Unsupported file type:")

    for file_path in files:
        file_name = os.path.basename(file_path)
        try:
            text = extract_text(file_path, lang)
            if text.startswith(failure_prefixes):
                error_message = f"Error processing file {file_name}: {text}"
                logging.error(error_message)
                error_messages.append(error_message)
                continue
            if "This document consists of" in text and "page(s) of images" in text:
                image_heavy_docs.append(file_name)
            documents.append(Document(text=text))
        except Exception as e:
            error_message = f"Error processing file {file_name}: {str(e)}"
            logging.error(error_message)
            error_messages.append(error_message)

    if not documents:
        return f"No valid documents were indexed. Errors: {'; '.join(error_messages)}"

    try:
        embed_model = OpenAIEmbedding(model="text-embedding-3-large", api_key=api_key)
        vector_index = VectorStoreIndex.from_documents(documents, embed_model=embed_model)
    except Exception as e:
        return f"Error creating index: {str(e)}"

    success_message = f"Successfully indexed {len(documents)} files."
    if image_heavy_docs:
        success_message += f"\nNote: The following documents consist mainly of images and may require manual review: {', '.join(image_heavy_docs)}"
    if error_messages:
        success_message += f"\nErrors: {'; '.join(error_messages)}"

    return success_message
135
 
136
  def query_app(query, model_name, use_similarity_check, api_key):
 
137
  global vector_index, query_log
138
 
139
  if vector_index is None:
140
+ return "No documents indexed yet. Please upload documents first."
141
 
142
  if not api_key:
143
+ return "Please provide a valid OpenAI API Key."
144
 
145
  try:
146
  llm = OpenAI(model=model_name, api_key=api_key)
 
149
  response = query_engine.query(query)
150
 
151
  generated_response = response.response
152
+ return generated_response
153
 
154
  except Exception as e:
155
  logging.error(f"Error during query processing: {e}")
156
+ return f"Error during query processing: {str(e)}"
157
 
158
  def create_gradio_interface():
159
  with gr.Blocks(title="Document Processing and TTS App") as demo:
160
  gr.Markdown("# πŸ“„ Document Processing, Text & Audio Generation App")
161
 
 
 
 
 
 
 
 
162
  with gr.Tab("πŸ“€ Upload Documents"):
163
+ api_key_input = gr.Textbox(
164
+ label="Enter OpenAI API Key",
165
+ placeholder="Paste your OpenAI API Key here",
166
+ type="password"
 
 
167
  )
168
+ file_upload = gr.File(label="Upload Files", file_count="multiple", type="filepath")
169
+ lang_dropdown = gr.Dropdown(choices=langs, label="Select OCR Language", value='eng')
170
  upload_button = gr.Button("Upload and Index")
171
  upload_status = gr.Textbox(label="Status", interactive=False)
172
 
 
213
  )
214
  additional_prompt = gr.Textbox(label="Additional Prompt (Optional)")
215
  generate_button = gr.Button("Generate")
216
+ audio_output = gr.Audio(label="Generated Audio")
217
+ summary_output = gr.Textbox(label="Generated Summary Text")
 
 
218
 
219
  # Wire up the components
220
  upload_button.click(
 
226
  query_button.click(
227
  fn=query_app,
228
  inputs=[query_input, model_dropdown, similarity_checkbox, api_key_input],
229
+ outputs=[answer_output]
230
  )
231
 
 
 
 
 
 
232
  generate_button.click(
233
+ fn=generate_audio_and_text,
234
  inputs=[
235
  api_key_input, text_input, model_dropdown, voice_type,
236
  voice_speed, language, output_option, summary_length,
 
245
  demo = create_gradio_interface()
246
  demo.launch()
247
  else:
248
+ demo = create_gradio_interface()/////////////////////////////////////openai_tts_tool.py// from openai import OpenAI
249
+ import tempfile
250
+ import os
251
+
252
def generate_audio_and_text(api_key, input_text, model_name, voice_type, voice_speed, language, output_option, summary_length, additional_prompt):
    """Generate a summary and/or a spoken version of the input text.

    Args:
        api_key: OpenAI API key.
        input_text: Text to summarize and/or convert to speech.
        model_name: Chat model used for the summary.
        voice_type: Voice passed to the speech endpoint.
        voice_speed: Playback speed; converted with ``float()``.
        language: Accepted for interface compatibility; not used here.
        output_option: ``"summary_text"``, ``"audio"`` or ``"both"``. Any
            other value produces ``(None, None)``.
        summary_length: Approximate summary length in words.
        additional_prompt: Optional extra instructions for the summary.

    Returns:
        A tuple ``(audio_file, summary_text)``. ``audio_file`` is the path
        of the generated MP3 or ``None``; ``summary_text`` is the summary,
        ``None``, or an error/validation message.
    """
    # Treat whitespace-only text like empty text rather than sending it to
    # the API and surfacing a remote error.
    if not input_text or (isinstance(input_text, str) and not input_text.strip()):
        return None, "No input text provided"

    try:
        client = OpenAI(api_key=api_key)

        # Generate summary if requested
        summary_text = None
        if output_option in ["summary_text", "both"]:
            summary_prompt = f"Summarize the following text in approximately {summary_length} words. {additional_prompt or ''}\n\nText: {input_text}"

            summary_response = client.chat.completions.create(
                model=model_name,
                messages=[{"role": "user", "content": summary_prompt}]
            )
            summary_text = summary_response.choices[0].message.content

        # Generate audio if requested
        audio_file = None
        if output_option in ["audio", "both"]:
            speech_response = client.audio.speech.create(
                model="tts-1",  # or "tts-1-hd" for higher quality
                voice=voice_type,
                input=input_text,
                speed=float(voice_speed)
            )

            # exist_ok avoids the race between checking and creating.
            temp_dir = os.path.join(os.getcwd(), 'temp')
            os.makedirs(temp_dir, exist_ok=True)

            # A unique file per call: naming by hash(input_text) made the same
            # text with a different voice or speed overwrite earlier output.
            fd, audio_path = tempfile.mkstemp(prefix="output_", suffix=".mp3", dir=temp_dir)
            with os.fdopen(fd, "wb") as f:
                for chunk in speech_response.iter_bytes():
                    f.write(chunk)

            audio_file = audio_path

        # Each value is None unless its output was requested above, so this
        # covers "summary_text", "audio", "both" and unknown options alike.
        return audio_file, summary_text

    except Exception as e:
        return None, f"Error: {str(e)}"