capradeepgujaran committed on
Commit
e6032f2
•
1 Parent(s): 3be3378

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +61 -11
app.py CHANGED
@@ -15,18 +15,65 @@ import logging
15
  from openai_tts_tool import generate_audio_and_text
16
  import tempfile
17
 
18
- # [Previous imports and initialization code remains the same...]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
 
20
  def create_summary_file(summary_text):
21
  """Create a downloadable file from the summary text"""
22
  if not summary_text:
23
  return None
24
 
25
- temp_dir = os.path.join(os.getcwd(), 'temp')
26
- if not os.path.exists(temp_dir):
27
- os.makedirs(temp_dir)
28
-
29
- # Create a unique filename
30
  summary_file = os.path.join(temp_dir, f"summary_{hash(summary_text)}.txt")
31
 
32
  with open(summary_file, 'w', encoding='utf-8') as f:
@@ -35,6 +82,7 @@ def create_summary_file(summary_text):
35
  return summary_file
36
 
37
  def query_app(query, model_name, use_similarity_check, api_key):
 
38
  global vector_index, query_log
39
 
40
  if vector_index is None:
@@ -50,7 +98,6 @@ def query_app(query, model_name, use_similarity_check, api_key):
50
  response = query_engine.query(query)
51
 
52
  generated_response = response.response
53
- # Return both the response and the same response (to update the text generation input)
54
  return generated_response, generated_response
55
 
56
  except Exception as e:
@@ -70,7 +117,12 @@ def create_gradio_interface():
70
 
71
  with gr.Tab("πŸ“€ Upload Documents"):
72
  file_upload = gr.File(label="Upload Files", file_count="multiple", type="filepath")
73
- lang_dropdown = gr.Dropdown(choices=langs, label="Select OCR Language", value='eng')
 
 
 
 
 
74
  upload_button = gr.Button("Upload and Index")
75
  upload_status = gr.Textbox(label="Status", interactive=False)
76
 
@@ -129,14 +181,12 @@ def create_gradio_interface():
129
  outputs=[upload_status]
130
  )
131
 
132
- # Modified to update both answer output and text generation input
133
  query_button.click(
134
  fn=query_app,
135
  inputs=[query_input, model_dropdown, similarity_checkbox, api_key_input],
136
- outputs=[answer_output, text_input] # Now updates both outputs
137
  )
138
 
139
- # Modified to handle file output
140
  def process_generation(*args):
141
  audio_file, summary_text = generate_audio_and_text(*args)
142
  summary_file = create_summary_file(summary_text) if summary_text else None
 
15
  from openai_tts_tool import generate_audio_and_text
16
  import tempfile
17
 
18
+ # Set up logging configuration
19
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s | %(levelname)s | %(message)s')
20
+
21
+ # Initialize global variables
22
+ vector_index = None
23
+ query_log = []
24
+ sentence_model = SentenceTransformer('all-MiniLM-L6-v2')
25
+
26
+ # Define a fallback list of common OCR languages
27
+ DEFAULT_LANGS = [
28
+ 'eng', # English
29
+ 'fra', # French
30
+ 'deu', # German
31
+ 'spa', # Spanish
32
+ 'ita', # Italian
33
+ 'por', # Portuguese
34
+ 'nld', # Dutch
35
+ 'pol', # Polish
36
+ 'tur', # Turkish
37
+ 'rus', # Russian
38
+ 'ara', # Arabic
39
+ 'hin', # Hindi
40
+ 'jpn', # Japanese
41
+ 'kor', # Korean
42
+ 'chi_sim', # Simplified Chinese
43
+ 'chi_tra' # Traditional Chinese
44
+ ]
45
+
46
def get_available_languages():
    """Get available Tesseract languages with fallback.

    Runs ``tesseract --list-langs`` and returns the reported language codes
    sorted alphabetically. The first output line is treated as a header and
    skipped, matching the original parsing.

    Returns:
        A sorted list of language codes, or ``DEFAULT_LANGS`` when the
        command cannot be run, exits with an error, times out, or reports
        no languages.
    """
    # Local import: the file's top-level import block is not fully visible
    # here, so bring subprocess into scope inside the function.
    import subprocess

    try:
        # Argument list (no shell) avoids shell parsing; the timeout keeps a
        # hung binary from blocking application startup.
        result = subprocess.run(
            ['tesseract', '--list-langs'],
            capture_output=True,
            text=True,
            timeout=10,
            check=False,
        )
    except (OSError, subprocess.SubprocessError) as e:
        # OSError covers a missing/non-executable binary; SubprocessError
        # covers the timeout.
        logging.warning(f"Could not get Tesseract languages: {e}")
        return DEFAULT_LANGS

    if result.returncode != 0:
        logging.warning(
            f"Could not get Tesseract languages: exit code {result.returncode}"
        )
        return DEFAULT_LANGS

    # Drop the header line, then ignore blank lines instead of relying on
    # the output ending in exactly one trailing newline.
    langs = [
        line.strip()
        for line in result.stdout.splitlines()[1:]
        if line.strip()
    ]
    if langs:
        return sorted(langs)

    # Fallback to default languages
    return DEFAULT_LANGS
58
+
59
+ # Get available languages once at startup
60
+ AVAILABLE_LANGUAGES = get_available_languages()
61
+
62
def create_temp_dir():
    """Create the ``temp`` directory under the current working directory.

    Returns:
        The path ``<cwd>/temp``; the directory exists when this returns.

    Raises:
        FileExistsError: If ``<cwd>/temp`` already exists but is not a
            directory.
    """
    temp_dir = os.path.join(os.getcwd(), 'temp')
    # exist_ok=True replaces the exists()-then-makedirs() sequence, which
    # could fail if another request created the directory in between.
    os.makedirs(temp_dir, exist_ok=True)
    return temp_dir
68
+
69
+ # [Previous helper functions remain the same...]
70
 
71
  def create_summary_file(summary_text):
72
  """Create a downloadable file from the summary text"""
73
  if not summary_text:
74
  return None
75
 
76
+ temp_dir = create_temp_dir()
 
 
 
 
77
  summary_file = os.path.join(temp_dir, f"summary_{hash(summary_text)}.txt")
78
 
79
  with open(summary_file, 'w', encoding='utf-8') as f:
 
82
  return summary_file
83
 
84
  def query_app(query, model_name, use_similarity_check, api_key):
85
+ """Process a query and return both the answer and the text for generation"""
86
  global vector_index, query_log
87
 
88
  if vector_index is None:
 
98
  response = query_engine.query(query)
99
 
100
  generated_response = response.response
 
101
  return generated_response, generated_response
102
 
103
  except Exception as e:
 
117
 
118
  with gr.Tab("πŸ“€ Upload Documents"):
119
  file_upload = gr.File(label="Upload Files", file_count="multiple", type="filepath")
120
+ lang_dropdown = gr.Dropdown(
121
+ choices=AVAILABLE_LANGUAGES,
122
+ label="Select OCR Language",
123
+ value='eng',
124
+ info="Select the primary language of your documents"
125
+ )
126
  upload_button = gr.Button("Upload and Index")
127
  upload_status = gr.Textbox(label="Status", interactive=False)
128
 
 
181
  outputs=[upload_status]
182
  )
183
 
 
184
  query_button.click(
185
  fn=query_app,
186
  inputs=[query_input, model_dropdown, similarity_checkbox, api_key_input],
187
+ outputs=[answer_output, text_input]
188
  )
189
 
 
190
  def process_generation(*args):
191
  audio_file, summary_text = generate_audio_and_text(*args)
192
  summary_file = create_summary_file(summary_text) if summary_text else None