Spaces:

capradeepgujaran
/

DocChat_n_Talk

Running

App Files Community

capradeepgujaran committed 15 days ago

Commit

e6032f2

•

1 Parent(s): 3be3378

Update app.py

Browse files

Files changed (1) hide show

app.py +61 -11

app.py CHANGED Viewed

@@ -15,18 +15,65 @@ import logging
 from openai_tts_tool import generate_audio_and_text
 import tempfile
-# [Previous imports and initialization code remains the same...]
 def create_summary_file(summary_text):
     """Create a downloadable file from the summary text"""
     if not summary_text:
         return None
-    temp_dir = os.path.join(os.getcwd(), 'temp')
-    if not os.path.exists(temp_dir):
-        os.makedirs(temp_dir)
-    # Create a unique filename
     summary_file = os.path.join(temp_dir, f"summary_{hash(summary_text)}.txt")
     with open(summary_file, 'w', encoding='utf-8') as f:
@@ -35,6 +82,7 @@ def create_summary_file(summary_text):
     return summary_file
 def query_app(query, model_name, use_similarity_check, api_key):
     global vector_index, query_log
     if vector_index is None:
@@ -50,7 +98,6 @@ def query_app(query, model_name, use_similarity_check, api_key):
         response = query_engine.query(query)
         generated_response = response.response
-        # Return both the response and the same response (to update the text generation input)
         return generated_response, generated_response
     except Exception as e:
@@ -70,7 +117,12 @@ def create_gradio_interface():
         with gr.Tab("📤 Upload Documents"):
             file_upload = gr.File(label="Upload Files", file_count="multiple", type="filepath")
-            lang_dropdown = gr.Dropdown(choices=langs, label="Select OCR Language", value='eng')
             upload_button = gr.Button("Upload and Index")
             upload_status = gr.Textbox(label="Status", interactive=False)
@@ -129,14 +181,12 @@ def create_gradio_interface():
             outputs=[upload_status]
         )
-        # Modified to update both answer output and text generation input
         query_button.click(
             fn=query_app,
             inputs=[query_input, model_dropdown, similarity_checkbox, api_key_input],
-            outputs=[answer_output, text_input]  # Now updates both outputs
         )
-        # Modified to handle file output
         def process_generation(*args):
             audio_file, summary_text = generate_audio_and_text(*args)
             summary_file = create_summary_file(summary_text) if summary_text else None

 from openai_tts_tool import generate_audio_and_text
 import tempfile
+# Set up logging configuration
+logging.basicConfig(level=logging.INFO, format='%(asctime)s | %(levelname)s | %(message)s')
+# Initialize global variables
+vector_index = None
+query_log = []
+sentence_model = SentenceTransformer('all-MiniLM-L6-v2')
+# Define a fallback list of common OCR languages
+DEFAULT_LANGS = [
+    'eng',  # English
+    'fra',  # French
+    'deu',  # German
+    'spa',  # Spanish
+    'ita',  # Italian
+    'por',  # Portuguese
+    'nld',  # Dutch
+    'pol',  # Polish
+    'tur',  # Turkish
+    'rus',  # Russian
+    'ara',  # Arabic
+    'hin',  # Hindi
+    'jpn',  # Japanese
+    'kor',  # Korean
+    'chi_sim',  # Simplified Chinese
+    'chi_tra'   # Traditional Chinese
+]
+def get_available_languages():
+    """Get available Tesseract languages with fallback"""
+    try:
+        # Try to get languages from Tesseract
+        langs = os.popen('tesseract --list-langs').read().split('\n')[1:-1]
+        if langs and len(langs) > 0:
+            return sorted(langs)
+    except Exception as e:
+        logging.warning(f"Could not get Tesseract languages: {e}")
+    # Fallback to default languages
+    return DEFAULT_LANGS
+# Get available languages once at startup
+AVAILABLE_LANGUAGES = get_available_languages()
+def create_temp_dir():
+    """Create temporary directory if it doesn't exist"""
+    temp_dir = os.path.join(os.getcwd(), 'temp')
+    if not os.path.exists(temp_dir):
+        os.makedirs(temp_dir)
+    return temp_dir
+# [Previous helper functions remain the same...]
 def create_summary_file(summary_text):
     """Create a downloadable file from the summary text"""
     if not summary_text:
         return None
+    temp_dir = create_temp_dir()
     summary_file = os.path.join(temp_dir, f"summary_{hash(summary_text)}.txt")
     with open(summary_file, 'w', encoding='utf-8') as f:
     return summary_file
 def query_app(query, model_name, use_similarity_check, api_key):
+    """Process a query and return both the answer and the text for generation"""
     global vector_index, query_log
     if vector_index is None:
         response = query_engine.query(query)
         generated_response = response.response
         return generated_response, generated_response
     except Exception as e:
         with gr.Tab("📤 Upload Documents"):
             file_upload = gr.File(label="Upload Files", file_count="multiple", type="filepath")
+            lang_dropdown = gr.Dropdown(
+                choices=AVAILABLE_LANGUAGES,
+                label="Select OCR Language",
+                value='eng',
+                info="Select the primary language of your documents"
+            )
             upload_button = gr.Button("Upload and Index")
             upload_status = gr.Textbox(label="Status", interactive=False)
             outputs=[upload_status]
         )
         query_button.click(
             fn=query_app,
             inputs=[query_input, model_dropdown, similarity_checkbox, api_key_input],
+            outputs=[answer_output, text_input]
         )
         def process_generation(*args):
             audio_file, summary_text = generate_audio_and_text(*args)
             summary_file = create_summary_file(summary_text) if summary_text else None