File size: 6,040 Bytes
32531dc
 
 
 
 
 
 
 
 
 
 
 
 
 
9d47d09
 
32531dc
ae25925
32531dc
ae25925
 
 
 
 
9d47d09
 
 
ae25925
 
 
 
 
 
 
 
32531dc
9d47d09
dd791f7
32531dc
 
ae25925
32531dc
9d47d09
ae25925
32531dc
 
9d47d09
 
 
32531dc
9d47d09
 
ae25925
 
9d47d09
32531dc
 
ae25925
32531dc
9d47d09
dd791f7
 
9d47d09
ae25925
 
 
 
 
 
 
32531dc
dd791f7
 
32531dc
 
 
 
dd791f7
9d47d09
 
 
 
 
dd791f7
 
 
 
 
9d47d09
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dd791f7
 
ae25925
 
 
 
9d47d09
 
 
 
 
 
 
 
ae25925
9d47d09
 
 
ae25925
9d47d09
 
ae25925
 
 
 
 
 
9d47d09
ae25925
9d47d09
 
 
 
 
 
 
 
 
32531dc
 
9d47d09
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
import os
import cv2
import numpy as np
from PIL import Image
import pytesseract
import gradio as gr
from pdf2image import convert_from_path
import PyPDF2
from llama_index.core import VectorStoreIndex, Document
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.llms.openai import OpenAI
from llama_index.core import get_response_synthesizer
from sentence_transformers import SentenceTransformer, util
import logging
from openai_tts_tool import generate_audio_and_text
import tempfile

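# [Previous imports and initialization code remain the same. The code below
# relies on names expected to be defined there: vector_index, langs and
# process_upload.]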

def create_summary_file(summary_text):
    """Write the summary text to a file that can be offered for download.

    The file is placed in a ``temp`` directory under the current working
    directory and is named after a SHA-256 digest of the text, so the same
    summary always maps to the same path.

    Args:
        summary_text: Text to store. Falsy values (``None`` or ``""``)
            produce no file.

    Returns:
        The path of the written UTF-8 text file, or ``None`` when there is
        nothing to write.
    """
    import hashlib  # local import: hashlib is not among the module-level imports

    if not summary_text:
        return None

    temp_dir = os.path.join(os.getcwd(), 'temp')
    # exist_ok avoids the check-then-create race between concurrent requests.
    os.makedirs(temp_dir, exist_ok=True)

    # The built-in hash() is salted per process for strings and may be
    # negative, which gives unstable, odd-looking names; a content digest is
    # deterministic across runs.
    digest = hashlib.sha256(summary_text.encode('utf-8')).hexdigest()[:16]
    summary_file = os.path.join(temp_dir, f"summary_{digest}.txt")

    with open(summary_file, 'w', encoding='utf-8') as f:
        f.write(summary_text)

    return summary_file

def query_app(query, model_name, use_similarity_check, api_key):
    """Answer a question against the indexed documents using an OpenAI model.

    Reads the module-level ``vector_index`` (defined outside this function)
    and queries it through a query engine built with the selected model.

    Args:
        query: The question text entered by the user.
        model_name: OpenAI model identifier passed to the ``OpenAI`` LLM.
        use_similarity_check: Accepted for interface compatibility; it is not
            used by this function.
        api_key: OpenAI API key used to construct the LLM.

    Returns:
        A two-element tuple. On success both elements are the generated
        answer (the second copy is used to pre-fill another input). On any
        failure the first element is a human-readable message and the second
        is ``None``.
    """
    if vector_index is None:
        return "No documents indexed yet. Please upload documents first.", None

    if not api_key:
        return "Please provide a valid OpenAI API Key.", None

    # Reject blank questions up front rather than spending an API call on them.
    if not query or not query.strip():
        return "Please enter a question.", None

    try:
        llm = OpenAI(model=model_name, api_key=api_key)
        response_synthesizer = get_response_synthesizer(llm=llm)
        query_engine = vector_index.as_query_engine(llm=llm, response_synthesizer=response_synthesizer)
        response = query_engine.query(query)

        generated_response = response.response
        # Return the answer twice: once for display, once for the second output.
        return generated_response, generated_response

    except Exception as e:
        # UI boundary: report the failure to the user instead of raising, but
        # keep the traceback in the log for diagnosis.
        logging.exception("Error during query processing: %s", e)
        return f"Error during query processing: {str(e)}", None

def create_gradio_interface():
    """Build the Gradio Blocks UI and wire up its event handlers.

    The layout consists of a shared API-key textbox followed by three tabs:
    document upload, question answering, and audio/summary generation.

    Relies on ``langs`` and ``process_upload``, which are not defined in the
    visible part of this file (presumably provided by the initialization
    code), as well as ``query_app``, ``create_summary_file`` and the imported
    ``generate_audio_and_text``.

    Returns:
        The configured ``gr.Blocks`` object. It is not launched here; the
        caller decides whether to call ``launch()``.
    """
    # NOTE(review): the leading characters of the heading and tab labels below
    # look like mis-encoded emoji - confirm the file's encoding.
    with gr.Blocks(title="Document Processing and TTS App") as demo:
        gr.Markdown("# πŸ“„ Document Processing, Text & Audio Generation App")
        
        # Created outside any tab so the same key textbox can feed the
        # handlers of all three tabs.
        api_key_input = gr.Textbox(
            label="Enter OpenAI API Key",
            placeholder="Paste your OpenAI API Key here",
            type="password"
        )
        
        # Tab 1: upload files and choose the OCR language.
        with gr.Tab("πŸ“€ Upload Documents"):
            file_upload = gr.File(label="Upload Files", file_count="multiple", type="filepath")
            lang_dropdown = gr.Dropdown(choices=langs, label="Select OCR Language", value='eng')
            upload_button = gr.Button("Upload and Index")
            upload_status = gr.Textbox(label="Status", interactive=False)

        # Tab 2: ask a question with a selectable model.
        with gr.Tab("❓ Ask a Question"):
            query_input = gr.Textbox(label="Enter your question")
            model_dropdown = gr.Dropdown(
                choices=["gpt-4-0125-preview", "gpt-3.5-turbo-0125"],
                label="Select Model",
                value="gpt-3.5-turbo-0125"
            )
            similarity_checkbox = gr.Checkbox(label="Use Similarity Check", value=False)
            query_button = gr.Button("Ask")
            answer_output = gr.Textbox(label="Answer", interactive=False)

        # Tab 3: options for generating audio and/or a summary from text.
        with gr.Tab("πŸ—£οΈ Generate Audio and Text"):
            text_input = gr.Textbox(label="Enter text for generation")
            voice_type = gr.Dropdown(
                choices=["alloy", "echo", "fable", "onyx", "nova", "shimmer"],
                label="Voice Type",
                value="alloy"
            )
            voice_speed = gr.Slider(
                minimum=0.25,
                maximum=4.0,
                value=1.0,
                label="Voice Speed"
            )
            language = gr.Dropdown(
                choices=["en", "ar", "de", "hi", "es", "fr", "it", "ja", "ko", "pt"],
                label="Language",
                value="en"
            )
            output_option = gr.Radio(
                choices=["audio", "summary_text", "both"],
                label="Output Option",
                value="both"
            )
            summary_length = gr.Slider(
                minimum=50,
                maximum=500,
                value=100,
                step=10,
                label="Summary Length (words)"
            )
            additional_prompt = gr.Textbox(label="Additional Prompt (Optional)")
            generate_button = gr.Button("Generate")
            
            # Audio player and downloadable summary file shown side by side.
            with gr.Row():
                audio_output = gr.Audio(label="Generated Audio")
                summary_output = gr.File(label="Generated Summary Text")

        # Event wiring: connect each button to its handler.
        upload_button.click(
            fn=process_upload,
            inputs=[api_key_input, file_upload, lang_dropdown],
            outputs=[upload_status]
        )
        
        # query_app returns two values: the first fills the answer box, the
        # second fills the text input of the generation tab.
        query_button.click(
            fn=query_app,
            inputs=[query_input, model_dropdown, similarity_checkbox, api_key_input],
            outputs=[answer_output, text_input]  # answer display + generation-tab text input
        )
        
        # Adapter around generate_audio_and_text: the summary comes back as
        # text, but summary_output is a gr.File, so the text is written to a
        # file and its path is returned instead.
        def process_generation(*args):
            # args arrive positionally in the order of the `inputs` list below.
            audio_file, summary_text = generate_audio_and_text(*args)
            summary_file = create_summary_file(summary_text) if summary_text else None
            return audio_file, summary_file
            
        # model_dropdown from the question tab is reused here as the model input.
        generate_button.click(
            fn=process_generation,
            inputs=[
                api_key_input, text_input, model_dropdown, voice_type,
                voice_speed, language, output_option, summary_length,
                additional_prompt
            ],
            outputs=[audio_output, summary_output]
        )

    return demo

# The interface is built on both import and direct execution so that a
# module-level `demo` is always available; it is only launched when the
# file is run as a script.
demo = create_gradio_interface()

if __name__ == "__main__":
    demo.launch()