Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -15,22 +15,26 @@ sentiment_analyzer = pipeline("sentiment-analysis", model="distilbert-base-uncas
|
|
15 |
# Function to read content from different file types
|
16 |
def read_file(file, file_type):
|
17 |
content = ""
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
for
|
32 |
-
|
33 |
-
|
|
|
|
|
|
|
|
|
34 |
return content
|
35 |
|
36 |
# Function to process the file and generate outputs
|
@@ -38,7 +42,7 @@ def process_file(file, file_type, language="en"):
|
|
38 |
content = read_file(file, file_type)
|
39 |
|
40 |
# Check if content is not empty
|
41 |
-
if not content.strip():
|
42 |
return "Error: The document is empty or unsupported format.", None, None, None, None, None
|
43 |
|
44 |
# Summarize the content
|
@@ -83,7 +87,7 @@ def process_file(file, file_type, language="en"):
|
|
83 |
def home_page():
|
84 |
with gr.Blocks() as home:
|
85 |
# Header
|
86 |
-
gr.Markdown("##
|
87 |
|
88 |
# Menu bar as buttons
|
89 |
with gr.Row():
|
@@ -93,6 +97,20 @@ def home_page():
|
|
93 |
# Display content on home page
|
94 |
gr.Markdown("Welcome to the Document Processor!")
|
95 |
gr.Markdown("Upload your document here and click to view details on the 'Full Analysis' page.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
96 |
|
97 |
return home
|
98 |
|
@@ -109,21 +127,18 @@ def detailed_page():
|
|
109 |
# File upload and processing components
|
110 |
file_input = gr.File(label="Upload Document")
|
111 |
file_type = gr.Dropdown(["pdf", "docx", "txt", "pptx"], label="File Type")
|
112 |
-
content_output = gr.Textbox(label="Original Content")
|
113 |
-
rephrased_output = gr.Textbox(label="Rephrased Content")
|
114 |
-
summary_output = gr.Textbox(label="Summary")
|
115 |
-
sentiment_output = gr.Textbox(label="Sentiment Analysis")
|
116 |
keywords_output = gr.Textbox(label="Keywords")
|
|
|
117 |
download_link = gr.File(label="Download Processed Document")
|
118 |
|
119 |
def on_file_upload(file, file_type):
|
120 |
if not file:
|
121 |
-
return "No file uploaded.", None, None, None
|
122 |
-
|
123 |
-
return
|
124 |
|
125 |
# Process file on upload
|
126 |
-
file_input.change(on_file_upload, inputs=[file_input, file_type], outputs=[
|
127 |
|
128 |
# Sample output or content for the detailed analysis page
|
129 |
gr.Markdown("Here you will see detailed analysis outputs after document upload.")
|
|
|
15 |
# Function to read content from different file types
|
16 |
def read_file(file, file_type):
    """Extract the plain text of an uploaded document.

    Args:
        file: The uploaded document. For "txt" it must expose ``read()``
            returning either bytes (decoded as UTF-8) or str. For the other
            types it is passed unchanged to ``Document``,
            ``PyPDF2.PdfReader`` or ``Presentation``.
        file_type: One of "docx", "txt", "pdf" or "pptx". Any other value
            yields an empty string.

    Returns:
        The extracted text, one line per paragraph / page / shape. If reading
        fails for any reason, a message starting with
        "Error reading the file: " is returned instead of raising.
    """
    content = ""
    try:
        if file_type == "docx":
            doc = Document(file)
            content = "".join(para.text + "\n" for para in doc.paragraphs)
        elif file_type == "txt":
            raw = file.read()
            # A handle opened in text mode already returns str; only bytes
            # need decoding. Anything else fails below and is reported.
            content = raw if isinstance(raw, str) else raw.decode("utf-8")
        elif file_type == "pdf":
            pdf_reader = PyPDF2.PdfReader(file)
            # Defensive: tolerate a page for which no text could be extracted.
            content = "".join(
                (page.extract_text() or "") + "\n" for page in pdf_reader.pages
            )
        elif file_type == "pptx":
            prs = Presentation(file)
            content = "".join(
                shape.text + "\n"
                for slide in prs.slides
                for shape in slide.shapes
                if hasattr(shape, "text")
            )
    except Exception as e:
        # Deliberate catch-all at the file-parsing boundary: the failure is
        # reported through the returned string rather than propagated.
        content = f"Error reading the file: {str(e)}"

    return content
|
39 |
|
40 |
# Function to process the file and generate outputs
|
|
|
42 |
content = read_file(file, file_type)
|
43 |
|
44 |
# Check if content is not empty
|
45 |
+
if not content.strip() or "Error" in content:
|
46 |
return "Error: The document is empty or unsupported format.", None, None, None, None, None
|
47 |
|
48 |
# Summarize the content
|
|
|
87 |
def home_page():
|
88 |
with gr.Blocks() as home:
|
89 |
# Header
|
90 |
+
gr.Markdown("## Upload a Document to Process")
|
91 |
|
92 |
# Menu bar as buttons
|
93 |
with gr.Row():
|
|
|
97 |
# Display content on home page
|
98 |
gr.Markdown("Welcome to the Document Processor!")
|
99 |
gr.Markdown("Upload your document here and click to view details on the 'Full Analysis' page.")
|
100 |
+
|
101 |
+
# File upload and content output
|
102 |
+
file_input = gr.File(label="Upload Document")
|
103 |
+
content_output = gr.Textbox(label="Original Content")
|
104 |
+
rephrased_output = gr.Textbox(label="Rephrased Content")
|
105 |
+
|
106 |
+
def on_file_upload(file):
    """Process an uploaded document and return its original and rephrased text.

    The document type is inferred from the file extension instead of being
    fixed to "docx", so pdf, txt and pptx uploads are read with the matching
    parser. Unknown or missing extensions fall back to "docx".

    Args:
        file: The value delivered by the upload component: either a path
            string or an object exposing the path as ``.name``. Falsy when
            nothing is uploaded.

    Returns:
        A ``(content, rephrased)`` tuple, or ``("No file uploaded.", None)``
        when no file is present.
    """
    import os  # local import: the file's top-level import block is not in view

    if not file:
        return "No file uploaded.", None

    path = getattr(file, "name", file)
    extension = os.path.splitext(str(path))[1].lstrip(".").lower()
    supported = ("pdf", "docx", "txt", "pptx")
    file_type = extension if extension in supported else "docx"

    content, rephrased, _, _, _, _ = process_file(file, file_type=file_type)
    return content, rephrased
|
111 |
+
|
112 |
+
# Process file on upload
|
113 |
+
file_input.change(on_file_upload, inputs=file_input, outputs=[content_output, rephrased_output])
|
114 |
|
115 |
return home
|
116 |
|
|
|
127 |
# File upload and processing components
|
128 |
file_input = gr.File(label="Upload Document")
|
129 |
file_type = gr.Dropdown(["pdf", "docx", "txt", "pptx"], label="File Type")
|
|
|
|
|
|
|
|
|
130 |
keywords_output = gr.Textbox(label="Keywords")
|
131 |
+
sentiment_output = gr.Textbox(label="Sentiment Analysis")
|
132 |
download_link = gr.File(label="Download Processed Document")
|
133 |
|
134 |
def on_file_upload(file, file_type):
    """Process an uploaded document and return the detailed-analysis outputs.

    Args:
        file: The uploaded file; falsy when nothing is uploaded.
        file_type: The document type selected by the user, passed through to
            ``process_file``.

    Returns:
        A ``(keywords, sentiment, download_path)`` tuple. When no file is
        present, the first element carries the "No file uploaded." message
        and the other two are ``None``.
    """
    if not file:
        # Exactly three values: the upload event is wired to three outputs
        # (keywords, sentiment, download link).
        return "No file uploaded.", None, None

    _, _, _, sentiment, keywords, download_path = process_file(file, file_type)
    return keywords, sentiment, download_path
|
139 |
|
140 |
# Process file on upload
|
141 |
+
file_input.change(on_file_upload, inputs=[file_input, file_type], outputs=[keywords_output, sentiment_output, download_link])
|
142 |
|
143 |
# Sample output or content for the detailed analysis page
|
144 |
gr.Markdown("Here you will see detailed analysis outputs after document upload.")
|