Spaces:
Build error
Build error
MrFeelgoood
committed on
Commit
•
5d57f7a
1
Parent(s):
4b491a9
Modified graphics of the UI
Browse files
app.py
CHANGED
@@ -205,17 +205,6 @@ def extractor_clean(text, k_words, transformer, question, total_kwords, return_t
|
|
205 |
|
206 |
|
207 |
|
208 |
-
def format_output(extracted_values):
|
209 |
-
output = f"Valori: {extracted_values[0][0]}\n"
|
210 |
-
output += f"Totale: {extracted_values[0][1]}\n"
|
211 |
-
if extracted_values[1] == True:
|
212 |
-
output += "-------------------\n"
|
213 |
-
output += f"Rif. Testo:\n{extracted_values[2]}"
|
214 |
-
return output
|
215 |
-
|
216 |
-
|
217 |
-
|
218 |
-
|
219 |
def pdf_ocr(file):
|
220 |
# Convert PDF to image
|
221 |
with tempfile.TemporaryDirectory() as path:
|
@@ -243,22 +232,25 @@ def pdf_ocr(file):
|
|
243 |
# Call extractor_clean and format_output functions
|
244 |
ks = ('mq', 'metri quadri', 'm2')
|
245 |
tra = 'it5/it5-base-question-answering'
|
246 |
-
quest = "Quanti metri quadri misura
|
247 |
totalK = ['totale', 'complessivo', 'complessiva']
|
248 |
|
249 |
extracted_values = extractor_clean(text=text, k_words=ks, transformer=tra, question=quest, total_kwords=totalK, return_text=True)
|
250 |
-
|
251 |
-
|
252 |
-
|
253 |
|
|
|
254 |
|
255 |
def ocr_interface(pdf_file):
|
256 |
# Call the pdf_ocr function
|
257 |
-
|
258 |
-
return
|
259 |
|
260 |
|
261 |
pdf_input = gr.inputs.File(label="PDF File")
|
262 |
-
|
263 |
-
|
264 |
-
|
|
|
|
|
|
205 |
|
206 |
|
207 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
208 |
def pdf_ocr(file):
|
209 |
# Convert PDF to image
|
210 |
with tempfile.TemporaryDirectory() as path:
|
|
|
232 |
# Call extractor_clean and format_output functions
|
233 |
ks = ('mq', 'metri quadri', 'm2')
|
234 |
tra = 'it5/it5-base-question-answering'
|
235 |
+
quest = "Quanti metri quadri misura la superficie?"
|
236 |
totalK = ['totale', 'complessivo', 'complessiva']
|
237 |
|
238 |
extracted_values = extractor_clean(text=text, k_words=ks, transformer=tra, question=quest, total_kwords=totalK, return_text=True)
|
239 |
+
values_output = extracted_values[0][0] # Join values with '\n'
|
240 |
+
total_output = extracted_values[0][1]
|
241 |
+
text_output = extracted_values[2]
|
242 |
|
243 |
+
return values_output, total_output, text_output
|
244 |
|
245 |
def ocr_interface(pdf_file):
|
246 |
# Call the pdf_ocr function
|
247 |
+
values, total, text = pdf_ocr(pdf_file.name)
|
248 |
+
return values, total, text
|
249 |
|
250 |
|
251 |
pdf_input = gr.inputs.File(label="PDF File")
|
252 |
+
values_output = gr.outputs.Textbox(label="Mq. Values")
|
253 |
+
total_output = gr.outputs.Textbox(label="Total")
|
254 |
+
text_output = gr.outputs.Textbox(label="Ref. Text")
|
255 |
+
iface = gr.Interface(fn=ocr_interface, inputs=pdf_input, title="PDF MQ EXTRACTOR", outputs=[values_output, total_output, text_output], preprocess=format_output)
|
256 |
+
iface.launch()
|