MrFeelgoood committed on
Commit
20f0ac1
1 Parent(s): 2de9223

Redesigned interface

Browse files

Rebuilt the Gradio interface

Files changed (1) hide show
  1. app.py +51 -17
app.py CHANGED
@@ -214,9 +214,7 @@ def format_output(extracted_values):
214
  return output
215
 
216
 
217
-
218
-
219
- def pdf_ocr(file):
220
  # Convert PDF to image
221
  with tempfile.TemporaryDirectory() as path:
222
  with open(file, "rb") as f:
@@ -240,28 +238,64 @@ def pdf_ocr(file):
240
  # Clear the image list to free up memory
241
  del images
242
 
243
- # Call extractor_clean and format_output functions
244
  ks = ('mq', 'metri quadri', 'm2')
245
- tra = 'it5/it5-base-question-answering'
246
  quest = "Quanti metri quadri misura la superficie?"
247
  totalK = ['totale', 'complessivo', 'complessiva']
248
 
249
- extracted_values = extractor_clean(text=text, k_words=ks, transformer=tra, question=quest, total_kwords=totalK, return_text=True)
250
- values_output = extracted_values[0][0] # Join values with '\n'
251
- total_output = extracted_values[0][1]
252
  text_output = extracted_values[2]
253
 
254
- return values_output, total_output, text_output
 
255
 
256
- def ocr_interface(pdf_file):
 
 
257
  # Call the pdf_ocr function
258
- values, total, text = pdf_ocr(pdf_file.name)
259
  return values, total, text
260
 
261
 
262
- pdf_input = gr.inputs.File(label="PDF File")
263
- values_output = gr.outputs.Textbox(label="Mq. Values")
264
- total_output = gr.outputs.Textbox(label="Total")
265
- text_output = gr.outputs.Textbox(label="Ref. Text")
266
- iface = gr.Interface(fn=ocr_interface, inputs=pdf_input, title="PDF MQ EXTRACTOR", examples=["Example1.pdf", "Example2.pdf"], outputs=[values_output, total_output, text_output])
267
- iface.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
214
  return output
215
 
216
 
217
+ def pdf_ocr(file, model_t, question):
 
 
218
  # Convert PDF to image
219
  with tempfile.TemporaryDirectory() as path:
220
  with open(file, "rb") as f:
 
238
  # Clear the image list to free up memory
239
  del images
240
 
 
241
  ks = ('mq', 'metri quadri', 'm2')
 
242
  quest = "Quanti metri quadri misura la superficie?"
243
  totalK = ['totale', 'complessivo', 'complessiva']
244
 
245
+ extracted_values = extractor_clean(text=text, k_words=ks, transformer=model_t, question=question, total_kwords=totalK, return_text=True)
246
+ values_output = extracted_values[0][0]
247
+ total_output = f'{extracted_values[0][1]} Mq'
248
  text_output = extracted_values[2]
249
 
250
+ immobile_values = [f'{i + 1}. Immobile : {value} Mq\n' for i, value in enumerate(values_output)]
251
+ immobile_values = '\n'.join(immobile_values)
252
 
253
+ return immobile_values, total_output, text_output
254
+
255
+ def ocr_interface(pdf_file, model_t, question):
256
  # Call the pdf_ocr function
257
+ values, total, text = pdf_ocr(pdf_file.name, model_t, question)
258
  return values, total, text
259
 
260
 
261
+
262
+
263
+
264
+ with gr.Blocks(theme=gr.themes.Soft()) as demo:
265
+
266
+ gr.Markdown(
267
+ '''
268
+ # PDF Mq Extractor
269
+ Set the params and switch the tabs to see the output.
270
+ ''')
271
+ with gr.Tab("Extractor", scroll_to_output = True):
272
+ with gr.Row():
273
+ pdf_input = gr.inputs.File(label="PDF File")
274
+
275
+ with gr.Row():
276
+ model_input = gr.inputs.Dropdown(['it5/it5-base-question-answering', 'it5/it5-small-question-answering'], label = 'Select model')
277
+ question_input = gr.inputs.Dropdown(["Quanti metri quadri misura l'immobile?"], label = 'Question')
278
+
279
+ with gr.Column():
280
+ gr.Markdown(
281
+ '''
282
+ # Output values
283
+ Values extracted from the pdf document
284
+ ''')
285
+
286
+ with gr.Row():
287
+
288
+ values_output = gr.outputs.Textbox(label="Area Values")
289
+ total_output = gr.outputs.Textbox(label="Total")
290
+ with gr.Row():
291
+ extract_button = gr.Button("Extract")
292
+
293
+ with gr.Tab("Ref. Text"):
294
+ text_output = gr.outputs.Textbox(label="Ref. Text")
295
+
296
+ extract_button.click(fn = ocr_interface, inputs=[pdf_input, model_input, question_input], outputs=[values_output, total_output, text_output])
297
+
298
+
299
+ demo.launch()
300
+
301
+