karida committed
Commit d4a6a10 • 1 parent: 8b5a1c6
Add gradio
README.md CHANGED
@@ -5,7 +5,7 @@ colorFrom: purple
 colorTo: purple
 sdk: gradio
 sdk_version: 4.15.0
-app_file:
+app_file: main.py
 pinned: false
 license: other
 ---
main.py ADDED (new file, 165 lines)
import torch
from PIL import ImageDraw, ImageFont, Image
from transformers import AutoModelForTokenClassification, AutoProcessor
import fitz  # PyMuPDF
import io


def extract_data_from_pdf(pdf_path, page_number=0):
    """
    Extracts image, words, and bounding boxes from a specified page of a PDF.

    Args:
    - pdf_path (str): Path to the PDF file.
    - page_number (int): Page number to extract data from (0-indexed).

    Returns:
    - image: An image of the specified page.
    - words: A list of words found on the page.
    - boxes: A list of bounding boxes corresponding to the words.
    """
    # Open the PDF
    doc = fitz.open(pdf_path)
    page = doc.load_page(page_number)

    # Extract image of the page
    pix = page.get_pixmap()
    image_bytes = pix.tobytes("png")
    image = Image.open(io.BytesIO(image_bytes))

    # Extract words and their bounding boxes
    words = []
    boxes = []
    for word in page.get_text("words"):
        words.append(word[4])
        boxes.append(word[:4])  # (x0, y0, x1, y1)

    doc.close()
    return image, words, boxes


def merge_pairs_v2(pairs):
    """Merge consecutive [label, text] pairs that share the same label."""
    if not pairs:
        return []

    merged = [pairs[0]]
    for current in pairs[1:]:
        last = merged[-1]
        if last[0] == current[0]:
            # Same label as the previous pair: concatenate the text
            merged[-1] = [last[0], last[1] + " " + current[1]]
        else:
            merged.append(current)

    return merged


def create_pretty_table(data):
    """Render [label, text] rows as simple colored HTML paragraphs."""
    table = "<div>"
    for row in data:
        color = (
            "blue"
            if row[0] == "Header"
            else "green" if row[0] == "Section" else "black"
        )
        table += "<p style='color:{};'>---{}---</p>{}".format(color, row[0], row[1])
    table += "</div>"
    return table


# When using this function in Gradio, set the output type to 'html'


def interference(example, page_number=0):
    """Run LayoutLMv3 token classification on one PDF page; return the annotated image and an HTML summary."""
    image, words, boxes = extract_data_from_pdf(example, page_number)
    boxes = [list(map(int, box)) for box in boxes]

    # Load the fine-tuned model and the base processor (OCR disabled; boxes come from PyMuPDF above)
    model = AutoModelForTokenClassification.from_pretrained("karida/LayoutLMv3_RFP")
    processor = AutoProcessor.from_pretrained(
        "microsoft/layoutlmv3-base", apply_ocr=False
    )
    encoding = processor(image, words, boxes=boxes, return_tensors="pt")

    # Prediction
    with torch.no_grad():
        outputs = model(**encoding)

    logits = outputs.logits
    predictions = logits.argmax(-1).squeeze().tolist()
    model_words = encoding.word_ids()

    # Process predictions
    token_boxes = encoding.bbox.squeeze().tolist()
    width, height = image.size

    true_predictions = [model.config.id2label[pred] for pred in predictions]
    true_boxes = token_boxes

    # Draw annotations on the image
    draw = ImageDraw.Draw(image)
    font = ImageFont.load_default()

    def iob_to_label(label):
        # Strip the IOB prefix ("B-", "I-"); empty labels map to "other"
        label = label[2:]
        return "other" if not label else label.lower()

    label2color = {
        "question": "blue",
        "answer": "green",
        "header": "orange",
        "other": "violet",
    }

    # print(len(true_predictions), len(true_boxes), len(model_words))

    table = []
    ids = set()

    for prediction, box, model_word in zip(true_predictions, true_boxes, model_words):
        predicted_label = iob_to_label(prediction)
        draw.rectangle(box, outline=label2color[predicted_label], width=2)
        # draw.text((box[0] + 10, box[1] - 10), text=predicted_label, fill=label2color[predicted_label], font=font)
        # word_ids() yields None for special tokens; keep only the first prediction per word
        if model_word is not None and model_word not in ids and predicted_label != "other":
            ids.add(model_word)
            table.append([predicted_label[0], words[model_word]])

    values = merge_pairs_v2(table)
    values = [["Header", x[1]] if x[0] == "q" else ["Section", x[1]] for x in values]
    table = create_pretty_table(values)
    return image, table


import gradio as gr

description_text = """
<p>
Heading - <span style='color: blue;'>shown in blue</span><br>
Section - <span style='color: green;'>shown in green</span><br>
other (ignored) - <span style='color: violet;'>shown in violet</span>
</p>
"""

flagging_options = ["great example", "bad example"]


iface = gr.Interface(
    fn=interference,
    inputs=["file", "number"],
    outputs=["image", "html"],
    # examples=[["output.pdf", 1]],
    description=description_text,
    flagging_options=flagging_options,
)
# iface.save(".")
if __name__ == "__main__":
    iface.launch()
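Not part of the commit itself, but as a quick way to sanity-check the new module outside the Gradio UI, a minimal sketch along these lines could be used (the input path "sample.pdf" is a hypothetical placeholder; everything else refers to names defined in main.py above):

# Minimal local check of the new helpers; "sample.pdf" is a hypothetical input path.
from main import extract_data_from_pdf, interference

# PDF parsing only: page image plus word strings and their bounding boxes.
image, words, boxes = extract_data_from_pdf("sample.pdf", page_number=0)
print(len(words), "words on page 0; first few:", words[:5])

# Full pipeline: annotated page image and the HTML heading/section summary.
annotated, html_table = interference("sample.pdf", page_number=0)
annotated.save("annotated_page0.png")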