Spaces:

agentsea
/

paligemma-waveui

Running on Zero

App Files Files Community

nph4rd committed on Jul 8

Commit

c29ac1a

•

1 Parent(s): 4019c00

Create app.py

Browse files

add egs

update

remove seg

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

airbnb.jpg +0 -0
app.py +137 -0
examples/barsik.jpg +0 -0
examples/barsik.json +7 -0
examples/biennale.jpg +0 -0
examples/biennale.json +7 -0
examples/billard1.jpg +0 -0
examples/billard1.json +7 -0
examples/billard2.jpg +0 -0
examples/billard2.json +7 -0
examples/bowie.jpg +0 -0
examples/bowie.json +7 -0
examples/branch.jpg +0 -0
examples/branch.json +7 -0
examples/cc_fox.jpg +0 -0
examples/cc_fox.json +7 -0
examples/cc_landscape.jpg +0 -0
examples/cc_landscape.json +7 -0
examples/cc_puffin.jpg +0 -0
examples/cc_puffin.json +7 -0
examples/couch.jpg +0 -0
examples/couch.json +7 -0
examples/couch_.json +7 -0
examples/cups.jpg +0 -0
examples/cups.json +7 -0
examples/dice.jpg +0 -0
examples/dice.json +7 -0
examples/emu.jpg +0 -0
examples/emu.json +7 -0
examples/fridge.jpg +0 -0
examples/fridge.json +7 -0
examples/givt.jpg +0 -0
examples/givt.json +7 -0
examples/greenlake.jpg +0 -0
examples/greenlake.json +7 -0
examples/howto.jpg +0 -0
examples/howto.json +7 -0
examples/markers.jpg +0 -0
examples/markers.json +7 -0
examples/mcair.jpg +0 -0
examples/mcair.json +7 -0
examples/mcair_.json +7 -0
examples/minergie.jpg +0 -0
examples/minergie.json +7 -0
examples/morel.jpg +0 -0
examples/morel.json +7 -0
examples/motorcyclists.jpg +0 -0
examples/motorcyclists.json +7 -0
examples/parking.jpg +0 -0
examples/parking.json +7 -0

airbnb.jpg ADDED Viewed

app.py ADDED Viewed

	@@ -0,0 +1,137 @@

+import gradio as gr
+import PIL.Image
+import transformers
+from transformers import PaliGemmaForConditionalGeneration, PaliGemmaProcessor
+import torch
+import os
+import string
+import functools
+import re
+import numpy as np
+import spaces
+model_id = "agentsea/paligemma-3b-ft-widgetcap-waveui-448"  # repo id handed to PaliGemmaForConditionalGeneration.from_pretrained below
+processor_id = "google/paligemma-3b-pt-448"  # repo id handed to PaliGemmaProcessor.from_pretrained; differs from model_id
+COLORS = ['#4285f4', '#db4437', '#f4b400', '#0f9d58', '#e48ef1']  # palette of hex colour strings
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # CUDA when available, otherwise CPU
+model = PaliGemmaForConditionalGeneration.from_pretrained(model_id).eval().to(device)  # loaded at import time, switched to eval mode, moved to `device`
+processor = PaliGemmaProcessor.from_pretrained(processor_id)  # loaded at import time
+###### Transformers Inference
+@spaces.GPU
+def infer(
+    image: PIL.Image.Image,
+    text: str,
+    max_new_tokens: int
+) -> str:
+    inputs = processor(text=text, images=image, return_tensors="pt").to(device)
+    with torch.inference_mode():
+      generated_ids = model.generate(
+          **inputs,
+          max_new_tokens=max_new_tokens,
+          do_sample=False
+      )
+    result = processor.batch_decode(generated_ids, skip_special_tokens=True)
+    return result[0][len(text):].lstrip("\n")
+def parse_segmentation(input_image, input_text):
+  out = infer(input_image, input_text, max_new_tokens=100)
+  objs = extract_objs(out.lstrip("\n"), input_image.size[0], input_image.size[1], unique_labels=True)
+  labels = set(obj.get('name') for obj in objs if obj.get('name'))
+  color_map = {l: COLORS[i % len(COLORS)] for i, l in enumerate(labels)}
+  highlighted_text = [(obj['content'], obj.get('name')) for obj in objs]
+  annotated_img = (
+    input_image,
+    [
+        (
+            obj['mask'] if obj.get('mask') is not None else obj['xyxy'],
+            obj['name'] or '',
+        )
+        for obj in objs
+        if 'mask' in obj or 'xyxy' in obj
+    ],
+)
+  has_annotations = bool(annotated_img[1])
+  return annotated_img
+######## Demo
+INTRO_TEXT = """## PaliGemma WaveUI\n\n
+Bla bla
+"""  # Markdown shown at the top of the page. NOTE(review): "Bla bla" looks like placeholder copy — confirm
+with gr.Blocks(css="style.css") as demo:  # `demo` is launched by the __main__ guard at the bottom of the file
+  gr.Markdown(INTRO_TEXT)
+  with gr.Tab("Detection"):
+    image = gr.Image(type="pil")  # type="pil" so the click handler receives a PIL image
+    seg_input = gr.Text(label="Entities to Detect")
+    seg_btn = gr.Button("Submit")
+    annotated_image = gr.AnnotatedImage(label="Output")  # receives the value returned by parse_segmentation
+    examples = [["./airbnb.jpg", "detect 'Amazing pools' button"]]  # one row: [image path, prompt], matching inputs=[image, seg_input]
+    gr.Markdown("Example images are licensed CC0 by [akolesnikoff@](https://github.com/akolesnikoff), [mbosnjak@](https://github.com/mbosnjak), [maximneumann@](https://github.com/maximneumann) and [merve](https://huggingface.co/merve).")  # NOTE(review): only ./airbnb.jpg is listed in `examples` — confirm this attribution applies to it
+    gr.Examples(
+        examples=examples,
+        inputs=[image, seg_input],
+    )
+    seg_inputs = [
+        image,
+        seg_input
+        ]
+    seg_outputs = [
+        annotated_image
+    ]
+    seg_btn.click(  # wire the Submit button to the detection handler
+        fn=parse_segmentation,
+        inputs=seg_inputs,
+        outputs=seg_outputs,
+    )
+_SEGMENT_DETECT_RE = re.compile(  # one detection record; consumed by extract_objs via .match()
+    r'(.*?)' +  # group 1: text before the first <loc> token (lazy; '.' does not match newlines)
+    r'<loc(\d{4})>' * 4 + r'\s*' +  # groups 2-5: four 4-digit location tokens, then optional whitespace
+    '(?:%s)?' % (r'<seg(\d{3})>' * 16) +  # groups 6-21: optional run of exactly sixteen 3-digit <seg> tokens
+    r'\s*([^;<>]+)? ?(?:; )?',  # last group: optional label without ';', '<' or '>'; then optional "; " separator
+)
+def extract_objs(text, width, height, unique_labels=False):
+  """Returns objs for a string with "<loc>" and "<seg>" tokens."""
+  objs = []
+  seen = set()
+  while text:
+    m = _SEGMENT_DETECT_RE.match(text)
+    if not m:
+      break
+    print("m", m)
+    gs = list(m.groups())
+    before = gs.pop(0)
+    name = gs.pop()
+    y1, x1, y2, x2 = [int(x) / 1024 for x in gs[:4]]
+    y1, x1, y2, x2 = map(round, (y1*height, x1*width, y2*height, x2*width))
+    mask = None
+    content = m.group()
+    if before:
+      objs.append(dict(content=before))
+      content = content[len(before):]
+    while unique_labels and name in seen:
+      name = (name or '') + "'"
+    seen.add(name)
+    objs.append(dict(
+        content=content, xyxy=(x1, y1, x2, y2), mask=mask, name=name))
+    text = text[len(before) + len(content):]
+  if text:
+    objs.append(dict(content=text))
+  return objs
+#########
+if __name__ == "__main__":
+    demo.queue(max_size=10).launch(debug=True)  # enable the request queue (max_size=10), then start the app with debug=True

examples/barsik.jpg ADDED Viewed

examples/barsik.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "name": "barsik",
+  "comment": "",
+  "model": "paligemma-3b-mix-224",
+  "prompt": "segment cat",
+  "license": "CC0 by [maximneumann@](https://github.com/maximneumann)"
+}

examples/biennale.jpg ADDED Viewed

examples/biennale.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "name": "biennale",
+  "comment": "",
+  "model": "paligemma-3b-mix-224",
+  "prompt": "In which city is this?",
+  "license": "CC0 by [andsteing@](https://huggingface.co/andsteing)"
+}

examples/billard1.jpg ADDED Viewed

examples/billard1.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "name": "billard1",
+  "comment": "",
+  "model": "paligemma-3b-mix-224",
+  "prompt": "How many red balls are there?",
+  "license": "CC0 by [mbosnjak@](https://github.com/mbosnjak)"
+}

examples/billard2.jpg ADDED Viewed

examples/billard2.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "name": "billard2",
+  "comment": "",
+  "model": "paligemma-3b-mix-224",
+  "prompt": "How many balls are there?",
+  "license": "CC0 by [mbosnjak@](https://github.com/mbosnjak)"
+}

examples/bowie.jpg ADDED Viewed

examples/bowie.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "name": "bowie",
+  "comment": "",
+  "model": "paligemma-3b-mix-224",
+  "prompt": "Who is this?",
+  "license": "CC0 by [akolesnikoff@](https://github.com/akolesnikoff)"
+}

examples/branch.jpg ADDED Viewed

examples/branch.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "name": "branch",
+  "comment": "",
+  "model": "paligemma-3b-mix-224",
+  "prompt": "What caused this?",
+  "license": "CC0 by [andsteing@](https://huggingface.co/andsteing)"
+}

examples/cc_fox.jpg ADDED Viewed

examples/cc_fox.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "name": "cc_fox",
+  "comment": "",
+  "model": "paligemma-3b-mix-448",
+  "prompt": "Which breed is this fox?",
+  "license": "CC0 by [XiaohuaZhai@](https://sites.google.com/view/xzhai)"
+}

examples/cc_landscape.jpg ADDED Viewed

examples/cc_landscape.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "name": "cc_landscape",
+  "comment": "",
+  "model": "paligemma-3b-mix-448",
+  "prompt": "What does the image show?",
+  "license": "CC0 by [XiaohuaZhai@](https://sites.google.com/view/xzhai)"
+}

examples/cc_puffin.jpg ADDED Viewed

examples/cc_puffin.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "name": "cc_puffin",
+  "comment": "",
+  "model": "paligemma-3b-mix-448",
+  "prompt": "detect puffin in the back; puffin in front",
+  "license": "CC0 by [XiaohuaZhai@](https://sites.google.com/view/xzhai)"
+}

examples/couch.jpg ADDED Viewed

examples/couch.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "name": "couch",
+  "comment": "",
+  "model": "paligemma-3b-mix-224",
+  "prompt": "How many yellow cushions are on the couch?",
+  "license": "CC0"
+}

examples/couch_.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "name": "couch",
+  "comment": "",
+  "model": "paligemma-3b-mix-224",
+  "prompt": "How many painting do you see in the image?",
+  "license": "CC0"
+}

examples/cups.jpg ADDED Viewed

examples/cups.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "name": "cups",
+  "comment": "",
+  "model": "paligemma-3b-mix-224",
+  "prompt": "how many cups?",
+  "license": "CC0 by [mbosnjak@](https://github.com/mbosnjak)"
+}

examples/dice.jpg ADDED Viewed

examples/dice.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "name": "dice",
+  "comment": "",
+  "model": "paligemma-3b-mix-224",
+  "prompt": "segment dice ; dice",
+  "license": "CC0 by [andresusanopinto@](https://github.com/andresusanopinto)"
+}

examples/emu.jpg ADDED Viewed

examples/emu.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "name": "emu",
+  "comment": "",
+  "model": "paligemma-3b-mix-224",
+  "prompt": "What animal is this?",
+  "license": "CC0 by [akolesnikoff@](https://github.com/akolesnikoff)"
+}

examples/fridge.jpg ADDED Viewed

examples/fridge.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "name": "fridge",
+  "comment": "",
+  "model": "paligemma-3b-mix-224",
+  "prompt": "Describe the image.",
+  "license": "CC0 by [andresusanopinto@](https://github.com/andresusanopinto)"
+}

examples/givt.jpg ADDED Viewed

examples/givt.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "name": "givt",
+  "comment": "",
+  "model": "paligemma-3b-mix-224",
+  "prompt": "What does the image show?",
+  "license": "CC-BY [GIVT paper](https://arxiv.org/abs/2312.02116)"
+}

examples/greenlake.jpg ADDED Viewed

examples/greenlake.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "name": "greenlake",
+  "comment": "",
+  "model": "paligemma-3b-mix-224",
+  "prompt": "Describe the image.",
+  "license": "CC0 by [akolesnikoff@](https://github.com/akolesnikoff)"
+}

examples/howto.jpg ADDED Viewed

examples/howto.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "name": "howto",
+  "comment": "",
+  "model": "paligemma-3b-mix-224",
+  "prompt": "What does this image show?",
+  "license": "CC-BY [How to train your ViT?](https://arxiv.org/abs/2106.10270)"
+}

examples/markers.jpg ADDED Viewed

examples/markers.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "name": "markers",
+  "comment": "answer en How many cups are there?",
+  "model": "paligemma-3b-mix-224",
+  "prompt": "How many cups are there?",
+  "license": "CC0"
+}

examples/mcair.jpg ADDED Viewed

examples/mcair.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "name": "mcair",
+  "comment": "",
+  "model": "paligemma-3b-mix-224",
+  "prompt": "Can you board this airplane?",
+  "license": "CC0 by [akolesnikoff@](https://github.com/akolesnikoff)"
+}

examples/mcair_.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "name": "mcair",
+  "comment": "",
+  "model": "paligemma-3b-mix-224",
+  "prompt": "Is this a restaurant?",
+  "license": "CC0 by [akolesnikoff@](https://github.com/akolesnikoff)"
+}

examples/minergie.jpg ADDED Viewed

examples/minergie.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "name": "minergie",
+  "comment": "",
+  "model": "paligemma-3b-mix-224",
+  "prompt": "ocr",
+  "license": "CC0 by [andsteing@](https://huggingface.co/andsteing)"
+}

examples/morel.jpg ADDED Viewed

examples/morel.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "name": "morel",
+  "comment": "",
+  "model": "paligemma-3b-mix-224",
+  "prompt": "detect morel",
+  "license": "CC0 by [andsteing@](https://huggingface.co/andsteing)"
+}

examples/motorcyclists.jpg ADDED Viewed

examples/motorcyclists.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "name": "motorcyclists",
+  "comment": "",
+  "model": "paligemma-3b-mix-224",
+  "prompt": "What does the image show?",
+  "license": "CC0 by [akolesnikoff@](https://github.com/akolesnikoff)"
+}

examples/parking.jpg ADDED Viewed

examples/parking.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "name": "parking",
+  "comment": "",
+  "model": "paligemma-3b-mix-224",
+  "prompt": "Describe the image.",
+  "license": "CC0 by [xiaohuazhai@](https://huggingface.co/xiaohuazhai)"
+}