Baptiste Canton committed on
Commit
15f2286
1 Parent(s): 3825bf0
Files changed (2)
  1. app.py +53 -20
  2. app3.py +0 -72
app.py CHANGED
@@ -1,40 +1,73 @@
+ import argparse
+ import io
  import logging
  import os

  import gradio as gr
+ import requests
+ from PIL import Image
  from pillow_heif import register_heif_opener
-
- register_heif_opener()
-
- import gradio as gr
  from transformers import pipeline

+ os.environ.setdefault("GRADIO_ANALYTICS_ENABLED", "False")
  LOG_LEVEL = os.getenv("LOG_LEVEL", "DEBUG")
  MAX_NEW_TOKENS = int(os.getenv("MAX_NEW_TOKENS", 200))
  # https://huggingface.co/models?pipeline_tag=image-to-text&sort=likes
  MODEL = os.getenv("MODEL", "Salesforce/blip-image-captioning-large")

+ register_heif_opener()
+
  logging.basicConfig(level=LOG_LEVEL)
  logger = logging.getLogger(__name__)


- logger.info("Loading model...")
- # simpler model: "ydshieh/vit-gpt2-coco-en"
- captioner = pipeline(
-     "image-to-text",
-     model=MODEL,
-     max_new_tokens=MAX_NEW_TOKENS,
- )
- logger.info("Done loading model.")
+ def setup_args():
+     parser = argparse.ArgumentParser()
+     parser.add_argument("--share", action="store_true", default=False)
+     return parser.parse_args()
+
+
+ def load_image_from_url(url):
+     try:
+         response = requests.get(url)
+         if not response.ok:
+             raise Exception("Error downloading image")
+         image = Image.open(io.BytesIO(response.content))
+         return image
+     except Exception as e:
+         logger.error("Error loading image from URL: %s", e)
+         raise


- def graptioner(image_url):
-     global captioner
-     result = captioner(image_url)
-     caption = result[0]["generated_text"]
-     return caption
+ def graptioner(image, url):
+     if url and url.strip():
+         image = load_image_from_url(url)
+     width, height = image.size
+     if width < 1 or height < 1:
+         raise Exception("Invalid image")
+     logger.debug("Loaded image size: %sx%s", width, height)
+     # generate caption
+     result = captioner(image)
+     return result[0]["generated_text"]


- # add gradio interface
- iface = gr.Interface(fn=graptioner, inputs="text", outputs=["text"], allow_flagging="never")
- iface.launch()
+ if __name__ == "__main__":
+     args = setup_args()
+     logger.info("Loading model...")
+     # simpler model: "ydshieh/vit-gpt2-coco-en"
+     captioner = pipeline(
+         "image-to-text",
+         model=MODEL,
+         max_new_tokens=MAX_NEW_TOKENS,
+     )
+     logger.info("Done loading model.")
+     iface = gr.Interface(
+         fn=graptioner,
+         inputs=[
+             gr.Image(type="pil", label="Upload Image"),
+             gr.Textbox(lines=1, placeholder="Image URL", label="Image URL"),
+         ],
+         outputs=["text"],
+         allow_flagging="never",
+     )
+     iface.launch(share=args.share)
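The refactor moves model loading under the __main__ guard and splits URL fetching (load_image_from_url) from captioning (graptioner), which the Gradio interface now feeds with either an uploaded image or an image URL. As a minimal sketch (not part of this commit), the same flow can be exercised without the UI; it assumes the same environment variables and dependencies as app.py, and the helper name caption_url plus the example URL are hypothetical:

import io
import os

import requests
from PIL import Image
from transformers import pipeline

MODEL = os.getenv("MODEL", "Salesforce/blip-image-captioning-large")
MAX_NEW_TOKENS = int(os.getenv("MAX_NEW_TOKENS", 200))


def caption_url(captioner, url):
    # Download the image and hand a PIL object to the pipeline,
    # mirroring load_image_from_url() and graptioner() above.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    image = Image.open(io.BytesIO(response.content))
    return captioner(image)[0]["generated_text"]


if __name__ == "__main__":
    captioner = pipeline("image-to-text", model=MODEL, max_new_tokens=MAX_NEW_TOKENS)
    print(caption_url(captioner, "https://example.com/photo.jpg"))  # hypothetical URL

Launching the app itself with python app.py --share would pass share=True to iface.launch(), asking Gradio for a temporary public link in addition to the local server.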
app3.py DELETED
@@ -1,72 +0,0 @@
1
- import argparse
2
- import io
3
- import logging
4
- import os
5
-
6
- import gradio as gr
7
- import requests
8
- from PIL import Image
9
- from pillow_heif import register_heif_opener
10
- from transformers import pipeline
11
-
12
- os.environ.setdefault("GRADIO_ANALYTICS_ENABLED", "False")
13
- LOG_LEVEL = os.getenv("LOG_LEVEL", "DEBUG")
14
- MAX_NEW_TOKENS = int(os.getenv("MAX_NEW_TOKENS", 200))
15
- # https://huggingface.co/models?pipeline_tag=image-to-text&sort=likes
16
- MODEL = os.getenv("MODEL", "Salesforce/blip-image-captioning-large")
17
-
18
- register_heif_opener()
19
-
20
- logging.basicConfig(level=LOG_LEVEL)
21
- logger = logging.getLogger(__name__)
22
-
23
-
24
- def setup_args():
25
- parser = argparse.ArgumentParser()
26
- parser.add_argument("--share", action="store_true", default=False)
27
- return parser.parse_args()
28
-
29
-
30
- def load_image_from_url(url):
31
- try:
32
- response = requests.get(url)
33
- if not response.ok:
34
- raise Exception("Error downloading image")
35
- image = Image.open(io.BytesIO(response.content))
36
- return image
37
- except Exception as e:
38
- logger.error("Error loading image from URL: %s", e)
39
- raise
40
-
41
-
42
- def graptioner(image, url):
43
- if url and url.strip():
44
- image = load_image_from_url(url)
45
- width, height = image.size
46
- if width < 1 or height < 1:
47
- raise Exception("Invalid image")
48
- logger.debug("Loaded image size: %sx%s", width, height)
49
- # generate caption
50
- result = captioner(image)
51
- return result[0]["generated_text"]
52
-
53
-
54
- if __name__ == "__main__":
55
- args = setup_args()
56
- logger.info("Loading model...")
57
- # simpler model: "ydshieh/vit-gpt2-coco-en"
58
- captioner = pipeline(
59
- "image-to-text",
60
- model=MODEL,
61
- max_new_tokens=MAX_NEW_TOKENS,
62
- )
63
- logger.info("Done loading model.")
64
- iface = gr.Interface(
65
- fn=graptioner,
66
- inputs=[
67
- gr.Image(type="pil", label="Upload Image"),
68
- gr.Textbox(lines=1, placeholder="Image URL", label="Image URL"),
69
- ],
70
- outputs=["text"],
71
- )
72
- iface.launch(share=args.share)
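Both the new app.py and the deleted app3.py call register_heif_opener() once at startup, before any image is opened. A minimal sketch of what that registration enables, assuming pillow-heif is installed (the file name is hypothetical):

from PIL import Image
from pillow_heif import register_heif_opener

register_heif_opener()  # registers a HEIF/HEIC decoder plugin with Pillow

image = Image.open("photo.heic")  # hypothetical HEIC file, now opens like any other format
print(image.size, image.mode)

Without the registration, Image.open() would raise UnidentifiedImageError for HEIC uploads, so the call has to happen before the Gradio interface starts handling images.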