reduce image size for speed up
- app.py +1 -0
- models/__pycache__/blip2_model.cpython-38.pyc +0 -0
- models/__pycache__/controlnet_model.cpython-38.pyc +0 -0
- models/__pycache__/image_text_transformation.cpython-38.pyc +0 -0
- models/blip2_model.py +2 -4
- models/image_text_transformation.py +2 -0
- utils/__pycache__/util.cpython-38.pyc +0 -0
- utils/util.py +18 -0
app.py
CHANGED
@@ -123,6 +123,7 @@ interface = gr.Interface(
     \n Twitter: https://twitter.com/awinyimgprocess/status/1646225454599372800?s=46&t=HvOe9T2n35iFuCHP5aIHpQ
     \n Since GPU is expensive, we use CPU for the demo and do not include Semantic Segment Anything. Run the code locally with a GPU, or use the Google Colab we provide, for faster speed.
     \n The text-to-image model is ControlNet (very slow on CPU, ~2 min), which uses Canny edges as the reference.
+    \n To speed up, we generate images at a small size (256); run the code locally for high-quality samples.
     """
 )
 
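For context on the ControlNet note above: this commit does not touch models/controlnet_model.py, so the following is only an illustrative sketch of a typical diffusers ControlNet-Canny pipeline run at 256×256. The checkpoint names and the example.jpg path are assumptions, not the repo's pinned values.

import cv2
import numpy as np
from diffusers import ControlNetModel, StableDiffusionControlNetPipeline
from PIL import Image

# Build a Canny edge map from the reference image ("example.jpg" is a placeholder).
image = np.array(Image.open("example.jpg").convert("RGB"))
edges = cv2.Canny(image, 100, 200)
canny_image = Image.fromarray(np.stack([edges] * 3, axis=-1))

# Commonly used ControlNet-Canny checkpoints; the repo may pin different ones.
controlnet = ControlNetModel.from_pretrained("lllyasviel/sd-controlnet-canny")
pipe = StableDiffusionControlNetPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5", controlnet=controlnet
)

# 256x256 keeps CPU generation tolerable; raise to 512 locally for quality.
result = pipe("a photo of the scene", image=canny_image,
              height=256, width=256, num_inference_steps=20).images[0]
result.save("controlnet_sample.png")
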
models/__pycache__/blip2_model.cpython-38.pyc
CHANGED
Binary files a/models/__pycache__/blip2_model.cpython-38.pyc and b/models/__pycache__/blip2_model.cpython-38.pyc differ

models/__pycache__/controlnet_model.cpython-38.pyc
CHANGED
Binary files a/models/__pycache__/controlnet_model.cpython-38.pyc and b/models/__pycache__/controlnet_model.cpython-38.pyc differ

models/__pycache__/image_text_transformation.cpython-38.pyc
CHANGED
Binary files a/models/__pycache__/image_text_transformation.cpython-38.pyc and b/models/__pycache__/image_text_transformation.cpython-38.pyc differ
models/blip2_model.py
CHANGED
@@ -2,6 +2,7 @@ from PIL import Image
 import requests
 from transformers import Blip2Processor, Blip2ForConditionalGeneration
 import torch
+from utils.util import resize_long_edge
 
 
 class ImageCaptioning:
@@ -18,15 +19,12 @@ class ImageCaptioning:
         model = Blip2ForConditionalGeneration.from_pretrained(
             "pretrained_models/blip2-opt-2.7b", torch_dtype=self.data_type
         )
-        # processor = Blip2Processor.from_pretrained("Salesforce/blip2-opt-2.7b")
-        # model = Blip2ForConditionalGeneration.from_pretrained(
-        #     "Salesforce/blip2-opt-2.7b", torch_dtype=self.data_type
-        # )
         model.to(self.device)
         return processor, model
 
     def image_caption(self, image_src):
         image = Image.open(image_src)
+        image = resize_long_edge(image)
         inputs = self.processor(images=image, return_tensors="pt").to(self.device, self.data_type)
         generated_ids = self.model.generate(**inputs)
         generated_text = self.processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
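With this change, BLIP-2 captions a copy whose long edge is at most 256 px instead of the full-resolution file, which cuts image preprocessing cost on CPU. A minimal standalone sketch of the same flow, using the public Hub checkpoint that the deleted comments referenced ("example.jpg" is a placeholder path):

import torch
from PIL import Image
from transformers import Blip2Processor, Blip2ForConditionalGeneration
from utils.util import resize_long_edge

device = "cuda" if torch.cuda.is_available() else "cpu"
dtype = torch.float16 if device == "cuda" else torch.float32

processor = Blip2Processor.from_pretrained("Salesforce/blip2-opt-2.7b")
model = Blip2ForConditionalGeneration.from_pretrained(
    "Salesforce/blip2-opt-2.7b", torch_dtype=dtype
).to(device)

image = Image.open("example.jpg")  # placeholder input path
image = resize_long_edge(image)    # long edge -> 256 px, as in the commit
inputs = processor(images=image, return_tensors="pt").to(device, dtype)
generated_ids = model.generate(**inputs)
caption = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
print(caption)
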
models/image_text_transformation.py
CHANGED
@@ -9,6 +9,7 @@ from PIL import Image
 import base64
 from io import BytesIO
 import os
+from utils.util import resize_long_edge
 
 def pil_image_to_base64(image):
     buffered = BytesIO()
@@ -41,6 +42,7 @@ class ImageTextTransformation:
     def image_to_text(self, img_src):
         # the information to generate paragraph based on the context
         self.ref_image = Image.open(img_src)
+        self.ref_image = resize_long_edge(self.ref_image)
         width, height = read_image_width_height(img_src)
         print(self.args)
         if self.args.image_caption:
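One subtlety of this hunk: read_image_width_height(img_src) re-opens the file from disk, so the width and height passed downstream still describe the original image even though self.ref_image has been shrunk. A quick sketch illustrating the difference ("example.jpg" is hypothetical):

from PIL import Image
from utils.util import read_image_width_height, resize_long_edge

img_src = "example.jpg"  # hypothetical input path
ref_image = resize_long_edge(Image.open(img_src))

print(ref_image.size)                    # long edge is now 256
print(read_image_width_height(img_src))  # still the original file dimensions
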
utils/__pycache__/util.cpython-38.pyc
CHANGED
Binary files a/utils/__pycache__/util.cpython-38.pyc and b/utils/__pycache__/util.cpython-38.pyc differ
utils/util.py
CHANGED
@@ -14,6 +14,24 @@ def read_image_width_height(image_path):
     width, height = image.size
     return width, height
 
+
+def resize_long_edge(image, target_size=256):
+    # Calculate the aspect ratio
+    width, height = image.size
+    aspect_ratio = float(width) / float(height)
+
+    # Determine the new dimensions
+    if width > height:
+        new_width = target_size
+        new_height = int(target_size / aspect_ratio)
+    else:
+        new_width = int(target_size * aspect_ratio)
+        new_height = target_size
+
+    # Resize the image (LANCZOS is the current name for the ANTIALIAS filter, which was removed in Pillow 10)
+    resized_image = image.resize((new_width, new_height), Image.LANCZOS)
+    return resized_image
+
 def display_images_and_text(source_image_path, generated_image, generated_paragraph, outfile_name):
     source_image = Image.open(source_image_path)
     # Create a new image that can fit the images and the text
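As a sanity check on the helper's arithmetic: the long edge always lands exactly on target_size and the short edge is scaled to preserve the aspect ratio. A small sketch with made-up sizes:

from PIL import Image
from utils.util import resize_long_edge

landscape = Image.new("RGB", (1024, 768))  # made-up dimensions
portrait = Image.new("RGB", (600, 800))

print(resize_long_edge(landscape).size)                   # (256, 192)
print(resize_long_edge(portrait).size)                    # (192, 256)
print(resize_long_edge(landscape, target_size=512).size)  # (512, 384)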