Awiny committed
Commit 51f8a02 • 1 Parent(s): 9b4b3ea

reduce image size for speed up

app.py CHANGED
@@ -123,6 +123,7 @@ interface = gr.Interface(
     \n Twitter: https://twitter.com/awinyimgprocess/status/1646225454599372800?s=46&t=HvOe9T2n35iFuCHP5aIHpQ
     \n Since GPU is expensive, we use CPU for the demo and do not include Semantic Segment Anything. Run the code locally with a GPU, or use the Google Colab we provide, for faster speed.
     \n The text2image model is ControlNet (very slow on CPU, ~2 min), which uses Canny edges as reference.
+    \n To speed up the demo, we generate images at a small size (256); run the code locally for high-quality samples.
     """
 )
 
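The ControlNet part mentioned in the description is not touched by this commit, but for context, Canny-conditioned generation with diffusers looks roughly like the sketch below. The model IDs, prompt, and input file name are illustrative assumptions, not values taken from this repo:

```python
import cv2
import numpy as np
import torch
from PIL import Image
from diffusers import ControlNetModel, StableDiffusionControlNetPipeline

# Load a Canny-conditioned ControlNet and a Stable Diffusion base model (illustrative IDs).
controlnet = ControlNetModel.from_pretrained("lllyasviel/sd-controlnet-canny", torch_dtype=torch.float32)
pipe = StableDiffusionControlNetPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5", controlnet=controlnet, torch_dtype=torch.float32
)

# Build the Canny edge map that conditions the generation.
image = np.array(Image.open("input.jpg").convert("RGB"))
edges = cv2.Canny(image, 100, 200)
edges = np.stack([edges] * 3, axis=-1)        # ControlNet expects a 3-channel conditioning image
canny_image = Image.fromarray(edges)

# On CPU this is the slow step the description warns about (~2 min).
result = pipe("a detailed photo", image=canny_image, num_inference_steps=20).images[0]
result.save("controlnet_out.png")
```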
models/__pycache__/blip2_model.cpython-38.pyc CHANGED
Binary files a/models/__pycache__/blip2_model.cpython-38.pyc and b/models/__pycache__/blip2_model.cpython-38.pyc differ
 
models/__pycache__/controlnet_model.cpython-38.pyc CHANGED
Binary files a/models/__pycache__/controlnet_model.cpython-38.pyc and b/models/__pycache__/controlnet_model.cpython-38.pyc differ
 
models/__pycache__/image_text_transformation.cpython-38.pyc CHANGED
Binary files a/models/__pycache__/image_text_transformation.cpython-38.pyc and b/models/__pycache__/image_text_transformation.cpython-38.pyc differ
 
models/blip2_model.py CHANGED
@@ -2,6 +2,7 @@ from PIL import Image
 import requests
 from transformers import Blip2Processor, Blip2ForConditionalGeneration
 import torch
+from utils.util import resize_long_edge
 
 
 class ImageCaptioning:
@@ -18,15 +19,12 @@ class ImageCaptioning:
         model = Blip2ForConditionalGeneration.from_pretrained(
             "pretrained_models/blip2-opt-2.7b", torch_dtype=self.data_type
         )
-        # processor = Blip2Processor.from_pretrained("Salesforce/blip2-opt-2.7b")
-        # model = Blip2ForConditionalGeneration.from_pretrained(
-        #     "Salesforce/blip2-opt-2.7b", torch_dtype=self.data_type
-        # )
         model.to(self.device)
         return processor, model
 
     def image_caption(self, image_src):
         image = Image.open(image_src)
+        image = resize_long_edge(image)
         inputs = self.processor(images=image, return_tensors="pt").to(self.device, self.data_type)
         generated_ids = self.model.generate(**inputs)
         generated_text = self.processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
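After this change, the captioning path is: open the image, shrink its long edge to 256, then run BLIP-2. A minimal standalone sketch, using the Hugging Face hub ID from the commented-out lines instead of the local pretrained_models/ path and an illustrative example.jpg:

```python
from PIL import Image
import torch
from transformers import Blip2Processor, Blip2ForConditionalGeneration
from utils.util import resize_long_edge

device = "cpu"          # the demo runs on CPU
dtype = torch.float32   # fp32 on CPU; fp16 is the usual choice on GPU

processor = Blip2Processor.from_pretrained("Salesforce/blip2-opt-2.7b")
model = Blip2ForConditionalGeneration.from_pretrained(
    "Salesforce/blip2-opt-2.7b", torch_dtype=dtype
).to(device)

image = resize_long_edge(Image.open("example.jpg"))   # long edge capped at 256 before captioning
inputs = processor(images=image, return_tensors="pt").to(device, dtype)
generated_ids = model.generate(**inputs)
caption = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
print(caption)
```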
models/image_text_transformation.py CHANGED
@@ -9,6 +9,7 @@ from PIL import Image
 import base64
 from io import BytesIO
 import os
+from utils.util import resize_long_edge
 
 def pil_image_to_base64(image):
     buffered = BytesIO()
@@ -41,6 +42,7 @@ class ImageTextTransformation:
     def image_to_text(self, img_src):
         # the information to generate paragraph based on the context
         self.ref_image = Image.open(img_src)
+        self.ref_image = resize_long_edge(self.ref_image)
         width, height = read_image_width_height(img_src)
         print(self.args)
         if self.args.image_caption:
utils/__pycache__/util.cpython-38.pyc CHANGED
Binary files a/utils/__pycache__/util.cpython-38.pyc and b/utils/__pycache__/util.cpython-38.pyc differ
 
utils/util.py CHANGED
@@ -14,6 +14,24 @@ def read_image_width_height(image_path):
     width, height = image.size
     return width, height
 
+
+def resize_long_edge(image, target_size=256):
+    # Calculate the aspect ratio
+    width, height = image.size
+    aspect_ratio = float(width) / float(height)
+
+    # Determine the new dimensions
+    if width > height:
+        new_width = target_size
+        new_height = int(target_size / aspect_ratio)
+    else:
+        new_width = int(target_size * aspect_ratio)
+        new_height = target_size
+
+    # Resize the image
+    resized_image = image.resize((new_width, new_height), Image.ANTIALIAS)
+    return resized_image
+
 def display_images_and_text(source_image_path, generated_image, generated_paragraph, outfile_name):
     source_image = Image.open(source_image_path)
     # Create a new image that can fit the images and the text
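A quick sanity check of the new helper (the input file name is illustrative). Note that `Image.ANTIALIAS` was removed in Pillow 10; a locally run copy on a recent Pillow would need `Image.Resampling.LANCZOS` instead:

```python
from PIL import Image
from utils.util import resize_long_edge

img = Image.open("example.jpg")   # e.g. a 1920x1080 photo (hypothetical input)
small = resize_long_edge(img)     # default target_size=256
print(small.size)                 # (256, 144): long edge capped at 256, aspect ratio preserved
```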