reduce image size for speed up
- app.py +1 -0
- models/__pycache__/blip2_model.cpython-38.pyc +0 -0
- models/__pycache__/controlnet_model.cpython-38.pyc +0 -0
- models/__pycache__/image_text_transformation.cpython-38.pyc +0 -0
- models/blip2_model.py +2 -4
- models/image_text_transformation.py +2 -0
- utils/__pycache__/util.cpython-38.pyc +0 -0
- utils/util.py +18 -0
app.py
CHANGED
@@ -123,6 +123,7 @@ interface = gr.Interface(
     \n Twitter: https://twitter.com/awinyimgprocess/status/1646225454599372800?s=46&t=HvOe9T2n35iFuCHP5aIHpQ
     \n Since GPU is expensive, we use CPU for the demo and do not include Semantic Segment Anything. Run the code locally with a GPU, or use the Google Colab we provide, for faster speed.
     \n The text-to-image model is ControlNet (very slow on CPU, ~2 min), which uses Canny edges as the reference.
+    \n To speed up, we generate images at a small size (256); run the code locally for high-quality samples.
     """
 )
 
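For context on the ControlNet note above: this commit does not touch models/controlnet_model.py, so the following is only an illustrative sketch of a typical diffusers ControlNet-Canny pipeline run at 256×256. The checkpoint names and the example.jpg path are assumptions, not the repo's pinned values.

import cv2
import numpy as np
from diffusers import ControlNetModel, StableDiffusionControlNetPipeline
from PIL import Image

# Build a Canny edge map from the reference image ("example.jpg" is a placeholder).
image = np.array(Image.open("example.jpg").convert("RGB"))
edges = cv2.Canny(image, 100, 200)
canny_image = Image.fromarray(np.stack([edges] * 3, axis=-1))

# Commonly used ControlNet-Canny checkpoints; the repo may pin different ones.
controlnet = ControlNetModel.from_pretrained("lllyasviel/sd-controlnet-canny")
pipe = StableDiffusionControlNetPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5", controlnet=controlnet
)

# 256x256 keeps CPU generation tolerable; raise to 512 locally for quality.
result = pipe("a photo of the scene", image=canny_image,
              height=256, width=256, num_inference_steps=20).images[0]
result.save("controlnet_sample.png")
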
models/__pycache__/blip2_model.cpython-38.pyc
CHANGED
Binary files a/models/__pycache__/blip2_model.cpython-38.pyc and b/models/__pycache__/blip2_model.cpython-38.pyc differ

models/__pycache__/controlnet_model.cpython-38.pyc
CHANGED
Binary files a/models/__pycache__/controlnet_model.cpython-38.pyc and b/models/__pycache__/controlnet_model.cpython-38.pyc differ

models/__pycache__/image_text_transformation.cpython-38.pyc
CHANGED
Binary files a/models/__pycache__/image_text_transformation.cpython-38.pyc and b/models/__pycache__/image_text_transformation.cpython-38.pyc differ
models/blip2_model.py
CHANGED
@@ -2,6 +2,7 @@ from PIL import Image
 import requests
 from transformers import Blip2Processor, Blip2ForConditionalGeneration
 import torch
+from utils.util import resize_long_edge
 
 
 class ImageCaptioning:
@@ -18,15 +19,12 @@ class ImageCaptioning:
         model = Blip2ForConditionalGeneration.from_pretrained(
             "pretrained_models/blip2-opt-2.7b", torch_dtype=self.data_type
         )
-        # processor = Blip2Processor.from_pretrained("Salesforce/blip2-opt-2.7b")
-        # model = Blip2ForConditionalGeneration.from_pretrained(
-        #     "Salesforce/blip2-opt-2.7b", torch_dtype=self.data_type
-        # )
         model.to(self.device)
         return processor, model
 
     def image_caption(self, image_src):
         image = Image.open(image_src)
+        image = resize_long_edge(image)
         inputs = self.processor(images=image, return_tensors="pt").to(self.device, self.data_type)
         generated_ids = self.model.generate(**inputs)
         generated_text = self.processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
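With this change, BLIP-2 captions a copy whose long edge is at most 256 px instead of the full-resolution file, which cuts image preprocessing cost on CPU. A minimal standalone sketch of the same flow, using the public Hub checkpoint that the deleted comments referenced ("example.jpg" is a placeholder path):

import torch
from PIL import Image
from transformers import Blip2Processor, Blip2ForConditionalGeneration
from utils.util import resize_long_edge

device = "cuda" if torch.cuda.is_available() else "cpu"
dtype = torch.float16 if device == "cuda" else torch.float32

processor = Blip2Processor.from_pretrained("Salesforce/blip2-opt-2.7b")
model = Blip2ForConditionalGeneration.from_pretrained(
    "Salesforce/blip2-opt-2.7b", torch_dtype=dtype
).to(device)

image = Image.open("example.jpg")  # placeholder input path
image = resize_long_edge(image)    # long edge -> 256 px, as in the commit
inputs = processor(images=image, return_tensors="pt").to(device, dtype)
generated_ids = model.generate(**inputs)
caption = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
print(caption)
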
models/image_text_transformation.py
CHANGED
@@ -9,6 +9,7 @@ from PIL import Image
 import base64
 from io import BytesIO
 import os
+from utils.util import resize_long_edge
 
 def pil_image_to_base64(image):
     buffered = BytesIO()
@@ -41,6 +42,7 @@ class ImageTextTransformation:
     def image_to_text(self, img_src):
         # the information to generate paragraph based on the context
         self.ref_image = Image.open(img_src)
+        self.ref_image = resize_long_edge(self.ref_image)
         width, height = read_image_width_height(img_src)
         print(self.args)
         if self.args.image_caption:
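One subtlety of this hunk: read_image_width_height(img_src) re-opens the file from disk, so the width and height passed downstream still describe the original image even though self.ref_image has been shrunk. A quick sketch illustrating the difference ("example.jpg" is hypothetical):

from PIL import Image
from utils.util import read_image_width_height, resize_long_edge

img_src = "example.jpg"  # hypothetical input path
ref_image = resize_long_edge(Image.open(img_src))

print(ref_image.size)                    # long edge is now 256
print(read_image_width_height(img_src))  # still the original file dimensions
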
utils/__pycache__/util.cpython-38.pyc
CHANGED
Binary files a/utils/__pycache__/util.cpython-38.pyc and b/utils/__pycache__/util.cpython-38.pyc differ
utils/util.py
CHANGED
@@ -14,6 +14,24 @@ def read_image_width_height(image_path):
     width, height = image.size
     return width, height
 
+
+def resize_long_edge(image, target_size=256):
+    # Calculate the aspect ratio
+    width, height = image.size
+    aspect_ratio = float(width) / float(height)
+
+    # Determine the new dimensions
+    if width > height:
+        new_width = target_size
+        new_height = int(target_size / aspect_ratio)
+    else:
+        new_width = int(target_size * aspect_ratio)
+        new_height = target_size
+
+    # Resize the image (LANCZOS is the current name for the ANTIALIAS filter, which was removed in Pillow 10)
+    resized_image = image.resize((new_width, new_height), Image.LANCZOS)
+    return resized_image
+
 def display_images_and_text(source_image_path, generated_image, generated_paragraph, outfile_name):
     source_image = Image.open(source_image_path)
     # Create a new image that can fit the images and the text
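As a sanity check on the helper's arithmetic: the long edge always lands exactly on target_size and the short edge is scaled to preserve the aspect ratio. A small sketch with made-up sizes:

from PIL import Image
from utils.util import resize_long_edge

landscape = Image.new("RGB", (1024, 768))  # made-up dimensions
portrait = Image.new("RGB", (600, 800))

print(resize_long_edge(landscape).size)                   # (256, 192)
print(resize_long_edge(portrait).size)                    # (192, 256)
print(resize_long_edge(landscape, target_size=512).size)  # (512, 384)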