Spaces:
Sleeping
Sleeping
nursulu
committed on
Commit
•
77e0511
1
Parent(s):
6893fa1
Update
Browse files- app.py +14 -11
- fonts/Anton/Anton-Regular.ttf +0 -0
- fonts/Anton/OFL.txt +93 -0
- utils/__pycache__/image_utils.cpython-311.pyc +0 -0
- utils/__pycache__/model_utils.cpython-311.pyc +0 -0
- utils/image_utils.py +8 -30
- utils/model_utils.py +27 -11
app.py
CHANGED
@@ -11,6 +11,7 @@ import requests
|
|
11 |
import json
|
12 |
import os
|
13 |
import re
|
|
|
14 |
import torch
|
15 |
from peft import PeftModel, PeftConfig
|
16 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
@@ -41,9 +42,11 @@ def load_models():
|
|
41 |
|
42 |
# x = st.slider('Select a value')
|
43 |
# st.write(x, 'squared is', x * x)
|
44 |
-
|
|
|
45 |
caption = get_model_caption(img_path, base_model, tokenizer, hf_token)
|
46 |
-
|
|
|
47 |
return image, caption
|
48 |
|
49 |
st.title("Image Upload and Processing App")
|
@@ -58,7 +61,7 @@ def main():
|
|
58 |
uploaded_image = st.file_uploader("Upload an Image", type=["jpg", "png", "jpeg"])
|
59 |
|
60 |
# Input widget to add Hugging Face token
|
61 |
-
hf_token = st.text_input("Enter your Hugging Face Token", type=
|
62 |
|
63 |
# Dropdown to select mood
|
64 |
# mood = st.selectbox("Select Mood", options=["happy", "angry"])
|
@@ -78,16 +81,16 @@ def main():
|
|
78 |
# Display the output
|
79 |
st.image(image, caption=f"Generated Meme: {caption}")
|
80 |
|
81 |
-
# Optionally allow downloading the meme
|
82 |
-
buf = io.BytesIO()
|
83 |
-
image.save(buf, format="PNG")
|
84 |
-
byte_im = buf.getvalue()
|
85 |
|
86 |
st.download_button(
|
87 |
-
label="Download
|
88 |
-
data=
|
89 |
-
file_name="
|
90 |
-
mime="image/
|
91 |
)
|
92 |
|
93 |
if __name__ == '__main__':
|
|
|
11 |
import json
|
12 |
import os
|
13 |
import re
|
14 |
+
|
15 |
import torch
|
16 |
from peft import PeftModel, PeftConfig
|
17 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
|
|
42 |
|
43 |
# x = st.slider('Select a value')
|
44 |
# st.write(x, 'squared is', x * x)
|
45 |
+
|
46 |
+
def generate_meme_from_image(img_path, base_model, tokenizer, hf_token, device='cuda'):
    """Generate a caption for an image and overlay it to build a meme.

    Args:
        img_path: Image input handed to ``get_model_caption`` and
            ``overlay_caption``.
            NOTE(review): despite the name this looks like an image object
            rather than a path -- confirm against the caller.
        base_model: Model passed through to ``get_model_caption``.
        tokenizer: Tokenizer passed through to ``get_model_caption``.
        hf_token: Hugging Face token passed through to ``get_model_caption``.
        device: Device name forwarded to ``get_model_caption``
            (defaults to ``'cuda'``).

    Returns:
        A ``(image, caption)`` tuple: whatever ``overlay_caption`` returns,
        plus the caption produced by ``get_model_caption``.
    """
    # Forward the caller's device choice; it used to be accepted here but
    # silently dropped, so the callee always fell back to its own default.
    caption = get_model_caption(img_path, base_model, tokenizer, hf_token, device=device)
    print(caption)
    image = overlay_caption(caption, img_path)
    return image, caption
|
51 |
|
52 |
st.title("Image Upload and Processing App")
|
|
|
61 |
uploaded_image = st.file_uploader("Upload an Image", type=["jpg", "png", "jpeg"])
|
62 |
|
63 |
# Input widget to add Hugging Face token
|
64 |
+
hf_token = st.text_input("Enter your Hugging Face Token", type='default')
|
65 |
|
66 |
# Dropdown to select mood
|
67 |
# mood = st.selectbox("Select Mood", options=["happy", "angry"])
|
|
|
81 |
# Display the output
|
82 |
st.image(image, caption=f"Generated Meme: {caption}")
|
83 |
|
84 |
+
# # Optionally allow downloading the meme
|
85 |
+
# buf = io.BytesIO()
|
86 |
+
# image.save(buf, format="PNG")
|
87 |
+
# byte_im = buf.getvalue()
|
88 |
|
89 |
st.download_button(
|
90 |
+
label="Download Image with Caption",
|
91 |
+
data=image,
|
92 |
+
file_name="captioned_image.jpg",
|
93 |
+
mime="image/jpeg"
|
94 |
)
|
95 |
|
96 |
if __name__ == '__main__':
|
fonts/Anton/Anton-Regular.ttf
ADDED
Binary file (162 kB). View file
|
|
fonts/Anton/OFL.txt
ADDED
@@ -0,0 +1,93 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Copyright 2020 The Anton Project Authors (https://github.com/googlefonts/AntonFont.git)
|
2 |
+
|
3 |
+
This Font Software is licensed under the SIL Open Font License, Version 1.1.
|
4 |
+
This license is copied below, and is also available with a FAQ at:
|
5 |
+
https://openfontlicense.org
|
6 |
+
|
7 |
+
|
8 |
+
-----------------------------------------------------------
|
9 |
+
SIL OPEN FONT LICENSE Version 1.1 - 26 February 2007
|
10 |
+
-----------------------------------------------------------
|
11 |
+
|
12 |
+
PREAMBLE
|
13 |
+
The goals of the Open Font License (OFL) are to stimulate worldwide
|
14 |
+
development of collaborative font projects, to support the font creation
|
15 |
+
efforts of academic and linguistic communities, and to provide a free and
|
16 |
+
open framework in which fonts may be shared and improved in partnership
|
17 |
+
with others.
|
18 |
+
|
19 |
+
The OFL allows the licensed fonts to be used, studied, modified and
|
20 |
+
redistributed freely as long as they are not sold by themselves. The
|
21 |
+
fonts, including any derivative works, can be bundled, embedded,
|
22 |
+
redistributed and/or sold with any software provided that any reserved
|
23 |
+
names are not used by derivative works. The fonts and derivatives,
|
24 |
+
however, cannot be released under any other type of license. The
|
25 |
+
requirement for fonts to remain under this license does not apply
|
26 |
+
to any document created using the fonts or their derivatives.
|
27 |
+
|
28 |
+
DEFINITIONS
|
29 |
+
"Font Software" refers to the set of files released by the Copyright
|
30 |
+
Holder(s) under this license and clearly marked as such. This may
|
31 |
+
include source files, build scripts and documentation.
|
32 |
+
|
33 |
+
"Reserved Font Name" refers to any names specified as such after the
|
34 |
+
copyright statement(s).
|
35 |
+
|
36 |
+
"Original Version" refers to the collection of Font Software components as
|
37 |
+
distributed by the Copyright Holder(s).
|
38 |
+
|
39 |
+
"Modified Version" refers to any derivative made by adding to, deleting,
|
40 |
+
or substituting -- in part or in whole -- any of the components of the
|
41 |
+
Original Version, by changing formats or by porting the Font Software to a
|
42 |
+
new environment.
|
43 |
+
|
44 |
+
"Author" refers to any designer, engineer, programmer, technical
|
45 |
+
writer or other person who contributed to the Font Software.
|
46 |
+
|
47 |
+
PERMISSION & CONDITIONS
|
48 |
+
Permission is hereby granted, free of charge, to any person obtaining
|
49 |
+
a copy of the Font Software, to use, study, copy, merge, embed, modify,
|
50 |
+
redistribute, and sell modified and unmodified copies of the Font
|
51 |
+
Software, subject to the following conditions:
|
52 |
+
|
53 |
+
1) Neither the Font Software nor any of its individual components,
|
54 |
+
in Original or Modified Versions, may be sold by itself.
|
55 |
+
|
56 |
+
2) Original or Modified Versions of the Font Software may be bundled,
|
57 |
+
redistributed and/or sold with any software, provided that each copy
|
58 |
+
contains the above copyright notice and this license. These can be
|
59 |
+
included either as stand-alone text files, human-readable headers or
|
60 |
+
in the appropriate machine-readable metadata fields within text or
|
61 |
+
binary files as long as those fields can be easily viewed by the user.
|
62 |
+
|
63 |
+
3) No Modified Version of the Font Software may use the Reserved Font
|
64 |
+
Name(s) unless explicit written permission is granted by the corresponding
|
65 |
+
Copyright Holder. This restriction only applies to the primary font name as
|
66 |
+
presented to the users.
|
67 |
+
|
68 |
+
4) The name(s) of the Copyright Holder(s) or the Author(s) of the Font
|
69 |
+
Software shall not be used to promote, endorse or advertise any
|
70 |
+
Modified Version, except to acknowledge the contribution(s) of the
|
71 |
+
Copyright Holder(s) and the Author(s) or with their explicit written
|
72 |
+
permission.
|
73 |
+
|
74 |
+
5) The Font Software, modified or unmodified, in part or in whole,
|
75 |
+
must be distributed entirely under this license, and must not be
|
76 |
+
distributed under any other license. The requirement for fonts to
|
77 |
+
remain under this license does not apply to any document created
|
78 |
+
using the Font Software.
|
79 |
+
|
80 |
+
TERMINATION
|
81 |
+
This license becomes null and void if any of the above conditions are
|
82 |
+
not met.
|
83 |
+
|
84 |
+
DISCLAIMER
|
85 |
+
THE FONT SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
86 |
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO ANY WARRANTIES OF
|
87 |
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
|
88 |
+
OF COPYRIGHT, PATENT, TRADEMARK, OR OTHER RIGHT. IN NO EVENT SHALL THE
|
89 |
+
COPYRIGHT HOLDER BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
90 |
+
INCLUDING ANY GENERAL, SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL
|
91 |
+
DAMAGES, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
92 |
+
FROM, OUT OF THE USE OR INABILITY TO USE THE FONT SOFTWARE OR FROM
|
93 |
+
OTHER DEALINGS IN THE FONT SOFTWARE.
|
utils/__pycache__/image_utils.cpython-311.pyc
CHANGED
Binary files a/utils/__pycache__/image_utils.cpython-311.pyc and b/utils/__pycache__/image_utils.cpython-311.pyc differ
|
|
utils/__pycache__/model_utils.cpython-311.pyc
CHANGED
Binary files a/utils/__pycache__/model_utils.cpython-311.pyc and b/utils/__pycache__/model_utils.cpython-311.pyc differ
|
|
utils/image_utils.py
CHANGED
@@ -2,30 +2,7 @@ import os
|
|
2 |
import re
|
3 |
from PIL import Image, ImageDraw, ImageFont
|
4 |
import textwrap
|
5 |
-
|
6 |
-
|
7 |
-
def get_unique_filename(filename):
|
8 |
-
"""
|
9 |
-
Generate a unique filename by appending a number if a file with the same name already exists.
|
10 |
-
"""
|
11 |
-
if not os.path.exists(filename):
|
12 |
-
return filename
|
13 |
-
|
14 |
-
base, ext = os.path.splitext(filename)
|
15 |
-
counter = 1
|
16 |
-
new_filename = f"{base}_{counter}{ext}"
|
17 |
-
|
18 |
-
while os.path.exists(new_filename):
|
19 |
-
counter += 1
|
20 |
-
new_filename = f"{base}_{counter}{ext}"
|
21 |
-
|
22 |
-
return new_filename
|
23 |
-
|
24 |
-
|
25 |
-
def save_image_with_unique_name(image, path):
|
26 |
-
unique_path = get_unique_filename(path)
|
27 |
-
image.save(unique_path)
|
28 |
-
print(f"Image saved as: {unique_path}")
|
29 |
|
30 |
def find_text_in_answer(text):
|
31 |
print("Full caption:", text)
|
@@ -97,8 +74,8 @@ def calculate_text_height(caption, font, max_width):
|
|
97 |
draw = ImageDraw.Draw(image)
|
98 |
return draw_text(draw, caption, (0, 0), font, max_width)
|
99 |
|
100 |
-
def add_caption(image_path, caption,
|
101 |
-
image =
|
102 |
draw = ImageDraw.Draw(image)
|
103 |
width, height = image.size
|
104 |
|
@@ -136,13 +113,14 @@ def add_caption(image_path, caption, output_path, top_margin=10, bottom_margin=1
|
|
136 |
bottom_caption_position = (width // 10, height - min_distance_from_bottom_px - bottom_caption_height)
|
137 |
draw_text(draw, bottom_caption, bottom_caption_position, font, width - 2 * (width // 10))
|
138 |
|
139 |
-
|
|
|
|
|
140 |
return image
|
141 |
|
142 |
|
143 |
-
def overlay_caption(text, img_path
|
144 |
-
img_name = img_path.split("/")[-1]
|
145 |
text = find_text_in_answer(text)
|
146 |
text = text.strip(".")
|
147 |
-
image = add_caption(img_path, text
|
148 |
return image
|
|
|
2 |
import re
|
3 |
from PIL import Image, ImageDraw, ImageFont
|
4 |
import textwrap
|
5 |
+
import io
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6 |
|
7 |
def find_text_in_answer(text):
|
8 |
print("Full caption:", text)
|
|
|
74 |
draw = ImageDraw.Draw(image)
|
75 |
return draw_text(draw, caption, (0, 0), font, max_width)
|
76 |
|
77 |
+
def add_caption(image_path, caption, top_margin=10, bottom_margin=10, max_caption_length=10, min_distance_from_bottom_mm=10):
|
78 |
+
image = image_path
|
79 |
draw = ImageDraw.Draw(image)
|
80 |
width, height = image.size
|
81 |
|
|
|
113 |
bottom_caption_position = (width // 10, height - min_distance_from_bottom_px - bottom_caption_height)
|
114 |
draw_text(draw, bottom_caption, bottom_caption_position, font, width - 2 * (width // 10))
|
115 |
|
116 |
+
buffered = io.BytesIO()
|
117 |
+
image.save(buffered, format="JPEG")
|
118 |
+
return buffered.getvalue()
|
119 |
return image
|
120 |
|
121 |
|
122 |
+
def overlay_caption(text, img_path):
    """Extract the caption from a model answer and draw it onto the image.

    Args:
        text: Model output; the caption is pulled out of it with
            ``find_text_in_answer``.
        img_path: Image input forwarded unchanged to ``add_caption``.

    Returns:
        Whatever ``add_caption`` returns for the cleaned caption.
    """
    # Leading and trailing periods are stripped before drawing the caption.
    cleaned_caption = find_text_in_answer(text).strip(".")
    return add_caption(img_path, cleaned_caption)
|
utils/model_utils.py
CHANGED
@@ -6,26 +6,37 @@ import os
|
|
6 |
from tqdm import tqdm
|
7 |
import re
|
8 |
import torch
|
|
|
|
|
9 |
|
|
|
|
|
|
|
|
|
|
|
10 |
|
11 |
|
12 |
def query_clip(data, hf_token):
|
13 |
API_URL = "https://api-inference.huggingface.co/models/openai/clip-vit-base-patch32"
|
14 |
headers = {"Authorization": f"Bearer {hf_token}"}
|
15 |
-
|
16 |
-
|
|
|
|
|
|
|
|
|
17 |
payload={
|
18 |
"parameters": data["parameters"],
|
19 |
-
"inputs":
|
20 |
}
|
21 |
response = requests.post(API_URL, headers=headers, json=payload)
|
22 |
return response.json()
|
23 |
|
24 |
|
25 |
-
def get_sentiment(
|
26 |
print("Getting the sentiment of the image...")
|
27 |
output = query_clip({
|
28 |
-
"
|
29 |
"parameters": {"candidate_labels": ["angry", "happy"]},
|
30 |
}, hf_token)
|
31 |
try:
|
@@ -36,18 +47,22 @@ def get_sentiment(img_path, hf_token):
|
|
36 |
print("If the model is loading, try again in a minute. If you've reached a query limit (300 per hour), try within the next hour.")
|
37 |
|
38 |
|
39 |
-
def query_blip(
|
40 |
API_URL = "https://api-inference.huggingface.co/models/Salesforce/blip-image-captioning-large"
|
41 |
headers = {"Authorization": f"Bearer {hf_token}"}
|
42 |
-
|
43 |
-
|
44 |
-
|
|
|
|
|
|
|
|
|
45 |
return response.json()
|
46 |
|
47 |
|
48 |
-
def get_description(
|
49 |
print("Getting the context of the image...")
|
50 |
-
output = query_blip(
|
51 |
|
52 |
try:
|
53 |
print("Context:", output[0]['generated_text'])
|
@@ -72,6 +87,7 @@ def get_model_caption(img_path, base_model, tokenizer, hf_token, device='cuda'):
|
|
72 |
print("Generating captions...")
|
73 |
encodeds = tokenizer(prompt, return_tensors="pt", add_special_tokens=True)
|
74 |
model_inputs = encodeds.to(device)
|
|
|
75 |
base_model.set_adapter(sentiment)
|
76 |
base_model.to(device)
|
77 |
generated_ids = base_model.generate(**model_inputs, max_new_tokens=20, do_sample=True, pad_token_id=tokenizer.eos_token_id)
|
|
|
6 |
from tqdm import tqdm
|
7 |
import re
|
8 |
import torch
|
9 |
+
import io
|
10 |
+
from PIL import Image
|
11 |
|
12 |
+
def image_to_bytes(image):
    """Convert a PIL Image to JPEG-encoded bytes.

    Args:
        image: Object exposing ``mode``, ``convert(mode)`` and
            ``save(fp, format=...)`` (a PIL ``Image``).

    Returns:
        bytes: The JPEG encoding of the image.
    """
    # JPEG cannot store alpha or palette data; saving e.g. an RGBA or P mode
    # image (typical for PNG uploads) raises an error, so normalise to RGB
    # unless the mode is already one the JPEG writer accepts.
    jpeg_modes = ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr")
    if image.mode not in jpeg_modes:
        image = image.convert("RGB")

    buffer = io.BytesIO()
    image.save(buffer, format="JPEG")
    return buffer.getvalue()
|
17 |
|
18 |
|
19 |
def query_clip(data, hf_token):
    """Send an image and its parameters to the hosted CLIP model.

    Args:
        data: Dict with an ``"image"`` entry (converted with
            ``image_to_bytes``) and a ``"parameters"`` entry that is
            forwarded unchanged in the request payload.
        hf_token: Hugging Face token used as the Bearer credential.

    Returns:
        The decoded JSON body of the HTTP response.
    """
    # Imported locally: the visible part of this module shows no top-level
    # ``base64`` import, and the function depends on it.
    import base64

    API_URL = "https://api-inference.huggingface.co/models/openai/clip-vit-base-patch32"
    headers = {"Authorization": f"Bearer {hf_token}"}

    # The payload is JSON, so the image bytes travel as base64 text.
    # (The bytes were previously also re-opened into an image that was never
    # used; that dead decode step is dropped.)
    img_bytes = image_to_bytes(data['image'])
    encoded_img = base64.b64encode(img_bytes).decode("utf-8")

    payload = {
        "parameters": data["parameters"],
        "inputs": encoded_img,
    }
    response = requests.post(API_URL, headers=headers, json=payload)
    return response.json()
|
34 |
|
35 |
|
36 |
+
def get_sentiment(img, hf_token):
|
37 |
print("Getting the sentiment of the image...")
|
38 |
output = query_clip({
|
39 |
+
"image": img,
|
40 |
"parameters": {"candidate_labels": ["angry", "happy"]},
|
41 |
}, hf_token)
|
42 |
try:
|
|
|
47 |
print("If the model is loading, try again in a minute. If you've reached a query limit (300 per hour), try within the next hour.")
|
48 |
|
49 |
|
50 |
+
def query_blip(img, hf_token):
    """Send an image to the hosted BLIP captioning model.

    Args:
        img: Image to caption; converted to JPEG bytes with
            ``image_to_bytes``.
        hf_token: Hugging Face token used as the Bearer credential.

    Returns:
        The decoded JSON body of the HTTP response.
    """
    API_URL = "https://api-inference.huggingface.co/models/Salesforce/blip-image-captioning-large"
    headers = {"Authorization": f"Bearer {hf_token}"}

    img_bytes = image_to_bytes(img)

    # Send the raw image bytes as the request body. Passing a
    # ``{'file': (name, bytes, mime)}`` dict through ``data=`` makes requests
    # form-encode the tuple, so the endpoint never receives the image itself.
    response = requests.post(API_URL, headers=headers, data=img_bytes)
    return response.json()
|
61 |
|
62 |
|
63 |
+
def get_description(img, hf_token):
|
64 |
print("Getting the context of the image...")
|
65 |
+
output = query_blip(img, hf_token)
|
66 |
|
67 |
try:
|
68 |
print("Context:", output[0]['generated_text'])
|
|
|
87 |
print("Generating captions...")
|
88 |
encodeds = tokenizer(prompt, return_tensors="pt", add_special_tokens=True)
|
89 |
model_inputs = encodeds.to(device)
|
90 |
+
print("sentiment", sentiment)
|
91 |
base_model.set_adapter(sentiment)
|
92 |
base_model.to(device)
|
93 |
generated_ids = base_model.generate(**model_inputs, max_new_tokens=20, do_sample=True, pad_token_id=tokenizer.eos_token_id)
|