|
import gradio as gr |
|
import numpy as np |
|
|
|
from PIL import Image |
|
from utils import Endpoint, get_token |
|
from io import BytesIO |
|
import requests |
|
|
|
|
|
# Module-wide handle on the remote editing backend; `edict` reads its `.url`
# attribute to build the request URL for every edit.
endpoint = Endpoint()
|
|
|
def local_edict(x, source_text, edit_text,
                edit_strength, guidance_scale,
                steps=50, mix_weight=0.93, ):
    """Run an EDICT edit in-process and return the result as an array.

    The input array is wrapped in a PIL image and handed to
    ``EDICT_editing`` together with both prompts and the sampling settings.
    The first element of whatever ``EDICT_editing`` returns is converted to
    a NumPy array and returned.

    Args:
        x: Image array accepted by ``Image.fromarray``.
        source_text: Description of the original image.
        edit_text: Description of the desired edit.
        edit_strength: Forwarded as ``init_image_strength``.
        guidance_scale: Forwarded as ``guidance_scale``.
        steps: Forwarded as ``steps``.
        mix_weight: Forwarded as ``mix_weight``.

    NOTE(review): ``EDICT_editing`` is not imported in the visible part of
    this file -- confirm it is in scope before calling this function.
    """
    pil_input = Image.fromarray(x)
    settings = dict(
        steps=steps,
        mix_weight=mix_weight,
        init_image_strength=edit_strength,
        guidance_scale=guidance_scale,
    )
    edited = EDICT_editing(pil_input, source_text, edit_text, **settings)
    # Only the first returned image is used.
    return np.array(edited[0])
|
|
|
def encode_image(image):
    """Serialize a PIL image into an in-memory JPEG.

    Args:
        image: PIL image to encode. Images whose mode the JPEG writer cannot
            store (for example ``RGBA``, ``LA``, ``P`` or ``F``) are
            converted to ``RGB`` first; images in a supported mode are saved
            unchanged.

    Returns:
        A ``BytesIO`` holding the JPEG data (quality 95), rewound to offset
        0 so it can be read or uploaded directly.
    """
    # Modes Pillow's JPEG encoder can write as-is. Anything else (alpha,
    # palette, float, ...) makes ``save`` raise, so normalise it to RGB.
    jpeg_modes = ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr")
    if image.mode not in jpeg_modes:
        image = image.convert("RGB")

    buffered = BytesIO()
    image.save(buffered, format="JPEG", quality=95)
    # Rewind: consumers read from the current position.
    buffered.seek(0)
    return buffered
|
|
|
|
|
|
|
def decode_image(img_obj):
    """Open ``img_obj`` with PIL and return it converted to RGB.

    Args:
        img_obj: Anything ``Image.open`` accepts; the caller in this file
            passes a ``BytesIO`` wrapping the HTTP response body.

    Returns:
        The opened image converted to mode ``"RGB"``.
    """
    opened = Image.open(img_obj)
    rgb_image = opened.convert("RGB")
    return rgb_image
|
|
|
def edict(x, source_text, edit_text,
          edit_strength, guidance_scale,
          steps=50, mix_weight=0.93, ):
    """Edit an image by delegating to the remote EDICT service.

    The input array is JPEG-encoded and POSTed as multipart form data to
    ``endpoint.url + "/api/edit"`` together with both prompts and the two
    strength settings. A 200 response body is decoded back into an RGB
    array.

    Args:
        x: Input image as an array accepted by ``Image.fromarray``.
        source_text: Description of the original image.
        edit_text: Description of the desired edit.
        edit_strength: Sent to the service as ``edit_strength``.
        guidance_scale: Sent to the service as ``guidance_scale``.
        steps: Accepted so the signature mirrors ``local_edict``; it is not
            sent to the service.
        mix_weight: Accepted so the signature mirrors ``local_edict``; it is
            not sent to the service.

    Returns:
        The edited image as a NumPy array.

    Raises:
        gr.Error: If no image was supplied, the service could not be
            reached, or it answered with a non-200 status. Raising (rather
            than returning an error string) lets Gradio show the message to
            the user instead of trying to treat a string as image output.
    """
    if x is None:
        # Without this guard Image.fromarray(None) fails with an opaque
        # AttributeError.
        raise gr.Error("Please provide an input image.")

    url = endpoint.url + "/api/edit"
    headers = {
        "User-Agent": "EDICT HuggingFace Space",
        "Auth-Token": get_token(),
    }
    data = {
        "source_text": source_text,
        "edit_text": edit_text,
        "edit_strength": edit_strength,
        "guidance_scale": guidance_scale,
    }
    files = {"image": encode_image(Image.fromarray(x))}

    try:
        # (connect, read) timeout in seconds. The read timeout is kept
        # generous because an edit may take a while, but a dead backend
        # must not hang the request forever.
        response = requests.post(
            url, data=data, files=files, headers=headers, timeout=(10, 600)
        )
    except requests.RequestException as err:
        # Keep the user-facing message generic; the chained exception (with
        # the backend address) stays in the server-side traceback only.
        raise gr.Error("Error: could not reach the EDICT service.") from err

    if response.status_code != 200:
        raise gr.Error("Error: " + response.text)

    return np.array(decode_image(BytesIO(response.content)))
|
|
|
|
|
|
|
# Example rows shown under the demo. Column order follows the positional
# parameters of `edict`:
#   [image path, original description, edit description,
#    edit_strength, guidance_scale]
examples = [
    ['square_ims/american_gothic.jpg', 'A painting of two people frowning', 'A painting of two people smiling', 0.5, 3],
    ['square_ims/colloseum.jpg', 'An old ruined building', 'A new modern office building', 0.8, 3],
    ['square_ims/scream.jpg', 'A painting of someone screaming', 'A painting of an alien', 0.5, 3],
    ['square_ims/yosemite.jpg', 'Granite forest valley', 'Granite desert valley', 0.8, 3],
    ['square_ims/einstein.jpg', 'Mouth open', 'Mouth closed', 0.8, 3],
    ['square_ims/einstein.jpg', 'A man', 'A man in K.I.S.S. facepaint', 0.8, 3],
]

# Disabled cupcake examples, kept for reference:
# examples.extend([
#     ['square_ims/imagenet_cake_2.jpg', 'A cupcake', 'A Chinese New Year cupcake', 0.8, 3],
#     ['square_ims/imagenet_cake_2.jpg', 'A cupcake', 'A Union Jack cupcake', 0.8, 3],
#     ['square_ims/imagenet_cake_2.jpg', 'A cupcake', 'A Nigerian flag cupcake', 0.8, 3],
#     ['square_ims/imagenet_cake_2.jpg', 'A cupcake', 'A Santa Claus cupcake', 0.8, 3],
#     ['square_ims/imagenet_cake_2.jpg', 'A cupcake', 'An Easter cupcake', 0.8, 3],
#     ['square_ims/imagenet_cake_2.jpg', 'A cupcake', 'A hedgehog cupcake', 0.8, 3],
#     ['square_ims/imagenet_cake_2.jpg', 'A cupcake', 'A rose cupcake', 0.8, 3],
# ])

# Two dog photos, each re-captioned as three breeds (photo-major order).
examples += [
    [f'square_ims/imagenet_dog_{dog_i}.jpg', 'A dog', f'A {breed}', 0.8, 3]
    for dog_i in (1, 2)
    for breed in ('Golden Retriever', 'Chihuahua', 'Dalmatian')
]
|
|
|
|
|
# Short markdown blurb passed to gr.Interface as `description`.
description = 'A gradio demo for [EDICT](https://arxiv.org/abs/2211.12446) (CVPR23)'
|
|
|
|
|
# Markdown passed to gr.Interface as `article`: prompting tips, guidance on
# the two strength parameters, and miscellaneous caveats.
article = """

### Prompting Style

As with many text-to-image methods, the prompting style of EDICT can make a big difference. When in doubt, experiment! Some guidance:
* Parallel *Original Description* and *Edit Description* construction as much as possible. Inserting/editing single words often is enough to affect a change while maintaining a lot of the original structure
* Words that will affect the entire setting (e.g. "A photo of " vs. "A painting of") can make a big difference. Playing around with them can help a lot

### Parameters
Both `edit_strength` and `guidance_scale` have similar properties qualitatively: the higher the value the more the image will change. We suggest
* Increasing/decreasing `edit_strength` first, particularly to alter/preserve more of the original structure/content
* Then changing `guidance_scale` to make the change in the edited region more or less pronounced.

Usually we find changing `edit_strength` to be enough, but feel free to play around (and report any interesting results)!

### Misc.

Having difficulty coming up with a caption? Try [BLIP](https://huggingface.co/spaces/Salesforce/BLIP2) to automatically generate one!

As with most StableDiffusion approaches, faces/text are often problematic to render, especially if they're small. Having these in the foreground will help keep them cleaner.

A returned black image means that the [Safety Checker](https://huggingface.co/CompVis/stable-diffusion-safety-checker) triggered on the photo. This happens in odd cases sometimes (it often rejects
the huggingface logo or variations), but we need to keep it in for obvious reasons.
"""
|
|
|
|
|
# Build the demo UI around the remote `edict` function. The inputs are listed
# in the same order as edict's leading positional parameters
# (x, source_text, edit_text, edit_strength, guidance_scale), which is also
# the column order of each row in `examples`.
iface = gr.Interface(
    fn=edict,
    inputs=[
        "image",
        gr.Textbox(label="Original Description"),
        gr.Textbox(label="Edit Description"),
        gr.Slider(0.0, 1, value=0.8, step=0.05),  # 4th input -> edit_strength
        gr.Slider(0, 10, value=3, step=0.5),  # 5th input -> guidance_scale
    ],
    outputs="image",
    examples=examples,
    cache_examples=True,
    description=description,
    article=article,
)

# Start serving the demo.
iface.launch()
|
|