File size: 5,846 Bytes
d2a06b2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 |
import gradio as gr
import numpy as np
# from edict_functions import EDICT_editing
from PIL import Image
from utils import Endpoint, get_token
from io import BytesIO
import requests
# Module-level endpoint object; `edict` reads `endpoint.url` to build the
# URL of the remote editing API.
endpoint = Endpoint()
def local_edict(x, source_text, edit_text,
                edit_strength, guidance_scale,
                steps=50, mix_weight=0.93):
    """Run an EDICT edit in-process via ``EDICT_editing``.

    NOTE(review): the ``edict_functions`` import that provides
    ``EDICT_editing`` is commented out at the top of this file, so the
    name must be supplied elsewhere for this function to be callable.

    Args:
        x: Input image as an array accepted by ``Image.fromarray``.
        source_text: Description of the original image.
        edit_text: Description of the desired edited image.
        edit_strength: Forwarded as ``init_image_strength``.
        guidance_scale: Forwarded as ``guidance_scale``.
        steps: Forwarded as ``steps``.
        mix_weight: Forwarded as ``mix_weight``.

    Returns:
        The first element of the ``EDICT_editing`` result, as a numpy array.
    """
    source_image = Image.fromarray(x)
    edited_images = EDICT_editing(
        source_image,
        source_text,
        edit_text,
        steps=steps,
        mix_weight=mix_weight,
        init_image_strength=edit_strength,
        guidance_scale=guidance_scale,
    )
    # Only the first returned image is used.
    first_result = edited_images[0]
    return np.array(first_result)
def encode_image(image):
    """Serialize an image to an in-memory JPEG stream.

    Args:
        image: Image object exposing ``mode``, ``convert`` and ``save``
            (a ``PIL.Image.Image`` everywhere this file calls it).

    Returns:
        A ``BytesIO`` containing the JPEG bytes (quality 95), rewound to
        position 0 so it can be read or uploaded immediately.
    """
    # Pillow's JPEG writer only accepts these modes; anything else (for
    # example "RGBA", "LA" or "P") raises OSError on save. Convert such
    # images to RGB first and leave already-writable modes untouched.
    jpeg_writable_modes = ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr")
    if image.mode not in jpeg_writable_modes:
        image = image.convert("RGB")

    jpeg_stream = BytesIO()
    image.save(jpeg_stream, format="JPEG", quality=95)
    jpeg_stream.seek(0)
    return jpeg_stream
def decode_image(img_obj):
    """Open ``img_obj`` with ``Image.open`` and return it converted to RGB."""
    return Image.open(img_obj).convert("RGB")
def edict(x, source_text, edit_text,
          edit_strength, guidance_scale,
          steps=50, mix_weight=0.93):
    """Edit an image by POSTing it to the remote EDICT endpoint.

    The image is JPEG-encoded and uploaded as the ``image`` file part;
    the two prompts and the two numeric settings are sent as form fields.

    Args:
        x: Input image as an array accepted by ``Image.fromarray``.
        source_text: Description of the original image.
        edit_text: Description of the desired edited image.
        edit_strength: Sent as the ``edit_strength`` form field.
        guidance_scale: Sent as the ``guidance_scale`` form field.
        steps: Accepted for signature parity with ``local_edict``; it is
            not included in the request.
        mix_weight: Accepted for signature parity with ``local_edict``;
            it is not included in the request.

    Returns:
        The edited image decoded from the response body, as an RGB numpy
        array.

    Raises:
        gr.Error: If the server answers with a status other than 200. The
            message is ``"Error: "`` followed by the response text.
    """
    url = endpoint.url + "/api/edit"
    headers = {
        "User-Agent": "EDICT HuggingFace Space",
        "Auth-Token": get_token(),
    }
    data = {
        "source_text": source_text,
        "edit_text": edit_text,
        "edit_strength": edit_strength,
        "guidance_scale": guidance_scale,
    }
    files = {"image": encode_image(Image.fromarray(x))}

    # NOTE(review): no timeout is passed, so a stalled server blocks this
    # call indefinitely - pick one once the expected latency is known.
    response = requests.post(url, data=data, files=files, headers=headers)

    if response.status_code != 200:
        # This function feeds an image output component, so a returned
        # string would be treated as image data rather than shown as a
        # message. Raising gr.Error surfaces the text in the UI.
        raise gr.Error("Error: " + response.text)

    return np.array(decode_image(BytesIO(response.content)))
# Example rows for the demo. Each row is
# [image path, original description, edit description, edit_strength, guidance_scale],
# matching the order of the interface inputs.
examples = [
    ['square_ims/american_gothic.jpg', 'A painting of two people frowning', 'A painting of two people smiling', 0.5, 3],
    ['square_ims/colloseum.jpg', 'An old ruined building', 'A new modern office building', 0.8, 3],
    ['square_ims/scream.jpg', 'A painting of someone screaming', 'A painting of an alien', 0.5, 3],
    ['square_ims/yosemite.jpg', 'Granite forest valley', 'Granite desert valley', 0.8, 3],
    ['square_ims/einstein.jpg', 'Mouth open', 'Mouth closed', 0.8, 3],
    ['square_ims/einstein.jpg', 'A man', 'A man in K.I.S.S. facepaint', 0.8, 3],
]

# Disabled cupcake examples, kept for reference:
# ['square_ims/imagenet_cake_2.jpg', 'A cupcake', 'A Chinese New Year cupcake', 0.8, 3],
# ['square_ims/imagenet_cake_2.jpg', 'A cupcake', 'A Union Jack cupcake', 0.8, 3],
# ['square_ims/imagenet_cake_2.jpg', 'A cupcake', 'A Nigerian flag cupcake', 0.8, 3],
# ['square_ims/imagenet_cake_2.jpg', 'A cupcake', 'A Santa Claus cupcake', 0.8, 3],
# ['square_ims/imagenet_cake_2.jpg', 'A cupcake', 'An Easter cupcake', 0.8, 3],
# ['square_ims/imagenet_cake_2.jpg', 'A cupcake', 'A hedgehog cupcake', 0.8, 3],
# ['square_ims/imagenet_cake_2.jpg', 'A cupcake', 'A rose cupcake', 0.8, 3],

# One example per (dog photo, target breed) pair, photo-major order.
examples += [
    [f'square_ims/imagenet_dog_{dog_i}.jpg', 'A dog', f'A {breed}', 0.8, 3]
    for dog_i in (1, 2)
    for breed in ('Golden Retriever', 'Chihuahua', 'Dalmatian')
]
# Short markdown blurb passed to gr.Interface as its `description`.
description = 'A gradio demo for [EDICT](https://arxiv.org/abs/2211.12446) (CVPR23)'
# description = gr.Markdown(description)
# Longer markdown usage notes (prompting tips, parameter guidance, caveats)
# passed to gr.Interface as its `article`. This is user-facing text: edit
# the wording deliberately, since every character is rendered in the demo.
article = """
### Prompting Style
As with many text-to-image methods, the prompting style of EDICT can make a big difference. When in doubt, experiment! Some guidance:
* Parallel *Original Description* and *Edit Description* construction as much as possible. Inserting/editing single words often is enough to affect a change while maintaining a lot of the original structure
* Words that will affect the entire setting (e.g. "A photo of " vs. "A painting of") can make a big difference. Playing around with them can help a lot
### Parameters
Both `edit_strength` and `guidance_scale` have similar properties qualitatively: the higher the value the more the image will change. We suggest
* Increasing/decreasing `edit_strength` first, particularly to alter/preserve more of the original structure/content
* Then changing `guidance_scale` to make the change in the edited region more or less pronounced.
Usually we find changing `edit_strength` to be enough, but feel free to play around (and report any interesting results)!
### Misc.
Having difficulty coming up with a caption? Try [BLIP](https://huggingface.co/spaces/Salesforce/BLIP2) to automatically generate one!
As with most StableDiffusion approaches, faces/text are often problematic to render, especially if they're small. Having these in the foreground will help keep them cleaner.
A returned black image means that the [Safety Checker](https://huggingface.co/CompVis/stable-diffusion-safety-checker) triggered on the photo. This happens in odd cases sometimes (it often rejects
the huggingface logo or variations), but we need to keep it in for obvious reasons.
"""
# article = gr.Markdown(description)
# Build the demo UI around `edict`. The five inputs map positionally onto
# its first five parameters; `steps` and `mix_weight` keep their defaults
# because their sliders are disabled below.
iface = gr.Interface(
    fn=edict,
    inputs=[
        "image",
        gr.Textbox(label="Original Description"),
        gr.Textbox(label="Edit Description"),
        # Disabled controls for steps / mix_weight:
        # 50, # gr.Slider(5, 50, value=20, step=1),
        # 0.93, # gr.Slider(0.5, 1, value=0.7, step=0.05),
        gr.Slider(0.0, 1, value=0.8, step=0.05),
        gr.Slider(0, 10, value=3, step=0.5),
    ],
    outputs="image",
    examples=examples,
    cache_examples=True,
    description=description,
    article=article,
)

# Start serving the interface as soon as this module is executed.
iface.launch()
|