Spaces:
wssb
/
Runtime error

File size: 5,846 Bytes
d2a06b2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
import gradio as gr
import numpy as np
# from edict_functions import EDICT_editing
from PIL import Image
from utils import Endpoint, get_token
from io import BytesIO
import requests


# Shared handle on the remote EDICT inference service; `endpoint.url` is the
# base URL used by `edict()` below. Defined in the local `utils` module.
endpoint = Endpoint()

def local_edict(x, source_text, edit_text,
         edit_strength, guidance_scale,
          steps=50, mix_weight=0.93, ):
    """Run EDICT image editing locally (in-process) instead of via the API.

    NOTE(review): `EDICT_editing` is NOT in scope — its import on the
    `from edict_functions import ...` line is commented out at the top of
    this file, so calling this function raises NameError. It appears to be
    kept as the offline counterpart of `edict()` below; re-enable the
    import before using it.

    Args:
        x: input image as a numpy array (gradio "image" input format).
        source_text: caption describing the original image.
        edit_text: caption describing the desired edited image.
        edit_strength: init-image strength passed to EDICT.
        guidance_scale: classifier-free guidance scale passed to EDICT.
        steps: number of diffusion steps.
        mix_weight: EDICT mixing weight.

    Returns:
        The edited image as a numpy array.
    """
    # EDICT_editing expects a PIL image, gradio hands us a numpy array.
    x = Image.fromarray(x)
    # EDICT_editing presumably returns a sequence of images; [0] takes the
    # first — TODO confirm against edict_functions.
    return_im =  EDICT_editing(x,
                         source_text,
                         edit_text,
                  steps=steps,
                  mix_weight=mix_weight,
                  init_image_strength=edit_strength,
                  guidance_scale=guidance_scale
                              )[0]
    # Convert back to numpy for the gradio "image" output component.
    return np.array(return_im)

def encode_image(image):
    """Serialize a PIL image into an in-memory JPEG buffer.

    Args:
        image: a PIL image (anything with a ``save(buf, format=, quality=)``
            method).

    Returns:
        A ``BytesIO`` positioned at the start, ready to be read or uploaded.
    """
    jpeg_buffer = BytesIO()
    image.save(jpeg_buffer, format="JPEG", quality=95)
    # Rewind so the caller (requests file upload) reads from byte 0.
    jpeg_buffer.seek(0)
    return jpeg_buffer



def decode_image(img_obj):
    """Load an image from a file-like object and return it as an RGB PIL image.

    Args:
        img_obj: a binary file-like object (e.g. BytesIO over JPEG bytes).

    Returns:
        A PIL image converted to the RGB color mode.
    """
    decoded = Image.open(img_obj)
    return decoded.convert("RGB")

def edict(x, source_text, edit_text,
         edit_strength, guidance_scale,
          steps=50, mix_weight=0.93, ):
    """Edit an image by POSTing it to the remote EDICT API endpoint.

    Args:
        x: input image as a numpy array (gradio "image" input format).
        source_text: caption describing the original image.
        edit_text: caption describing the desired edited image.
        edit_strength: how strongly the original image is altered (0-1).
        guidance_scale: classifier-free guidance scale.
        steps: accepted for signature parity with `local_edict`.
            NOTE(review): not forwarded to the API — confirm the endpoint
            applies its own default.
        mix_weight: same caveat as `steps`.

    Returns:
        The edited image as a numpy array on success, or the string
        "Error: <response text>" when the endpoint returns a non-200 status
        (preserved from the original contract; gradio will surface it as a
        runtime error on the image output).
    """
    url = endpoint.url + "/api/edit"
    headers = {
        "User-Agent": "EDICT HuggingFace Space",
        "Auth-Token": get_token(),
    }

    data = {
        "source_text": source_text,
        "edit_text": edit_text,
        "edit_strength": edit_strength,
        "guidance_scale": guidance_scale,
    }

    # Ship the image as an in-memory JPEG upload.
    files = {"image": encode_image(Image.fromarray(x))}

    # A timeout keeps the gradio worker from hanging forever if the endpoint
    # is unreachable (the original call had none). Editing is slow, so allow
    # a generous window.
    response = requests.post(url, data=data, files=files, headers=headers,
                             timeout=300)

    if response.status_code == 200:
        return np.array(decode_image(BytesIO(response.content)))
    return "Error: " + response.text

# Curated demo inputs: [image path, original caption, edit caption,
# edit_strength, guidance_scale] — the order matches the Interface inputs.
examples = [
    ['square_ims/american_gothic.jpg', 'A painting of two people frowning', 'A painting of two people smiling', 0.5, 3],
    ['square_ims/colloseum.jpg', 'An old ruined building', 'A new modern office building', 0.8, 3],
    ['square_ims/scream.jpg', 'A painting of someone screaming', 'A painting of an alien', 0.5, 3],
    ['square_ims/yosemite.jpg', 'Granite forest valley', 'Granite desert valley', 0.8, 3],
    ['square_ims/einstein.jpg', 'Mouth open', 'Mouth closed', 0.8, 3],
    ['square_ims/einstein.jpg', 'A man', 'A man in K.I.S.S. facepaint', 0.8, 3],
]

# Disabled cupcake variations (kept for reference):
#     ['square_ims/imagenet_cake_2.jpg', 'A cupcake', 'A Chinese New Year cupcake', 0.8, 3]
#     ['square_ims/imagenet_cake_2.jpg', 'A cupcake', 'A Union Jack cupcake', 0.8, 3]
#     ['square_ims/imagenet_cake_2.jpg', 'A cupcake', 'A Nigerian flag cupcake', 0.8, 3]
#     ['square_ims/imagenet_cake_2.jpg', 'A cupcake', 'A Santa Claus cupcake', 0.8, 3]
#     ['square_ims/imagenet_cake_2.jpg', 'A cupcake', 'An Easter cupcake', 0.8, 3]
#     ['square_ims/imagenet_cake_2.jpg', 'A cupcake', 'A hedgehog cupcake', 0.8, 3]
#     ['square_ims/imagenet_cake_2.jpg', 'A cupcake', 'A rose cupcake', 0.8, 3]

# One breed-change example per (dog image, breed) pair.
for dog_idx in [1, 2]:
    for dog_breed in ['Golden Retriever', 'Chihuahua', 'Dalmatian']:
        examples.append([f'square_ims/imagenet_dog_{dog_idx}.jpg', 'A dog', f'A {dog_breed}', 0.8, 3])


# Short blurb shown under the demo title (gradio renders it as markdown).
description = 'A gradio demo for [EDICT](https://arxiv.org/abs/2211.12446) (CVPR23)'
# description = gr.Markdown(description)

# Long-form usage notes rendered below the interface (markdown string).
article = """

### Prompting Style

As with many text-to-image methods, the prompting style of EDICT can make a big difference. When in doubt, experiment! Some guidance:
* Parallel *Original Description* and *Edit Description* construction as much as possible. Inserting/editing single words often is enough to affect a change while maintaining a lot of the original structure
* Words that will affect the entire setting (e.g. "A photo of " vs. "A painting of") can make a big difference. Playing around with them can help a lot

### Parameters
Both `edit_strength` and `guidance_scale` have similar properties qualitatively: the higher the value the more the image will change. We suggest
* Increasing/decreasing `edit_strength` first, particularly to alter/preserve more of the original structure/content
* Then changing `guidance_scale` to make the change in the edited region more or less pronounced.

Usually we find changing `edit_strength` to be enough, but feel free to play around (and report any interesting results)!

### Misc.

Having difficulty coming up with a caption? Try [BLIP](https://huggingface.co/spaces/Salesforce/BLIP2) to automatically generate one!

As with most StableDiffusion approaches, faces/text are often problematic to render, especially if they're small. Having these in the foreground will help keep them cleaner.

A returned black image means that the [Safety Checker](https://huggingface.co/CompVis/stable-diffusion-safety-checker) triggered on the photo. This happens in odd cases sometimes (it often rejects
the huggingface logo or variations), but we need to keep it in for obvious reasons.
"""
# article = gr.Markdown(description)

# Assemble the demo: image + two captions + two sliders in, edited image out.
original_caption_box = gr.Textbox(label="Original Description")
edit_caption_box = gr.Textbox(label="Edit Description")
# Sliders map to `edit_strength` and `guidance_scale` (steps / mix_weight
# stay at their function defaults; earlier slider choices left disabled:
# gr.Slider(5, 50, value=20, step=1) and gr.Slider(0.5, 1, value=0.7, step=0.05)).
edit_strength_slider = gr.Slider(0.0, 1, value=0.8, step=0.05)
guidance_scale_slider = gr.Slider(0, 10, value=3, step=0.5)

iface = gr.Interface(
    fn=edict,
    inputs=[
        "image",
        original_caption_box,
        edit_caption_box,
        edit_strength_slider,
        guidance_scale_slider,
    ],
    examples=examples,
    outputs="image",
    description=description,
    article=article,
    cache_examples=True,
)
iface.launch()