Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -186,9 +186,85 @@ def click_on_display(language_instruction, grounding_texts, sketch_pad,
|
|
186 |
|
187 |
return gen_images + [state]
|
188 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
189 |
def generate(unet, vae, tokenizer, text_encoder, language_instruction, grounding_texts, sketch_pad,
|
190 |
loss_threshold, guidance_scale, batch_size, rand_seed, max_step, loss_scale, max_iter,
|
191 |
state):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
192 |
if 'boxes' not in state:
|
193 |
state['boxes'] = []
|
194 |
boxes = state['boxes']
|
|
|
186 |
|
187 |
return gen_images + [state]
|
188 |
|
189 |
+
def Pharse2idx(prompt, phrases):
    """Map each grounding phrase to its 1-based token positions in *prompt*.

    *phrases* is a single string of phrases separated by '; ', where '_'
    stands in for a space inside a multi-word phrase (e.g. "red_car; dog").
    Returns one list of positions per phrase, or None (after printing the
    prompt and phrases for debugging) if any phrase is absent.
    """

    def _window_matches(window, target):
        # Exact token match, or each prompt token may be a simple plural
        # ('s' / 'es' suffix) of the corresponding phrase token.
        if window == target:
            return True
        for got, want in zip(window, target):
            if got != want and got != want + 's' and got != want + 'es':
                return False
        return True

    phrases = [p.replace('_', ' ') for p in phrases.split('; ')]
    print(phrases)
    object_positions = []

    # Detach punctuation so it tokenizes as its own word and never glues
    # onto a phrase token.
    for punc in [',', '.', ';', ':', '?', '!']:
        prompt = prompt.replace(punc, ' ' + punc)
    words = prompt.split()

    for phrase in phrases:
        target = phrase.split()
        span = len(target)
        positions = []

        # Slide a window of the phrase's length across the prompt tokens.
        for start in range(len(words) - span + 1):
            if _window_matches(words[start:start + span], target):
                positions += list(range(start + 1, start + span + 1))
        if positions == []:
            print(prompt)
            print(phrases)
            return None
        object_positions.append(positions)
    print(object_positions)
    return object_positions
|
218 |
+
|
219 |
+
|
220 |
def generate(unet, vae, tokenizer, text_encoder, language_instruction, grounding_texts, sketch_pad,
             loss_threshold, guidance_scale, batch_size, rand_seed, max_step, loss_scale, max_iter,
             state):
    """Run box-grounded image generation for the UI callback.

    language_instruction is the text prompt; grounding_texts is a
    ';'-separated string of phrases, one per box drawn on the sketch pad
    (stored in state['boxes']).  Returns the gr.Image updates for up to
    4 gallery slots followed by the (possibly mutated) state.

    Raises ValueError when fewer boxes were drawn than phrases given.
    """
    if 'boxes' not in state:
        state['boxes'] = []
    boxes = state['boxes']

    print('grounding texts:', grounding_texts)
    # Keep the raw phrase string: Pharse2idx does its own '; ' splitting.
    phrases = grounding_texts

    grounding_texts = [x.strip() for x in grounding_texts.split(';')]

    if len(boxes) != len(grounding_texts):
        if len(boxes) < len(grounding_texts):
            raise ValueError("""The number of boxes should be equal to the number of grounding objects.
Number of boxes drawn: {}, number of grounding tokens: {}.
Please draw boxes accordingly on the sketch pad.""".format(len(boxes), len(grounding_texts)))
        # More boxes than phrases: pad with empty grounding tokens.
        grounding_texts = grounding_texts + [""] * (len(boxes) - len(grounding_texts))

    # Normalize pixel coordinates to [0, 1]; sketch pad appears to be
    # 512x512 — TODO confirm against the UI component.
    boxes = (np.asarray(boxes) / 512).tolist()
    boxes = [[box] for box in boxes]

    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    # BUGFIX: the previous version first computed object_positions with
    # language_instruction_list.index(word) — dead code whose result was
    # immediately overwritten below, but which raised ValueError whenever a
    # phrase word was absent from the prompt.  It also built an unused
    # grounding_instruction JSON string.  Both removed.
    object_positions = Pharse2idx(language_instruction, phrases)

    gen_images = inference(device, unet, vae, tokenizer, text_encoder, language_instruction, boxes,
                           object_positions, batch_size, loss_scale, loss_threshold, max_iter,
                           max_step, rand_seed, guidance_scale)

    # Pad/hide gallery slots so exactly 4 gr.Image outputs are returned.
    blank_samples = batch_size % 2 if batch_size > 1 else 0
    gen_images = [gr.Image.update(value=x, visible=True) for x in gen_images] \
        + [gr.Image.update(value=None, visible=True) for _ in range(blank_samples)] \
        + [gr.Image.update(value=None, visible=False) for _ in range(4 - batch_size - blank_samples)]

    return gen_images + [state]
|
264 |
+
|
265 |
+
def generate_legacy(unet, vae, tokenizer, text_encoder, language_instruction, grounding_texts, sketch_pad,
|
266 |
+
loss_threshold, guidance_scale, batch_size, rand_seed, max_step, loss_scale, max_iter,
|
267 |
+
state):
|
268 |
if 'boxes' not in state:
|
269 |
state['boxes'] = []
|
270 |
boxes = state['boxes']
|