Pusheen committed
Commit 281df87
Parent: 99b29f3

Upload 139 files

This view is limited to 50 files because it contains too many changes.
Files changed (50)
  1. __init__.py +0 -0
  2. app.py +793 -0
  3. dataset/__init__.py +0 -0
  4. dataset/__pycache__/__init__.cpython-38.pyc +0 -0
  5. dataset/__pycache__/catalog.cpython-38.pyc +0 -0
  6. dataset/__pycache__/concat_dataset.cpython-38.pyc +0 -0
  7. dataset/base_dataset.py +220 -0
  8. dataset/catalog.py +72 -0
  9. dataset/cd_dataset.py +250 -0
  10. dataset/concat_dataset.py +65 -0
  11. dataset/grounding_dataset.py +205 -0
  12. dataset/layout_dataset.py +237 -0
  13. dataset/tsv.py +212 -0
  14. dataset/tsv_dataset.py +326 -0
  15. dataset/utils.py +116 -0
  16. environment.yaml +29 -0
  17. example_component.py +805 -0
  18. gligen/.DS_Store +0 -0
  19. gligen/SD_input_conv_weight_bias.pth +3 -0
  20. gligen/__init__.py +10 -0
  21. gligen/__pycache__/__init__.cpython-38.pyc +0 -0
  22. gligen/__pycache__/distributed.cpython-38.pyc +0 -0
  23. gligen/__pycache__/evaluator.cpython-38.pyc +0 -0
  24. gligen/__pycache__/task_grounded_generation.cpython-38.pyc +0 -0
  25. gligen/__pycache__/trainer.cpython-38.pyc +0 -0
  26. gligen/create_meta.py +170 -0
  27. gligen/distributed.py +122 -0
  28. gligen/evaluator.py +225 -0
  29. gligen/ldm/.DS_Store +0 -0
  30. gligen/ldm/__pycache__/util.cpython-38.pyc +0 -0
  31. gligen/ldm/data/.DS_Store +0 -0
  32. gligen/ldm/data/__init__.py +0 -0
  33. gligen/ldm/data/base.py +23 -0
  34. gligen/ldm/data/imagenet.py +394 -0
  35. gligen/ldm/data/imagenet_clsidx_to_label.txt +1000 -0
  36. gligen/ldm/data/imagenet_train_hr_indices.p +3 -0
  37. gligen/ldm/data/imagenet_val_hr_indices.p +3 -0
  38. gligen/ldm/data/index_synset.yaml +1000 -0
  39. gligen/ldm/data/lsun.py +92 -0
  40. gligen/ldm/lr_scheduler.py +98 -0
  41. gligen/ldm/models/.DS_Store +0 -0
  42. gligen/ldm/models/__pycache__/autoencoder.cpython-38.pyc +0 -0
  43. gligen/ldm/models/autoencoder.py +52 -0
  44. gligen/ldm/models/diffusion/__init__.py +0 -0
  45. gligen/ldm/models/diffusion/__pycache__/__init__.cpython-38.pyc +0 -0
  46. gligen/ldm/models/diffusion/__pycache__/ddim.cpython-38.pyc +0 -0
  47. gligen/ldm/models/diffusion/__pycache__/ddpm.cpython-38.pyc +0 -0
  48. gligen/ldm/models/diffusion/__pycache__/gaussian_smoothing.cpython-38.pyc +0 -0
  49. gligen/ldm/models/diffusion/__pycache__/ldm.cpython-38.pyc +0 -0
  50. gligen/ldm/models/diffusion/__pycache__/loss.cpython-38.pyc +0 -0
__init__.py ADDED
File without changes
app.py ADDED
@@ -0,0 +1,793 @@
import gradio as gr
import torch
from omegaconf import OmegaConf
from gligen.task_grounded_generation import grounded_generation_box, load_ckpt, load_common_ckpt

import json
import numpy as np
from PIL import Image, ImageDraw, ImageFont
from functools import partial
from collections import Counter
import math
import gc

from gradio import processing_utils
from typing import Optional

import warnings

from datetime import datetime

from example_component import create_examples

from huggingface_hub import hf_hub_download
hf_hub_download = partial(hf_hub_download, library_name="gligen_demo")
import cv2
import sys
sys.tracebacklimit = 0


def load_from_hf(repo_id, filename='diffusion_pytorch_model.bin', subfolder=None):
    cache_file = hf_hub_download(repo_id=repo_id, filename=filename, subfolder=subfolder)
    return torch.load(cache_file, map_location='cpu')


def load_ckpt_config_from_hf(modality):
    ckpt = load_from_hf('gligen/demo_ckpts_legacy', filename=f'{modality}.pth', subfolder='model')
    config = load_from_hf('gligen/demo_ckpts_legacy', filename=f'{modality}.pth', subfolder='config')
    return ckpt, config


def ckpt_load_helper(modality, is_inpaint, is_style, common_instances=None):
    pretrained_ckpt_gligen, config = load_ckpt_config_from_hf(modality)
    config = OmegaConf.create(config["_content"])  # config used in training
    config.alpha_scale = 1.0

    if common_instances is None:
        common_ckpt = load_from_hf('gligen/demo_ckpts_legacy', filename='common.pth', subfolder='model')
        common_instances = load_common_ckpt(config, common_ckpt)

    loaded_model_list = load_ckpt(config, pretrained_ckpt_gligen, common_instances)

    return loaded_model_list, common_instances


class Instance:
    def __init__(self, capacity=2):
        self.model_type = 'base'
        self.loaded_model_list = {}
        self.counter = Counter()
        self.global_counter = Counter()
        self.loaded_model_list['base'], self.common_instances = ckpt_load_helper(
            'gligen-generation-text-box',
            is_inpaint=False, is_style=False, common_instances=None
        )
        self.capacity = capacity

    def _log(self, model_type, batch_size, instruction, phrase_list):
        self.counter[model_type] += 1
        self.global_counter[model_type] += 1
        current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        print('[{}] Current: {}, All: {}. Samples: {}, prompt: {}, phrases: {}'.format(
            current_time, dict(self.counter), dict(self.global_counter), batch_size, instruction, phrase_list
        ))

    def get_model(self, model_type, batch_size, instruction, phrase_list):
        if model_type in self.loaded_model_list:
            self._log(model_type, batch_size, instruction, phrase_list)
            return self.loaded_model_list[model_type]

        if self.capacity == len(self.loaded_model_list):
            least_used_type = self.counter.most_common()[-1][0]
            del self.loaded_model_list[least_used_type]
            del self.counter[least_used_type]
            gc.collect()
            torch.cuda.empty_cache()

        self.loaded_model_list[model_type] = self._get_model(model_type)
        self._log(model_type, batch_size, instruction, phrase_list)
        return self.loaded_model_list[model_type]

    def _get_model(self, model_type):
        if model_type == 'base':
            return ckpt_load_helper(
                'gligen-generation-text-box',
                is_inpaint=False, is_style=False, common_instances=self.common_instances
            )[0]
        elif model_type == 'inpaint':
            return ckpt_load_helper(
                'gligen-inpainting-text-box',
                is_inpaint=True, is_style=False, common_instances=self.common_instances
            )[0]
        elif model_type == 'style':
            return ckpt_load_helper(
                'gligen-generation-text-image-box',
                is_inpaint=False, is_style=True, common_instances=self.common_instances
            )[0]

        assert False

instance = Instance()


def load_clip_model():
    from transformers import CLIPProcessor, CLIPModel
    version = "openai/clip-vit-large-patch14"
    model = CLIPModel.from_pretrained(version).cuda()
    processor = CLIPProcessor.from_pretrained(version)

    return {
        'version': version,
        'model': model,
        'processor': processor,
    }

clip_model = load_clip_model()


class ImageMask(gr.components.Image):
    """
    Sets: source="canvas", tool="sketch"
    """

    is_template = True

    def __init__(self, **kwargs):
        super().__init__(source="upload", tool="sketch", interactive=True, **kwargs)

    def preprocess(self, x):
        if x is None:
            return x
        if self.tool == "sketch" and self.source in ["upload", "webcam"] and type(x) != dict:
            decode_image = processing_utils.decode_base64_to_image(x)
            width, height = decode_image.size
            img = np.asarray(decode_image)
            return {'image': img, 'mask': binarize_2(img)}

            mask = np.zeros((height, width, 4), dtype=np.uint8)
            mask[..., -1] = 255
            mask = self.postprocess(mask)
            x = {'image': x, 'mask': mask}
        print('entering preprocess-------------------------')
        hh = super().preprocess(x)
        if (hh['image'].min() != 255) and (hh['mask'][:, :, :3].max() == 0):
            hh['mask'] = binarize_2(hh['image'])

        return hh


class Blocks(gr.Blocks):

    def __init__(
        self,
        theme: str = "default",
        analytics_enabled: Optional[bool] = None,
        mode: str = "blocks",
        title: str = "Gradio",
        css: Optional[str] = None,
        **kwargs,
    ):

        self.extra_configs = {
            'thumbnail': kwargs.pop('thumbnail', ''),
            'url': kwargs.pop('url', 'https://gradio.app/'),
            'creator': kwargs.pop('creator', '@teamGradio'),
        }

        super(Blocks, self).__init__(theme, analytics_enabled, mode, title, css, **kwargs)
        warnings.filterwarnings("ignore")

    def get_config_file(self):
        config = super(Blocks, self).get_config_file()

        for k, v in self.extra_configs.items():
            config[k] = v

        return config

'''
inference model
'''

# @torch.no_grad()
def inference(task, language_instruction, phrase_list, location_list, inpainting_boxes_nodrop, image,
              alpha_sample, guidance_scale, batch_size,
              fix_seed, rand_seed, actual_mask, style_image,
              *args, **kwargs):
    # import pdb; pdb.set_trace()

    # grounding_instruction = json.loads(grounding_instruction)
    # phrase_list, location_list = [], []
    # for k, v in grounding_instruction.items():
    #     phrase_list.append(k)
    #     location_list.append(v)

    placeholder_image = Image.open('images/teddy.jpg').convert("RGB")
    image_list = [placeholder_image] * len(phrase_list)  # placeholder input for visual prompt, which is disabled

    batch_size = int(batch_size)
    if not 1 <= batch_size <= 4:
        batch_size = 1

    if style_image is None:
        has_text_mask = 1
        has_image_mask = 0  # then we hack the 'image_list' above
    else:
        valid_phrase_len = len(phrase_list)

        phrase_list += ['placeholder']
        has_text_mask = [1] * valid_phrase_len + [0]

        image_list = [placeholder_image] * valid_phrase_len + [style_image]
        has_image_mask = [0] * valid_phrase_len + [1]

        location_list += [[0.0, 0.0, 1, 0.01]]  # style image grounding location

    instruction = dict(
        prompt=language_instruction,
        phrases=phrase_list,
        images=image_list,
        locations=location_list,
        alpha_type=[alpha_sample, 0, 1.0 - alpha_sample],
        has_text_mask=has_text_mask,
        has_image_mask=has_image_mask,
        save_folder_name=language_instruction,
        guidance_scale=guidance_scale,
        batch_size=batch_size,
        fix_seed=bool(fix_seed),
        rand_seed=int(rand_seed),
        actual_mask=actual_mask,
        inpainting_boxes_nodrop=inpainting_boxes_nodrop,
    )

    get_model = partial(instance.get_model,
                        batch_size=batch_size,
                        instruction=language_instruction,
                        phrase_list=phrase_list)

    with torch.autocast(device_type='cuda', dtype=torch.float16):
        if task in ('User provide boxes', 'Available boxes'):
            if style_image is None:
                result = grounded_generation_box(get_model('base'), instruction, *args, **kwargs)
                torch.cuda.empty_cache()
                return result
            else:
                return grounded_generation_box(get_model('style'), instruction, *args, **kwargs)


def draw_box(boxes=[], texts=[], img=None):
    if len(boxes) == 0 and img is None:
        return None

    if img is None:
        img = Image.new('RGB', (512, 512), (255, 255, 255))
    colors = ["red", "olive", "blue", "green", "orange", "brown", "cyan", "purple"]
    draw = ImageDraw.Draw(img)
    font = ImageFont.truetype("DejaVuSansMono.ttf", size=18)
    for bid, box in enumerate(boxes):
        draw.rectangle([box[0], box[1], box[2], box[3]], outline=colors[bid % len(colors)], width=4)
        anno_text = texts[bid]
        draw.rectangle([box[0], box[3] - int(font.size * 1.2), box[0] + int((len(anno_text) + 0.8) * font.size * 0.6), box[3]], outline=colors[bid % len(colors)], fill=colors[bid % len(colors)], width=4)
        draw.text([box[0] + int(font.size * 0.2), box[3] - int(font.size * 1.2)], anno_text, font=font, fill=(255, 255, 255))
    return img


def get_concat(ims):
    if len(ims) == 1:
        n_col = 1
    else:
        n_col = 2
    n_row = math.ceil(len(ims) / 2)
    dst = Image.new('RGB', (ims[0].width * n_col, ims[0].height * n_row), color="white")
    for i, im in enumerate(ims):
        row_id = i // n_col
        col_id = i % n_col
        dst.paste(im, (im.width * col_id, im.height * row_id))
    return dst


def auto_append_grounding(language_instruction, grounding_texts):
    for grounding_text in grounding_texts:
        if grounding_text.lower() not in language_instruction.lower() and grounding_text != 'auto':
            language_instruction += "; " + grounding_text
    return language_instruction


def generate(task, language_instruction, grounding_texts, sketch_pad,
             alpha_sample, guidance_scale, batch_size,
             fix_seed, rand_seed, use_actual_mask, append_grounding, style_cond_image,
             state):

    if 'boxes' not in state:
        state['boxes'] = []

    boxes = state['boxes']
    grounding_texts = [x.strip() for x in grounding_texts.split(';')]
    # assert len(boxes) == len(grounding_texts)
    if len(boxes) != len(grounding_texts):
        if len(boxes) < len(grounding_texts):
            raise ValueError("""The number of boxes should be equal to the number of grounding objects.
Number of boxes drawn: {}, number of grounding tokens: {}.
Please draw boxes accordingly on the sketch pad.""".format(len(boxes), len(grounding_texts)))
        grounding_texts = grounding_texts + [""] * (len(boxes) - len(grounding_texts))

    boxes = (np.asarray(boxes) / 512).tolist()
    grounding_instruction = json.dumps({obj: box for obj, box in zip(grounding_texts, boxes)})
    image = None
    actual_mask = None

    if append_grounding:
        language_instruction = auto_append_grounding(language_instruction, grounding_texts)

    gen_images, gen_overlays = inference(
        task, language_instruction, grounding_texts, boxes, boxes, image,
        alpha_sample, guidance_scale, batch_size,
        fix_seed, rand_seed, actual_mask, style_cond_image, clip_model=clip_model,
    )
    blank_samples = batch_size % 2 if batch_size > 1 else 0
    gen_images = [gr.Image.update(value=x, visible=True) for i, x in enumerate(gen_images)] \
        + [gr.Image.update(value=None, visible=True) for _ in range(blank_samples)] \
        + [gr.Image.update(value=None, visible=False) for _ in range(4 - batch_size - blank_samples)]

    return gen_images + [state]


def binarize(x):
    return (x != 0).astype('uint8') * 255


def binarize_2(x):
    gray_image = cv2.cvtColor(x, cv2.COLOR_BGR2GRAY)
    return (gray_image != 255).astype('uint8') * 255


def sized_center_crop(img, cropx, cropy):
    y, x = img.shape[:2]
    startx = x // 2 - (cropx // 2)
    starty = y // 2 - (cropy // 2)
    return img[starty:starty + cropy, startx:startx + cropx]


def sized_center_fill(img, fill, cropx, cropy):
    y, x = img.shape[:2]
    startx = x // 2 - (cropx // 2)
    starty = y // 2 - (cropy // 2)
    img[starty:starty + cropy, startx:startx + cropx] = fill
    return img


def sized_center_mask(img, cropx, cropy):
    y, x = img.shape[:2]
    startx = x // 2 - (cropx // 2)
    starty = y // 2 - (cropy // 2)
    center_region = img[starty:starty + cropy, startx:startx + cropx].copy()
    img = (img * 0.2).astype('uint8')
    img[starty:starty + cropy, startx:startx + cropx] = center_region
    return img


def center_crop(img, HW=None, tgt_size=(512, 512)):
    if HW is None:
        H, W = img.shape[:2]
        HW = min(H, W)
    img = sized_center_crop(img, HW, HW)
    img = Image.fromarray(img)
    img = img.resize(tgt_size)
    return np.array(img)


def draw(task, input, grounding_texts, new_image_trigger, state, generate_parsed, box_image):
    print('input', generate_parsed)

    if type(input) == dict:
        image = input['image']
        mask = input['mask']
        if generate_parsed == 1:
            generate_parsed = 0
            # import pdb; pdb.set_trace()
            print('do nothing')
            return [box_image, new_image_trigger, 1., state, generate_parsed]
    else:
        mask = input

    if mask.ndim == 3:
        mask = mask[..., 0]

    image_scale = 1.0

    print('entering draw--------------------')
    mask = binarize(mask)
    if mask.shape != (512, 512):
        # assert False, "should not receive any non-512x512 masks."
        if 'original_image' in state and state['original_image'].shape[:2] == mask.shape:
            mask = center_crop(mask, state['inpaint_hw'])
            image = center_crop(state['original_image'], state['inpaint_hw'])
        else:
            mask = np.zeros((512, 512), dtype=np.uint8)
    mask = binarize(mask)

    if type(mask) != np.ndarray:
        mask = np.array(mask)

    if mask.sum() == 0:
        state = {}
        print('delete state')

    if True:
        image = None
    else:
        image = Image.fromarray(image)

    if 'boxes' not in state:
        state['boxes'] = []

    if 'masks' not in state or len(state['masks']) == 0:
        state['masks'] = []
        last_mask = np.zeros_like(mask)
    else:
        last_mask = state['masks'][-1]

    if type(mask) == np.ndarray and mask.size > 1:
        diff_mask = mask - last_mask
    else:
        diff_mask = np.zeros([])

    if diff_mask.sum() > 0:
        x1x2 = np.where(diff_mask.max(0) > 1)[0]
        y1y2 = np.where(diff_mask.max(1) > 1)[0]
        y1, y2 = y1y2.min(), y1y2.max()
        x1, x2 = x1x2.min(), x1x2.max()

        if (x2 - x1 > 5) and (y2 - y1 > 5):
            state['masks'].append(mask.copy())
            state['boxes'].append((x1, y1, x2, y2))

    grounding_texts = [x.strip() for x in grounding_texts.split(';')]
    grounding_texts = [x for x in grounding_texts if len(x) > 0]
    if len(grounding_texts) < len(state['boxes']):
        grounding_texts += [f'Obj. {bid+1}' for bid in range(len(grounding_texts), len(state['boxes']))]

    box_image = draw_box(state['boxes'], grounding_texts, image)
    generate_parsed = 0

    return [box_image, new_image_trigger, image_scale, state, generate_parsed]


def change_state(bboxes, layout, state, instruction, trigger_stage, boxes):
    if trigger_stage == 0:
        return [boxes, state, 0]

    state['boxes'] = []
    state['masks'] = []
    image = None
    list_boxes = bboxes.split('/')
    result = []
    for b in list_boxes:
        ints = b[1:-1].split(',')
        l = []
        for i in ints:
            l.append(int(i))
        result.append(l)
    print('run change state')

    for box in result:
        state['boxes'].append(box)
    grounding_texts = [x.strip() for x in instruction.split(';')]
    grounding_texts = [x for x in grounding_texts if len(x) > 0]
    if len(grounding_texts) < len(result):
        grounding_texts += [f'Obj. {bid+1}' for bid in range(len(grounding_texts), len(result))]

    box_image = draw_box(result, grounding_texts)

    mask = binarize_2(layout['image'])
    state['masks'].append(mask.copy())
    # print('done change state', state)
    print('done change state')
    # import pdb; pdb.set_trace()
    return [box_image, state, trigger_stage]


def example_click(name, grounding_instruction, instruction, bboxes, generate_parsed, trigger_parsed):

    list_boxes = bboxes.split('/')
    result = []

    for b in list_boxes:
        ints = b[1:-1].split(',')
        l = []
        for i in ints:
            l.append(int(i))
        result.append(l)
    print('run change state')

    box_image = draw_box(result, instruction)
    trigger_parsed += 1
    print('done the example click')
    return [box_image, trigger_parsed]


def clear(task, sketch_pad_trigger, batch_size, state, trigger_stage, switch_task=False):

    sketch_pad_trigger = sketch_pad_trigger + 1
    trigger_stage = 0
    blank_samples = batch_size % 2 if batch_size > 1 else 0
    out_images = [gr.Image.update(value=None, visible=True) for i in range(batch_size)] \
        + [gr.Image.update(value=None, visible=True) for _ in range(blank_samples)] \
        + [gr.Image.update(value=None, visible=False) for _ in range(4 - batch_size - blank_samples)]
    state = {}
    return [None, sketch_pad_trigger, None, 1.0] + out_images + [state] + [trigger_stage]


css = """
#img2img_image, #img2img_image > .fixed-height, #img2img_image > .fixed-height > div, #img2img_image > .fixed-height > div > img
{
    height: var(--height) !important;
    max-height: var(--height) !important;
    min-height: var(--height) !important;
}
#paper-info a {
    color:#008AD7;
    text-decoration: none;
}
#paper-info a:hover {
    cursor: pointer;
    text-decoration: none;
}
#my_image > div.fixed-height
{
    height: var(--height) !important;
}
"""

rescale_js = """
function(x) {
    const root = document.querySelector('gradio-app').shadowRoot || document.querySelector('gradio-app');
    let image_scale = parseFloat(root.querySelector('#image_scale input').value) || 1.0;
    const image_width = root.querySelector('#img2img_image').clientWidth;
    const target_height = parseInt(image_width * image_scale);
    document.body.style.setProperty('--height', `${target_height}px`);
    root.querySelectorAll('button.justify-center.rounded')[0].style.display='none';
    root.querySelectorAll('button.justify-center.rounded')[1].style.display='none';
    return x;
}
"""

# [<a href="https://arxiv.org/abs/2301.07093" target="_blank">Paper</a>]
with Blocks(
        css=css,
        analytics_enabled=False,
        title="Attention-refocusing demo",
) as main:
    description = """<p style="text-align: center; font-weight: bold;">
    <span style="font-size: 28px">Grounded Text-to-Image Synthesis with Attention Refocusing</span>
    <br>
    <span style="font-size: 18px" id="paper-info">
    [<a href="https://attention-refocusing.github.io/" target="_blank">Project Page</a>]
    [<a href="https://github.com/Attention-Refocusing/attention-refocusing" target="_blank">GitHub</a>]
    </span>
    </p>
    <p>
    To identify the areas of interest based on specific spatial parameters, you need to (1) &#9000;&#65039; input the names of the concepts you're interested in under <em>Grounding Instruction</em>, and (2) &#128433;&#65039; draw their corresponding bounding boxes using the <em>Sketch Pad</em> -- the parsed boxes will automatically show up once you've drawn them.
    <br>
    For faster inference without waiting in queue, you may duplicate the space and upgrade to a GPU in settings. <a href="https://huggingface.co/spaces/gligen/demo?duplicate=true"><img style="display: inline; margin-top: 0em; margin-bottom: 0em" src="https://bit.ly/3gLdBN6" alt="Duplicate Space" /></a>
    </p>
    """
    gr.HTML(description)

    with gr.Row():
        with gr.Column(scale=4):
            sketch_pad_trigger = gr.Number(value=0, visible=False)
            sketch_pad_resize_trigger = gr.Number(value=0, visible=False)
            trigger_stage = gr.Number(value=0, visible=False)

            init_white_trigger = gr.Number(value=0, visible=False)
            image_scale = gr.Number(value=1.0, elem_id="image_scale", visible=False)
            new_image_trigger = gr.Number(value=0, visible=False)
            text_box = gr.Textbox(visible=False)
            generate_parsed = gr.Number(value=0, visible=False)

            task = gr.Radio(
                choices=["Available boxes", 'User provide boxes'],
                type="value",
                value="User provide boxes",
                label="Task",
                visible=False
            )
            language_instruction = gr.Textbox(
                label="Language instruction",
            )
            grounding_instruction = gr.Textbox(
                label="Grounding instruction (separated by semicolon)",
            )
            with gr.Row():
                sketch_pad = ImageMask(label="Sketch Pad", elem_id="img2img_image")
                out_imagebox = gr.Image(type="pil", elem_id="my_image", label="Parsed Sketch Pad", shape=(512, 512))
            with gr.Row():
                clear_btn = gr.Button(value='Clear')
                gen_btn = gr.Button(value='Generate')
            with gr.Row():
                parsed_btn = gr.Button(value='generate parsed boxes', visible=False)

            with gr.Accordion("Advanced Options", open=False):
                with gr.Column():
                    alpha_sample = gr.Slider(minimum=0, maximum=1.0, step=0.1, value=0.3, label="Scheduled Sampling (τ)")
                    guidance_scale = gr.Slider(minimum=0, maximum=50, step=0.5, value=7.5, label="Guidance Scale")
                    batch_size = gr.Slider(minimum=1, maximum=4, visible=False, step=1, value=1, label="Number of Samples")
                    append_grounding = gr.Checkbox(value=True, label="Append grounding instructions to the caption")
                    use_actual_mask = gr.Checkbox(value=False, label="Use actual mask for inpainting", visible=False)
                    with gr.Row():
                        fix_seed = gr.Checkbox(value=True, label="Fixed seed")
                        rand_seed = gr.Slider(minimum=0, maximum=1000, step=1, value=0, label="Seed")

                    with gr.Row():
                        use_style_cond = gr.Checkbox(value=False, visible=False, label="Enable Style Condition")
                        style_cond_image = gr.Image(type="pil", visible=False, label="Style Condition", interactive=True)
        with gr.Column(scale=4):
            gr.HTML('<span style="font-size: 20px; font-weight: bold">Generated Images</span>')
            with gr.Row():
                out_gen_1 = gr.Image(type="pil", visible=True, show_label=False)
                out_gen_2 = gr.Image(type="pil", visible=False, show_label=False)
            with gr.Row():
                out_gen_3 = gr.Image(type="pil", visible=False, show_label=False)
                out_gen_4 = gr.Image(type="pil", visible=False, show_label=False)

    state = gr.State({})


    class Controller:
        def __init__(self):
            self.calls = 0
            self.tracks = 0
            self.resizes = 0
            self.scales = 0

        def init_white(self, init_white_trigger):
            self.calls += 1
            return np.ones((512, 512), dtype='uint8') * 255, 1.0, init_white_trigger + 1

        def change_n_samples(self, n_samples):
            blank_samples = n_samples % 2 if n_samples > 1 else 0
            return [gr.Image.update(visible=True) for _ in range(n_samples + blank_samples)] \
                + [gr.Image.update(visible=False) for _ in range(4 - n_samples - blank_samples)]

    controller = Controller()
    main.load(
        lambda x: x + 1,
        inputs=sketch_pad_trigger,
        outputs=sketch_pad_trigger,
        queue=False)

    sketch_pad.edit(
        draw,
        inputs=[task, sketch_pad, grounding_instruction, sketch_pad_resize_trigger, state, generate_parsed, out_imagebox],
        outputs=[out_imagebox, sketch_pad_resize_trigger, image_scale, state, generate_parsed],
        queue=False,
    )
    trigger_stage.change(
        change_state,
        inputs=[text_box, sketch_pad, state, grounding_instruction, trigger_stage, out_imagebox],
        outputs=[out_imagebox, state, trigger_stage],
        queue=True
    )
    grounding_instruction.change(
        draw,
        inputs=[task, sketch_pad, grounding_instruction, sketch_pad_resize_trigger, state, generate_parsed, out_imagebox],
        outputs=[out_imagebox, sketch_pad_resize_trigger, image_scale, state, generate_parsed],
        queue=False,
    )
    clear_btn.click(
        clear,
        inputs=[task, sketch_pad_trigger, batch_size, state, trigger_stage],
        outputs=[sketch_pad, sketch_pad_trigger, out_imagebox, image_scale, out_gen_1, out_gen_2, out_gen_3, out_gen_4, state, trigger_stage],
        queue=False)

    sketch_pad_trigger.change(
        controller.init_white,
        inputs=[init_white_trigger],
        outputs=[sketch_pad, image_scale, init_white_trigger],
        queue=False)

    gen_btn.click(
        generate,
        inputs=[
            task, language_instruction, grounding_instruction, sketch_pad,
            alpha_sample, guidance_scale, batch_size,
            fix_seed, rand_seed,
            use_actual_mask,
            append_grounding, style_cond_image,
            state,
        ],
        outputs=[out_gen_1, out_gen_2, out_gen_3, out_gen_4, state],
        queue=True
    )
    init_white_trigger.change(
        None,
        None,
        init_white_trigger,
        _js=rescale_js,
        queue=False)

    examples = [
        [
            'guide_imgs/0_a_cat_on_the_right_of_a_dog.jpg',
            "a cat;a dog",
            "a cat on the right of a dog",
            '(291, 88, 481, 301)/(25, 64, 260, 391)',
            1, 1
        ],
        [
            'guide_imgs/0_a_bus_on_the_left_of_a_car.jpg',
            "a bus;a car",
            "a bus and a car",
            '(8,128,266,384)/(300,196,502,316)',
            1, 2
        ],
        [
            'guide_imgs/1_Two_cars_on_the_street..jpg',
            "a car;a car",
            "Two cars on the street.",
            '(34, 98, 247, 264)/(271, 122, 481, 293)',
            1, 3
        ],
        [
            'guide_imgs/80_two_apples_lay_side_by_side_on_a_wooden_table,_their_glossy_red_and_green_skins_glinting_in_the_sunlight..jpg',
            "an apple;an apple",
            "two apples lay side by side on a wooden table, their glossy red and green skins glinting in the sunlight.",
            '(40, 210, 235, 450)/(275, 210, 470, 450)',
            1, 4
        ],
        [
            'guide_imgs/10_A_banana_on_the_left_of_an_apple..jpg',
            "a banana;an apple",
            "A banana on the left of an apple.",
            '(62, 193, 225, 354)/(300, 184, 432, 329)',
            1, 5
        ],
        [
            'guide_imgs/15_A_pizza_on_the_right_of_a_suitcase..jpg',
            "a pizza;a suitcase",
            "A pizza on the right of a suitcase.",
            '(307, 112, 490, 280)/(41, 120, 244, 270)',
            1, 6
        ],
        [
            'guide_imgs/1_A_wine_glass_on_top_of_a_dog..jpg',
            "a wine glass;a dog",
            "A wine glass on top of a dog.",
            '(206, 78, 306, 214)/(137, 222, 367, 432)',
            1, 7
        ],
        [
            'guide_imgs/2_A_bicycle_on_top_of_a_boat..jpg',
            "a bicycle;a boat",
            "A bicycle on top of a boat.",
            '(185, 110, 335, 205)/(111, 228, 401, 373)',
            1, 8
        ],
        [
            'guide_imgs/4_A_laptop_on_top_of_a_teddy_bear..jpg',
            "a laptop;a teddy bear",
            "A laptop on top of a teddy bear.",
            '(180, 70, 332, 210)/(150, 240, 362, 420)',
            1, 9
        ],
        [
            'guide_imgs/0_A_train_on_top_of_a_surfboard..jpg',
            "a train;a surfboard",
            "A train on top of a surfboard.",
            '(130, 80, 385, 240)/(75, 260, 440, 450)',
            1, 10
        ]
    ]

    with gr.Column():
        create_examples(
            examples=examples,
            inputs=[sketch_pad, grounding_instruction, language_instruction, text_box, generate_parsed, trigger_stage],
            outputs=None,
            fn=None,
            cache_examples=False,
        )

main.queue(concurrency_count=1, api_open=False)
main.launch(share=False, show_api=False, show_error=True, debug=False, server_name="0.0.0.0")
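
Note on the example format above: each example packs its boxes into a single string of `(x0, y0, x1, y1)` tuples separated by `/`, given in 512x512 sketch-pad pixel coordinates; `generate` later divides by 512 to get the normalized boxes passed to `inference`. A minimal sketch of that parsing, mirroring `change_state` above (the helper name is illustrative, not part of the repo):

```python
def parse_example_boxes(bboxes: str):
    # "(291, 88, 481, 301)/(25, 64, 260, 391)" -> [[291, 88, 481, 301], [25, 64, 260, 391]]
    boxes = []
    for b in bboxes.split('/'):
        boxes.append([int(v) for v in b.strip()[1:-1].split(',')])
    return boxes

print(parse_example_boxes('(291, 88, 481, 301)/(25, 64, 260, 391)'))
```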
dataset/__init__.py ADDED
File without changes
dataset/__pycache__/__init__.cpython-38.pyc ADDED
Binary file (139 Bytes).
 
dataset/__pycache__/catalog.cpython-38.pyc ADDED
Binary file (1.11 kB).
 
dataset/__pycache__/concat_dataset.cpython-38.pyc ADDED
Binary file (1.88 kB).
 
dataset/base_dataset.py ADDED
@@ -0,0 +1,220 @@
import torch
from PIL import Image, ImageDraw
import torchvision.transforms as transforms
import torchvision
from zipfile import ZipFile
import os
import multiprocessing
import math
import numpy as np
import random
from io import BytesIO

VALID_IMAGE_TYPES = ['.jpg', '.jpeg', '.tiff', '.bmp', '.png']


def check_filenames_in_zipdata(filenames, ziproot):
    samples = []
    for fst in ZipFile(ziproot).infolist():
        fname = fst.filename
        if fname.endswith('/') or fname.startswith('.') or fst.file_size == 0:
            continue
        if os.path.splitext(fname)[1].lower() in VALID_IMAGE_TYPES:
            samples.append(fname)
    filenames = set(filenames)
    samples = set(samples)
    assert filenames.issubset(samples), 'Something wrong with your zip data'


def draw_box(img, boxes):
    colors = ["red", "olive", "blue", "green", "orange", "brown", "cyan", "purple"]
    draw = ImageDraw.Draw(img)
    for bid, box in enumerate(boxes):
        draw.rectangle([box[0], box[1], box[2], box[3]], outline=colors[bid % len(colors)], width=4)
        # draw.rectangle([box[0], box[1], box[2], box[3]], outline="red", width=2)  # x0 y0 x1 y1
    return img


def to_valid(x0, y0, x1, y1, image_size, min_box_size):
    valid = True

    if x0 > image_size or y0 > image_size or x1 < 0 or y1 < 0:
        valid = False  # no way to make this box valid, it is completely cropped out
        return valid, (None, None, None, None)

    x0 = max(x0, 0)
    y0 = max(y0, 0)
    x1 = min(x1, image_size)
    y1 = min(y1, image_size)

    if (x1 - x0) * (y1 - y0) / (image_size * image_size) < min_box_size:
        valid = False
        return valid, (None, None, None, None)

    return valid, (x0, y0, x1, y1)


def recalculate_box_and_verify_if_valid(x, y, w, h, trans_info, image_size, min_box_size):
    """
    x, y, w, h: the original annotation corresponding to the raw image size.
    trans_info: what resizing and cropping have been applied to the raw image
    image_size: what the final image size is
    """

    x0 = x * trans_info["performed_scale"] - trans_info['crop_x']
    y0 = y * trans_info["performed_scale"] - trans_info['crop_y']
    x1 = (x + w) * trans_info["performed_scale"] - trans_info['crop_x']
    y1 = (y + h) * trans_info["performed_scale"] - trans_info['crop_y']

    # At this point, the box annotation has been recalculated based on scaling and cropping,
    # but some points may fall outside the image_size region (e.g., negative values), so we
    # clamp them into 0..image_size. If all points fall outside the image region, we consider
    # the box invalid.
    valid, (x0, y0, x1, y1) = to_valid(x0, y0, x1, y1, image_size, min_box_size)

    if valid:
        # We also perform random flip.
        # Here boxes are valid, and are based on image_size.
        if trans_info["performed_flip"]:
            x0, x1 = image_size - x1, image_size - x0

    return valid, (x0, y0, x1, y1)


class BaseDataset(torch.utils.data.Dataset):
    def __init__(self, image_root, random_crop, random_flip, image_size):
        super().__init__()
        self.image_root = image_root
        self.random_crop = random_crop
        self.random_flip = random_flip
        self.image_size = image_size
        self.use_zip = False

        if image_root[-4::] == 'zip':
            self.use_zip = True
            self.zip_dict = {}

        if self.random_crop:
            assert False, 'NOT IMPLEMENTED'

    def fetch_zipfile(self, ziproot):
        pid = multiprocessing.current_process().pid  # get pid of this process.
        if pid not in self.zip_dict:
            self.zip_dict[pid] = ZipFile(ziproot)
        zip_file = self.zip_dict[pid]
        return zip_file

    def fetch_image(self, filename):
        if self.use_zip:
            zip_file = self.fetch_zipfile(self.image_root)
            image = Image.open(BytesIO(zip_file.read(filename))).convert('RGB')
            return image
        else:
            image = Image.open(os.path.join(self.image_root, filename)).convert('RGB')
            return image

    def vis_getitem_data(self, index=None, out=None, return_tensor=False, name="res.jpg", print_caption=True):

        if out is None:
            out = self[index]

        img = torchvision.transforms.functional.to_pil_image(out["image"] * 0.5 + 0.5)
        canvas = torchvision.transforms.functional.to_pil_image(torch.ones_like(out["image"]))
        W, H = img.size

        if print_caption:
            caption = out["caption"]
            print(caption)
            print(" ")

        boxes = []
        for box in out["boxes"]:
            x0, y0, x1, y1 = box
            boxes.append([float(x0 * W), float(y0 * H), float(x1 * W), float(y1 * H)])
        img = draw_box(img, boxes)

        if return_tensor:
            return torchvision.transforms.functional.to_tensor(img)
        else:
            img.save(name)

    def transform_image(self, pil_image):
        if self.random_crop:
            assert False
            arr = random_crop_arr(pil_image, self.image_size)
        else:
            arr, info = center_crop_arr(pil_image, self.image_size)

        info["performed_flip"] = False
        if self.random_flip and random.random() < 0.5:
            arr = arr[:, ::-1]
            info["performed_flip"] = True

        arr = arr.astype(np.float32) / 127.5 - 1
        arr = np.transpose(arr, [2, 0, 1])

        return torch.tensor(arr), info


def center_crop_arr(pil_image, image_size):
    # We are not on a new enough PIL to support the `reducing_gap`
    # argument, which uses BOX downsampling at powers of two first.
    # Thus, we do it by hand to improve downsample quality.
    WW, HH = pil_image.size

    while min(*pil_image.size) >= 2 * image_size:
        pil_image = pil_image.resize(
            tuple(x // 2 for x in pil_image.size), resample=Image.BOX
        )

    scale = image_size / min(*pil_image.size)

    pil_image = pil_image.resize(
        tuple(round(x * scale) for x in pil_image.size), resample=Image.BICUBIC
    )

    # At this point, the min side of pil_image is the desired image_size.
    performed_scale = image_size / min(WW, HH)

    arr = np.array(pil_image)
    crop_y = (arr.shape[0] - image_size) // 2
    crop_x = (arr.shape[1] - image_size) // 2

    info = {"performed_scale": performed_scale, 'crop_y': crop_y, 'crop_x': crop_x, "WW": WW, 'HH': HH}

    return arr[crop_y:crop_y + image_size, crop_x:crop_x + image_size], info


def random_crop_arr(pil_image, image_size, min_crop_frac=0.8, max_crop_frac=1.0):
    min_smaller_dim_size = math.ceil(image_size / max_crop_frac)
    max_smaller_dim_size = math.ceil(image_size / min_crop_frac)
    smaller_dim_size = random.randrange(min_smaller_dim_size, max_smaller_dim_size + 1)

    # We are not on a new enough PIL to support the `reducing_gap`
    # argument, which uses BOX downsampling at powers of two first.
    # Thus, we do it by hand to improve downsample quality.
    while min(*pil_image.size) >= 2 * smaller_dim_size:
        pil_image = pil_image.resize(
            tuple(x // 2 for x in pil_image.size), resample=Image.BOX
        )

    scale = smaller_dim_size / min(*pil_image.size)
    pil_image = pil_image.resize(
        tuple(round(x * scale) for x in pil_image.size), resample=Image.BICUBIC
    )

    arr = np.array(pil_image)
    crop_y = random.randrange(arr.shape[0] - image_size + 1)
    crop_x = random.randrange(arr.shape[1] - image_size + 1)
    return arr[crop_y:crop_y + image_size, crop_x:crop_x + image_size]
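
The helpers above are meant to be used together: `transform_image` returns the resize/crop bookkeeping (`performed_scale`, `crop_x`, `crop_y`, `performed_flip`), and `recalculate_box_and_verify_if_valid` replays that bookkeeping on a COCO-style `(x, y, w, h)` box. A minimal sketch of the round trip; the image root, image, and box values below are made up for illustration:

```python
from PIL import Image
from dataset.base_dataset import BaseDataset, recalculate_box_and_verify_if_valid

dataset = BaseDataset(image_root='/path/to/images',  # placeholder directory
                      random_crop=False, random_flip=True, image_size=256)
pil_image = Image.new('RGB', (640, 480))              # stand-in for a fetched training image
image_tensor, trans_info = dataset.transform_image(pil_image)   # (3, 256, 256) tensor + scale/crop/flip info
valid, (x0, y0, x1, y1) = recalculate_box_and_verify_if_valid(
    x=100, y=50, w=200, h=120,                        # box annotated on the raw 640x480 image
    trans_info=trans_info, image_size=256, min_box_size=0.01)
```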
dataset/catalog.py ADDED
@@ -0,0 +1,72 @@
import os

class DatasetCatalog:
    def __init__(self, ROOT, which_embedder):
        assert which_embedder in ['clip', 'bert']

        # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

        self.VGGrounding = {
            "target": "dataset.tsv_dataset.TSVDataset",
            "train_params": dict(
                tsv_path=os.path.join(ROOT, 'GROUNDING/gqa/tsv/train-00.tsv'),
            )
        }

        # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

        self.FlickrGrounding = {
            "target": "dataset.tsv_dataset.TSVDataset",
            "train_params": dict(
                tsv_path=os.path.join(ROOT, 'GROUNDING/flickr30k/tsv/train-00.tsv'),
            )
        }

        # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

        self.SBUGrounding = {
            "target": "dataset.tsv_dataset.TSVDataset",
            "train_params": dict(
                tsv_path=os.path.join(ROOT, 'GROUNDING/SBU/tsv/train-00.tsv'),
            )
        }

        # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

        self.CC3MGrounding = {
            "target": "dataset.tsv_dataset.TSVDataset",
            "train_params": dict(
                tsv_path=os.path.join(ROOT, 'GROUNDING/CC3M/tsv/train-00.tsv'),
            )
        }

        # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

        self.CC12MGrounding = {
            "target": "dataset.tsv_dataset.TSVDataset",
            "train_params": dict(
                tsv_path=os.path.join(ROOT, 'GROUNDING/CC12M/tsv/train-00.tsv'),
            )
        }

        # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

        # temp = 'category_embedding_clip.pth' if which_embedder == 'clip' else 'category_embedding_bert.pth'
        # obj365_category_embedding_path = os.path.join(ROOT, 'OBJECTS365', temp)

        self.Obj365Detection = {
            "target": "dataset.tsv_dataset.TSVDataset",
            "train_params": dict(
                tsv_path=os.path.join(ROOT, 'OBJECTS365/tsv/train-00.tsv'),
            ),
        }
dataset/cd_dataset.py ADDED
@@ -0,0 +1,250 @@
import json, os, random, math
from collections import defaultdict
from copy import deepcopy

import torch
from torch.utils.data import Dataset
import torchvision.transforms as transforms

import numpy as np
from PIL import Image
from .base_dataset import BaseDataset, check_filenames_in_zipdata, recalculate_box_and_verify_if_valid
from io import BytesIO


def not_in_at_all(list1, list2):
    for a in list1:
        if a in list2:
            return False
    return True


def clean_annotations(annotations):
    for anno in annotations:
        anno.pop("segmentation", None)
        anno.pop("area", None)
        anno.pop("iscrowd", None)
        # anno.pop("id", None)


def make_a_sentence(obj_names, clean=False):

    if clean:
        obj_names = [name[:-6] if ("-other" in name) else name for name in obj_names]

    caption = ""
    tokens_positive = []
    for obj_name in obj_names:
        start_len = len(caption)
        caption += obj_name
        end_len = len(caption)
        caption += ", "
        tokens_positive.append(
            [[start_len, end_len]]  # in a real caption, positive tokens can be disjoint, hence the list of lists
        )
    caption = caption[:-2]  # remove last ", "

    return caption  # , tokens_positive


def check_all_have_same_images(instances_data, stuff_data, caption_data):
    if stuff_data is not None:
        assert instances_data["images"] == stuff_data["images"]
    if caption_data is not None:
        assert instances_data["images"] == caption_data["images"]


class CDDataset(BaseDataset):
    "CD: Caption Detection"
    def __init__(self,
                 image_root,
                 category_embedding_path,
                 instances_json_path=None,
                 stuff_json_path=None,
                 caption_json_path=None,
                 prob_real_caption=0,
                 fake_caption_type='empty',
                 image_size=256,
                 max_images=None,
                 min_box_size=0.01,
                 max_boxes_per_image=8,
                 include_other=False,
                 random_crop=False,
                 random_flip=True,
                 ):
        super().__init__(image_root, random_crop, random_flip, image_size)

        self.image_root = image_root
        self.category_embedding_path = category_embedding_path
        self.instances_json_path = instances_json_path
        self.stuff_json_path = stuff_json_path
        self.caption_json_path = caption_json_path
        self.prob_real_caption = prob_real_caption
        self.fake_caption_type = fake_caption_type
        self.max_images = max_images
        self.min_box_size = min_box_size
        self.max_boxes_per_image = max_boxes_per_image
        self.include_other = include_other

        assert fake_caption_type in ["empty", "made"]
        if prob_real_caption > 0:
            assert caption_json_path is not None, "caption json must be given"

        # Load all jsons
        with open(instances_json_path, 'r') as f:
            instances_data = json.load(f)  # keys: 'info', 'images', 'licenses', 'categories', 'annotations'
        clean_annotations(instances_data["annotations"])
        self.instances_data = instances_data

        self.stuff_data = None
        if stuff_json_path is not None:
            with open(stuff_json_path, 'r') as f:
                stuff_data = json.load(f)  # keys: 'info', 'images', 'licenses', 'categories', 'annotations'
            clean_annotations(stuff_data["annotations"])
            self.stuff_data = stuff_data

        self.captions_data = None
        if caption_json_path is not None:
            with open(caption_json_path, 'r') as f:
                captions_data = json.load(f)  # keys: 'info', 'images', 'licenses', 'categories', 'annotations'
            clean_annotations(captions_data["annotations"])
            self.captions_data = captions_data

        # Load preprocessed name embedding
        self.category_embeddings = torch.load(category_embedding_path)
        self.embedding_len = list(self.category_embeddings.values())[0].shape[0]

        # Misc
        self.image_ids = []  # main list for selecting images
        self.image_id_to_filename = {}  # file names used to read image
        check_all_have_same_images(self.instances_data, self.stuff_data, self.captions_data)
        for image_data in self.instances_data['images']:
            image_id = image_data['id']
            filename = image_data['file_name']
            self.image_ids.append(image_id)
            self.image_id_to_filename[image_id] = filename

        # All category names (including things and stuff)
        self.object_idx_to_name = {}
        for category_data in self.instances_data['categories']:
            self.object_idx_to_name[category_data['id']] = category_data['name']
        if self.stuff_data is not None:
            for category_data in self.stuff_data['categories']:
                self.object_idx_to_name[category_data['id']] = category_data['name']

        # Add object data from instances and stuff
        self.image_id_to_objects = defaultdict(list)
        self.select_objects(self.instances_data['annotations'])
        if self.stuff_data is not None:
            self.select_objects(self.stuff_data['annotations'])

        # Add caption data
        if self.captions_data is not None:
            self.image_id_to_captions = defaultdict(list)
            self.select_captions(self.captions_data['annotations'])

        # Check if all filenames can be found in the zip file
        # all_filenames = [self.image_id_to_filename[idx] for idx in self.image_ids]
        # check_filenames_in_zipdata(all_filenames, image_root)

    def select_objects(self, annotations):
        for object_anno in annotations:
            image_id = object_anno['image_id']
            object_name = self.object_idx_to_name[object_anno['category_id']]
            other_ok = object_name != 'other' or self.include_other
            if other_ok:
                self.image_id_to_objects[image_id].append(object_anno)

    def select_captions(self, annotations):
        for caption_data in annotations:
            image_id = caption_data['image_id']
            self.image_id_to_captions[image_id].append(caption_data)

    def total_images(self):
        return len(self)

    def __getitem__(self, index):
        if self.max_boxes_per_image > 99:
            assert False, "Are you sure you want such a large number of boxes?"

        out = {}

        image_id = self.image_ids[index]
        out['id'] = image_id

        # Image
        filename = self.image_id_to_filename[image_id]
        image = self.fetch_image(filename)
        # WW, HH = image.size
        image_tensor, trans_info = self.transform_image(image)
        out["image"] = image_tensor

        # Select valid boxes after cropping (center or random)
        this_image_obj_annos = deepcopy(self.image_id_to_objects[image_id])
        areas = []
        all_obj_names = []
        all_boxes = []
        all_masks = []
        all_positive_embeddings = []
        for object_anno in this_image_obj_annos:

            x, y, w, h = object_anno['bbox']
            valid, (x0, y0, x1, y1) = recalculate_box_and_verify_if_valid(x, y, w, h, trans_info, self.image_size, self.min_box_size)

            if valid:
                areas.append((x1 - x0) * (y1 - y0))
                obj_name = self.object_idx_to_name[object_anno['category_id']]
                all_obj_names.append(obj_name)
                all_boxes.append(torch.tensor([x0, y0, x1, y1]) / self.image_size)  # scale to 0-1
                all_masks.append(1)
                all_positive_embeddings.append(self.category_embeddings[obj_name])

        wanted_idxs = torch.tensor(areas).sort(descending=True)[1]
        wanted_idxs = wanted_idxs[0:self.max_boxes_per_image]
        obj_names = []  # used for making a sentence
        boxes = torch.zeros(self.max_boxes_per_image, 4)
        masks = torch.zeros(self.max_boxes_per_image)
        positive_embeddings = torch.zeros(self.max_boxes_per_image, self.embedding_len)
        for i, idx in enumerate(wanted_idxs):
            obj_names.append(all_obj_names[idx])
            boxes[i] = all_boxes[idx]
            masks[i] = all_masks[idx]
            positive_embeddings[i] = all_positive_embeddings[idx]

        # Caption
        if random.uniform(0, 1) < self.prob_real_caption:
            caption_data = self.image_id_to_captions[image_id]
            idx = random.randint(0, len(caption_data) - 1)
            caption = caption_data[idx]["caption"]
        else:
            if self.fake_caption_type == "empty":
                caption = ""
            else:
                caption = make_a_sentence(obj_names, clean=True)

        out["caption"] = caption
        out["boxes"] = boxes
        out["masks"] = masks
        out["positive_embeddings"] = positive_embeddings

        return out

    def __len__(self):
        if self.max_images is None:
            return len(self.image_ids)
        return min(len(self.image_ids), self.max_images)
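
For reference, each item returned by `CDDataset.__getitem__` is a dict of fixed-size tensors padded up to `max_boxes_per_image`. A sketch of the expected shapes, kept as comments because real COCO-style jsons and a category embedding file are needed to instantiate the dataset (the constructor paths below are illustrative placeholders):

```python
# dataset = CDDataset(image_root='coco/train2017.zip',                       # placeholder paths
#                     category_embedding_path='category_embedding_clip.pth',
#                     instances_json_path='annotations/instances_train2017.json')
# item = dataset[0]
# item["image"]               -> (3, 256, 256) float tensor in [-1, 1]
# item["boxes"]               -> (8, 4) boxes normalized to [0, 1], zero-padded
# item["masks"]               -> (8,) 1 for real boxes, 0 for padding
# item["positive_embeddings"] -> (8, embedding_len) per-box category embeddings
# item["caption"]             -> str (a real caption, "", or a made sentence)
```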
dataset/concat_dataset.py ADDED
@@ -0,0 +1,65 @@
from .catalog import DatasetCatalog
from ldm.util import instantiate_from_config
import torch


class ConCatDataset():
    def __init__(self, dataset_name_list, ROOT, which_embedder, train=True, repeats=None):
        self.datasets = []
        cul_previous_dataset_length = 0
        offset_map = []
        which_dataset = []

        if repeats is None:
            repeats = [1] * len(dataset_name_list)
        else:
            assert len(repeats) == len(dataset_name_list)

        Catalog = DatasetCatalog(ROOT, which_embedder)
        for dataset_idx, (dataset_name, yaml_params) in enumerate(dataset_name_list.items()):
            repeat = repeats[dataset_idx]

            dataset_dict = getattr(Catalog, dataset_name)

            target = dataset_dict['target']
            params = dataset_dict['train_params'] if train else dataset_dict['val_params']
            if yaml_params is not None:
                params.update(yaml_params)
            dataset = instantiate_from_config(dict(target=target, params=params))

            self.datasets.append(dataset)
            for _ in range(repeat):
                offset_map.append(torch.ones(len(dataset)) * cul_previous_dataset_length)
                which_dataset.append(torch.ones(len(dataset)) * dataset_idx)
                cul_previous_dataset_length += len(dataset)
        offset_map = torch.cat(offset_map, dim=0).long()
        self.total_length = cul_previous_dataset_length

        self.mapping = torch.arange(self.total_length) - offset_map
        self.which_dataset = torch.cat(which_dataset, dim=0).long()

    def total_images(self):
        count = 0
        for dataset in self.datasets:
            print(dataset.total_images())
            count += dataset.total_images()
        return count

    def __getitem__(self, idx):
        dataset = self.datasets[self.which_dataset[idx]]
        return dataset[self.mapping[idx]]

    def __len__(self):
        return self.total_length
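
`ConCatDataset` looks dataset names up as attributes of `DatasetCatalog` and chains them behind a single index space. A minimal usage sketch under stated assumptions: `/path/to/DATA` is a placeholder, the corresponding GROUNDING tsv files must exist under it, and `which_embedder='clip'` matches the assert in `DatasetCatalog`:

```python
from dataset.concat_dataset import ConCatDataset

dataset_name_list = {'FlickrGrounding': None, 'VGGrounding': None}  # name -> optional param overrides
train_data = ConCatDataset(dataset_name_list, ROOT='/path/to/DATA', which_embedder='clip', train=True)
print(len(train_data))   # total number of samples across both catalog entries
sample = train_data[0]   # routed to the underlying TSVDataset that owns index 0
```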
dataset/grounding_dataset.py ADDED
@@ -0,0 +1,205 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from tkinter.messagebox import NO
2
+ import torch
3
+ import json
4
+ from collections import defaultdict
5
+ from PIL import Image, ImageDraw
6
+ from copy import deepcopy
7
+ import os
8
+ import torchvision.transforms as transforms
9
+ import torchvision
10
+ from .base_dataset import BaseDataset, check_filenames_in_zipdata, recalculate_box_and_verify_if_valid
11
+ from io import BytesIO
12
+ import random
13
+
14
+ def check_unique(images, fields):
15
+ for field in fields:
16
+ temp_list = []
17
+ for img_info in images:
18
+ temp_list.append(img_info[field])
19
+ assert len(set(temp_list)) == len(temp_list), field
20
+
21
+ def clean_data(data):
22
+ for data_info in data:
23
+ data_info.pop("original_img_id", None)
24
+ data_info.pop("original_id", None)
25
+ data_info.pop("sentence_id", None) # sentence id for each image (multiple sentences for one image)
26
+ data_info.pop("dataset_name", None)
27
+ data_info.pop("data_source", None)
28
+ data_info["data_id"] = data_info.pop("id")
29
+
30
+
31
+ def clean_annotations(annotations):
32
+ for anno_info in annotations:
33
+ anno_info.pop("iscrowd", None) # I have checked that all 0 for flickr, vg, coco
34
+ anno_info.pop("category_id", None) # I have checked that all 1 for flickr vg. This is not always 1 for coco, but I do not think we need this annotation
35
+ anno_info.pop("area", None)
36
+ # anno_info.pop("id", None)
37
+ anno_info["data_id"] = anno_info.pop("image_id")
38
+
39
+
40
+ def draw_box(img, boxes):
41
+ draw = ImageDraw.Draw(img)
42
+ for box in boxes:
43
+ draw.rectangle([box[0], box[1], box[2], box[3]], outline ="red", width=2) # x0 y0 x1 y1
44
+ return img
45
+
46
+
47
+ def xyhw2xyxy(box):
48
+ x0, y0, w, h = box
49
+ return [ x0, y0, x0+w, y0+h ]
50
+
51
+
52
+
53
+ class GroundingDataset(BaseDataset):
54
+ def __init__(self,
55
+ image_root,
56
+ json_path,
57
+ annotation_embedding_path,
58
+ prob_real_caption=1,
59
+ image_size=256,
60
+ min_box_size=0.01,
61
+ max_boxes_per_data=8,
62
+ max_images=None, # set to 30K when used for evaluation
63
+ random_crop = False,
64
+ random_flip = True,
65
+ ):
66
+ super().__init__(image_root, random_crop, random_flip, image_size)
67
+ self.image_root = image_root
68
+ self.json_path = json_path
69
+ self.annotation_embedding_path = annotation_embedding_path
70
+ self.prob_real_caption = prob_real_caption
71
+ self.min_box_size = min_box_size
72
+ self.max_boxes_per_data = max_boxes_per_data
73
+ self.max_images = max_images
74
+
75
+
76
+ # Load raw data
77
+ with open(json_path, 'r') as f:
78
+ json_raw = json.load(f) # keys: 'info', 'images', 'licenses', 'categories', 'annotations'
79
+ self.data = json_raw["images"] # do not name it "images"; that would be misleading
80
+ self.annotations = json_raw["annotations"]
81
+
82
+
83
+ # Load preprocessed name embedding
84
+ if 'bert' in annotation_embedding_path:
85
+ self.embedding_len = 1280
86
+ elif 'clip' in annotation_embedding_path:
87
+ self.embedding_len = 768
88
+ else:
89
+ assert False
90
+
91
+
92
+ # clean data and annotation
93
+ check_unique( self.data, ['id'] )
94
+ check_unique( self.annotations, ['id'] )
95
+ clean_data(self.data)
96
+ clean_annotations(self.annotations)
97
+ self.data_id_list = [ datum['data_id'] for datum in self.data ]
98
+ self.data = { datum['data_id']:datum for datum in self.data } # map self.data from a list into a dict
99
+
100
+
101
+ # data point to its annotation mapping
102
+ self.data_id_to_annos = defaultdict(list)
103
+ for anno in self.annotations:
104
+ self.data_id_to_annos[ anno["data_id"] ].append(anno)
105
+
106
+
107
+
108
+ # These are not used that often, but are useful in some cases
109
+ self.file_names = [] # all training images
110
+ self.file_name_to_data_ids = defaultdict(list) # for each image, there are multiple data points (captions)
111
+ for data_id in self.data_id_list:
112
+ file_name = self.data[data_id]["file_name"]
113
+ self.file_names.append(file_name)
114
+ self.file_name_to_data_ids[file_name].append(data_id)
115
+ self.file_names = list(set(self.file_names))
116
+
117
+
118
+ if self.max_images is not None:
119
+ "This is only used as COCO2017P evulation, when we set max_images as 30k"
120
+ assert False, 'I have commented out the following code to save cpu memory'
121
+ # new_data_id_list = []
122
+ # new_file_name_to_data_ids = defaultdict(list)
123
+ # self.file_names = self.file_names[0:self.max_images]
124
+ # for file_name in self.file_names:
125
+ # data_id = self.file_name_to_data_ids[file_name][0]
126
+ # new_data_id_list.append(data_id)
127
+ # new_file_name_to_data_ids[file_name].append(data_id)
128
+ # self.data_id_list = new_data_id_list
129
+ # self.file_name_to_data_ids = new_file_name_to_data_ids
130
+
131
+
132
+ # Check if all filenames can be found in the zip file
133
+ # all_filenames = [self.data[idx]['file_name'] for idx in self.data_id_list ]
134
+ # check_filenames_in_zipdata(all_filenames, image_root)
135
+
136
+
137
+ def total_images(self):
138
+ return len(self.file_names)
139
+
140
+
141
+ def __getitem__(self, index):
142
+ if self.max_boxes_per_data > 99:
143
+ assert False, "Are you sure setting such large number of boxes?"
144
+
145
+ out = {}
146
+
147
+ data_id = self.data_id_list[index]
148
+ out['id'] = data_id
149
+
150
+
151
+ # Image and caption
152
+ file_name = self.data[data_id]['file_name']
153
+ image = self.fetch_image(file_name)
154
+ image_tensor, trans_info = self.transform_image(image)
155
+ out["image"] = image_tensor
156
+
157
+ if random.uniform(0, 1) < self.prob_real_caption:
158
+ out["caption"] = self.data[data_id]["caption"]
159
+ else:
160
+ out["caption"] = ""
161
+
162
+
163
+
164
+ annos = deepcopy(self.data_id_to_annos[data_id])
165
+ areas = []
166
+ all_boxes = []
167
+ all_masks = []
168
+ all_positive_embeddings = []
169
+
170
+
171
+ for anno in annos:
172
+
173
+ x, y, w, h = anno['bbox']
174
+ valid, (x0, y0, x1, y1) = recalculate_box_and_verify_if_valid(x, y, w, h, trans_info, self.image_size, self.min_box_size)
175
+
176
+ if valid:
177
+ areas.append( (x1-x0)*(y1-y0) )
178
+ all_boxes.append( torch.tensor([x0,y0,x1,y1]) / self.image_size ) # scale to 0-1
179
+ all_masks.append(1)
180
+ all_positive_embeddings.append( torch.load(os.path.join(self.annotation_embedding_path,str(anno["id"])), map_location='cpu' ) )
181
+
182
+ wanted_idxs = torch.tensor(areas).sort(descending=True)[1]
183
+ wanted_idxs = wanted_idxs[0:self.max_boxes_per_data]
184
+
185
+ boxes = torch.zeros(self.max_boxes_per_data, 4)
186
+ masks = torch.zeros(self.max_boxes_per_data)
187
+ positive_embeddings = torch.zeros(self.max_boxes_per_data, self.embedding_len)
188
+ for i, idx in enumerate(wanted_idxs):
189
+ boxes[i] = all_boxes[idx]
190
+ masks[i] = all_masks[idx]
191
+ positive_embeddings[i] = all_positive_embeddings[idx]
192
+
193
+
194
+ out["boxes"] = boxes
195
+ out["masks"] = masks
196
+ out["positive_embeddings"] = positive_embeddings
197
+
198
+ return out
199
+
200
+
201
+
202
+ def __len__(self):
203
+ return len(self.data_id_list)
204
+
205
+
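GroundingDataset.__getitem__ keeps only the largest `max_boxes_per_data` valid boxes and pads everything to fixed-size tensors so items can be batched. A minimal sketch of that sort-and-pad step, with made-up boxes and random tensors standing in for the loaded annotation embeddings:

```python
# Standalone sketch of the sort-by-area + zero-padding scheme used above.
import torch

max_boxes_per_data, embedding_len, image_size = 4, 8, 256
raw_boxes = [(10, 20, 100, 80), (0, 0, 30, 30), (50, 50, 200, 150)]  # x0, y0, x1, y1 in pixels

areas = torch.tensor([(x1 - x0) * (y1 - y0) for x0, y0, x1, y1 in raw_boxes])
wanted = areas.sort(descending=True)[1][:max_boxes_per_data]          # largest boxes first

boxes = torch.zeros(max_boxes_per_data, 4)
masks = torch.zeros(max_boxes_per_data)
embeddings = torch.zeros(max_boxes_per_data, embedding_len)
for i, idx in enumerate(wanted):
    boxes[i] = torch.tensor(raw_boxes[idx]) / image_size               # scale to 0-1
    masks[i] = 1                                                       # mark this slot as a real box
    embeddings[i] = torch.randn(embedding_len)                         # stand-in for a loaded embedding

print(boxes)
print(masks.tolist())   # [1.0, 1.0, 1.0, 0.0]: three real boxes, one padding slot
```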
dataset/layout_dataset.py ADDED
@@ -0,0 +1,237 @@
1
+ import json, os, random, math
2
+ from collections import defaultdict
3
+ from copy import deepcopy
4
+
5
+ import torch
6
+ from torch.utils.data import Dataset
7
+ import torchvision.transforms as transforms
8
+
9
+ import numpy as np
10
+ from PIL import Image, ImageOps
11
+ from .base_dataset import BaseDataset, check_filenames_in_zipdata
12
+ from io import BytesIO
13
+
14
+
15
+
16
+
17
+ def clean_annotations(annotations):
18
+ for anno in annotations:
19
+ anno.pop("segmentation", None)
20
+ anno.pop("area", None)
21
+ anno.pop("iscrowd", None)
22
+ anno.pop("id", None)
23
+
24
+
25
+ def make_a_sentence(obj_names, clean=False):
26
+
27
+ if clean:
28
+ obj_names = [ name[:-6] if ("-other" in name) else name for name in obj_names]
29
+
30
+ caption = ""
31
+ tokens_positive = []
32
+ for obj_name in obj_names:
33
+ start_len = len(caption)
34
+ caption += obj_name
35
+ end_len = len(caption)
36
+ caption += ", "
37
+ tokens_positive.append(
38
+ [[start_len, end_len]] # in real caption, positive tokens can be disjoint, thus using list of list
39
+ )
40
+ caption = caption[:-2] # remove last ", "
41
+
42
+ return caption #, tokens_positive
43
+
44
+
45
+ class LayoutDataset(BaseDataset):
46
+ """
47
+ Note: this dataset can somehow be achieved in cd_dataset.CDDataset
48
+ Since if you do not set prob_real_caption=0 in CDDataset, then that
49
+ dataset will only use detection annotations. However, in that dataset,
50
+ we do not remove images but remove boxes.
51
+
52
+ However, in layout2img works, people will just resize raw image data into 256*256,
53
+ thus they pre-calculate box size and apply min_box_size before min/max_boxes_per_image.
54
+ They then remove images that do not follow the rule.
55
+
56
+ These two different methods lead to different numbers of training/val images.
57
+ Thus this dataset here is only for layout2img.
58
+
59
+ """
60
+ def __init__(self,
61
+ image_root,
62
+ instances_json_path,
63
+ stuff_json_path,
64
+ category_embedding_path,
65
+ fake_caption_type = 'empty',
66
+ image_size=256,
67
+ max_samples=None,
68
+ min_box_size=0.02,
69
+ min_boxes_per_image=3,
70
+ max_boxes_per_image=8,
71
+ include_other=False,
72
+ random_flip=True
73
+ ):
74
+ super().__init__(random_crop=None, random_flip=None, image_size=None) # we only use the vis_getitem func in BaseDataset, do not use the others.
75
+
76
+ assert fake_caption_type in ['empty', 'made']
77
+ self.image_root = image_root
78
+ self.instances_json_path = instances_json_path
79
+ self.stuff_json_path = stuff_json_path
80
+ self.category_embedding_path = category_embedding_path
81
+ self.fake_caption_type = fake_caption_type
82
+ self.image_size = image_size
83
+ self.max_samples = max_samples
84
+ self.min_box_size = min_box_size
85
+ self.min_boxes_per_image = min_boxes_per_image
86
+ self.max_boxes_per_image = max_boxes_per_image
87
+ self.include_other = include_other
88
+ self.random_flip = random_flip
89
+
90
+
91
+ self.transform = transforms.Compose([transforms.Resize( (image_size, image_size) ),
92
+ transforms.ToTensor(),
93
+ transforms.Lambda(lambda t: (t * 2) - 1) ])
94
+
95
+ # Load all jsons
96
+ with open(instances_json_path, 'r') as f:
97
+ instances_data = json.load(f) # keys: 'info', 'images', 'licenses', 'categories', 'annotations'
98
+ clean_annotations(instances_data["annotations"])
99
+ self.instances_data = instances_data
100
+
101
+ with open(stuff_json_path, 'r') as f:
102
+ stuff_data = json.load(f) # keys: 'info', 'images', 'licenses', 'categories', 'annotations'
103
+ clean_annotations(stuff_data["annotations"])
104
+ self.stuff_data = stuff_data
105
+
106
+
107
+ # Load preprocessed name embedding
108
+ self.category_embeddings = torch.load(category_embedding_path)
109
+ self.embedding_len = list( self.category_embeddings.values() )[0].shape[0]
110
+
111
+
112
+ # Misc
113
+ self.image_ids = [] # main list for selecting images
114
+ self.image_id_to_filename = {} # file names used to read image
115
+ self.image_id_to_size = {} # original size of this image
116
+ assert instances_data['images'] == stuff_data["images"]
117
+ for image_data in instances_data['images']:
118
+ image_id = image_data['id']
119
+ filename = image_data['file_name']
120
+ width = image_data['width']
121
+ height = image_data['height']
122
+ self.image_ids.append(image_id)
123
+ self.image_id_to_filename[image_id] = filename
124
+ self.image_id_to_size[image_id] = (width, height)
125
+
126
+ # All category names (including things and stuff)
127
+ self.things_id_list = []
128
+ self.stuff_id_list = []
129
+ self.object_idx_to_name = {}
130
+ for category_data in instances_data['categories']:
131
+ self.things_id_list.append( category_data['id'] )
132
+ self.object_idx_to_name[category_data['id']] = category_data['name']
133
+ for category_data in stuff_data['categories']:
134
+ self.stuff_id_list.append( category_data['id'] )
135
+ self.object_idx_to_name[category_data['id']] = category_data['name']
136
+ self.all_categories = [ self.object_idx_to_name.get(k, None) for k in range(183+1) ]
137
+
138
+
139
+ # Add object data from instances and stuff
140
+ self.image_id_to_objects = defaultdict(list)
141
+ self.select_objects( instances_data['annotations'] )
142
+ self.select_objects( stuff_data['annotations'] )
143
+
144
+
145
+ # Prune images that have too few or too many objects
146
+ new_image_ids = []
147
+ for image_id in self.image_ids:
148
+ num_objs = len(self.image_id_to_objects[image_id])
149
+ if self.min_boxes_per_image <= num_objs <= self.max_boxes_per_image:
150
+ new_image_ids.append(image_id)
151
+ self.image_ids = new_image_ids
152
+
153
+
154
+ # Check if all filenames can be found in the zip file
155
+ all_filenames = [self.image_id_to_filename[idx] for idx in self.image_ids]
156
+ check_filenames_in_zipdata(all_filenames, image_root)
157
+
158
+
159
+
160
+ def select_objects(self, annotations):
161
+ for object_anno in annotations:
162
+ image_id = object_anno['image_id']
163
+ _, _, w, h = object_anno['bbox']
164
+ W, H = self.image_id_to_size[image_id]
165
+ box_area = (w * h) / (W * H)
166
+ box_ok = box_area > self.min_box_size
167
+ object_name = self.object_idx_to_name[object_anno['category_id']]
168
+ other_ok = object_name != 'other' or self.include_other
169
+ if box_ok and other_ok:
170
+ self.image_id_to_objects[image_id].append(object_anno)
171
+
172
+
173
+ def total_images(self):
174
+ return len(self)
175
+
176
+
177
+ def __getitem__(self, index):
178
+ if self.max_boxes_per_image > 99:
179
+ assert False, "Are you sure setting such large number of boxes?"
180
+
181
+ out = {}
182
+
183
+ image_id = self.image_ids[index]
184
+ out['id'] = image_id
185
+
186
+ flip = self.random_flip and random.random()<0.5
187
+
188
+ # Image
189
+ filename = self.image_id_to_filename[image_id]
190
+ zip_file = self.fetch_zipfile(self.image_root)
191
+ image = Image.open(BytesIO(zip_file.read(filename))).convert('RGB')
192
+ WW, HH = image.size
193
+ if flip:
194
+ image = ImageOps.mirror(image)
195
+ out["image"] = self.transform(image)
196
+
197
+ this_image_obj_annos = deepcopy(self.image_id_to_objects[image_id])
198
+
199
+ # Make a sentence
200
+ obj_names = [] # used for make a sentence
201
+ boxes = torch.zeros(self.max_boxes_per_image, 4)
202
+ masks = torch.zeros(self.max_boxes_per_image)
203
+ positive_embeddings = torch.zeros(self.max_boxes_per_image, self.embedding_len)
204
+ for idx, object_anno in enumerate(this_image_obj_annos):
205
+ obj_name = self.object_idx_to_name[ object_anno['category_id'] ]
206
+ obj_names.append(obj_name)
207
+ x, y, w, h = object_anno['bbox']
208
+ x0 = x / WW
209
+ y0 = y / HH
210
+ x1 = (x + w) / WW
211
+ y1 = (y + h) / HH
212
+ if flip:
213
+ x0, x1 = 1-x1, 1-x0
214
+ boxes[idx] = torch.tensor([x0,y0,x1,y1])
215
+ masks[idx] = 1
216
+ positive_embeddings[idx] = self.category_embeddings[obj_name]
217
+
218
+ if self.fake_caption_type == 'empty':
219
+ caption = ""
220
+ else:
221
+ caption = make_a_sentence(obj_names, clean=True)
222
+
223
+ out["caption"] = caption
224
+ out["boxes"] = boxes
225
+ out["masks"] = masks
226
+ out["positive_embeddings"] = positive_embeddings
227
+
228
+
229
+ return out
230
+
231
+
232
+ def __len__(self):
233
+ if self.max_samples is None:
234
+ return len(self.image_ids)
235
+ return min(len(self.image_ids), self.max_samples)
236
+
237
+
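When fake_caption_type == 'made', the caption is simply the comma-joined list of object names produced by make_a_sentence. Below is a simplified re-implementation (it drops the tokens_positive bookkeeping) just to show the expected output; with clean=True, "-other" suffixes such as "wall-other" are stripped before joining:

```python
# Simplified version of make_a_sentence, for illustration only.
def make_a_sentence_simple(obj_names, clean=False):
    if clean:
        obj_names = [n[:-6] if "-other" in n else n for n in obj_names]
    return ", ".join(obj_names)

print(make_a_sentence_simple(["person", "dog", "wall-other"], clean=True))
# -> "person, dog, wall"
```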
dataset/tsv.py ADDED
@@ -0,0 +1,212 @@
1
+ import os
2
+ import os.path as op
3
+ import gc
4
+ import json
5
+ from typing import List
6
+ import logging
7
+
8
+ try:
9
+ from .blob_storage import BlobStorage, disk_usage
10
+ except:
11
+ class BlobStorage:
12
+ pass
13
+
14
+
15
+ def generate_lineidx(filein: str, idxout: str) -> None:
16
+ idxout_tmp = idxout + '.tmp'
17
+ with open(filein, 'r') as tsvin, open(idxout_tmp, 'w') as tsvout:
18
+ fsize = os.fstat(tsvin.fileno()).st_size
19
+ fpos = 0
20
+ while fpos != fsize:
21
+ tsvout.write(str(fpos) + "\n")
22
+ tsvin.readline()
23
+ fpos = tsvin.tell()
24
+ os.rename(idxout_tmp, idxout)
25
+
26
+
27
+ def read_to_character(fp, c):
28
+ result = []
29
+ while True:
30
+ s = fp.read(32)
31
+ assert s != ''
32
+ if c in s:
33
+ result.append(s[: s.index(c)])
34
+ break
35
+ else:
36
+ result.append(s)
37
+ return ''.join(result)
38
+
39
+
40
+ class TSVFile(object):
41
+ def __init__(self,
42
+ tsv_file: str,
43
+ if_generate_lineidx: bool = False,
44
+ lineidx: str = None,
45
+ class_selector: List[str] = None,
46
+ blob_storage: BlobStorage = None):
47
+ self.tsv_file = tsv_file
48
+ self.lineidx = op.splitext(tsv_file)[0] + '.lineidx' \
49
+ if not lineidx else lineidx
50
+ self.linelist = op.splitext(tsv_file)[0] + '.linelist'
51
+ self.chunks = op.splitext(tsv_file)[0] + '.chunks'
52
+ self._fp = None
53
+ self._lineidx = None
54
+ self._sample_indices = None
55
+ self._class_boundaries = None
56
+ self._class_selector = class_selector
57
+ self._blob_storage = blob_storage
58
+ self._len = None
59
+ # keep track of the pid of the process that opened the file.
60
+ # If the pid is not equal to the current pid (e.g. in a forked dataloader worker), we will re-open the file.
61
+ self.pid = None
62
+ # generate the lineidx file if it does not exist
63
+ if not op.isfile(self.lineidx) and if_generate_lineidx:
64
+ generate_lineidx(self.tsv_file, self.lineidx)
65
+
66
+ def __del__(self):
67
+ self.gcidx()
68
+ if self._fp:
69
+ self._fp.close()
70
+ # physically remove the tsv file if it is retrieved by BlobStorage
71
+ if self._blob_storage and 'azcopy' in self.tsv_file and os.path.exists(self.tsv_file):
72
+ try:
73
+ original_usage = disk_usage('/')
74
+ os.remove(self.tsv_file)
75
+ logging.info("Purged %s (disk usage: %.2f%% => %.2f%%)" %
76
+ (self.tsv_file, original_usage, disk_usage('/') * 100))
77
+ except:
78
+ # Known issue: multiple threads attempting to delete the file will raise a FileNotFound error.
79
+ # TODO: try threading.Lock to better handle the race condition
80
+ pass
81
+
82
+ def __str__(self):
83
+ return "TSVFile(tsv_file='{}')".format(self.tsv_file)
84
+
85
+ def __repr__(self):
86
+ return str(self)
87
+
88
+ def gcidx(self):
89
+ logging.debug('Run gc collect')
90
+ self._lineidx = None
91
+ self._sample_indices = None
92
+ #self._class_boundaries = None
93
+ return gc.collect()
94
+
95
+ def get_class_boundaries(self):
96
+ return self._class_boundaries
97
+
98
+ def num_rows(self, gcf=False):
99
+ if (self._len is None):
100
+ self._ensure_lineidx_loaded()
101
+ retval = len(self._sample_indices)
102
+
103
+ if (gcf):
104
+ self.gcidx()
105
+
106
+ self._len = retval
107
+
108
+ return self._len
109
+
110
+ def seek(self, idx: int):
111
+ self._ensure_tsv_opened()
112
+ self._ensure_lineidx_loaded()
113
+ try:
114
+ pos = self._lineidx[self._sample_indices[idx]]
115
+ except:
116
+ logging.info('=> {}-{}'.format(self.tsv_file, idx))
117
+ raise
118
+ self._fp.seek(pos)
119
+ return [s.strip() for s in self._fp.readline().split('\t')]
120
+
121
+ def seek_first_column(self, idx: int):
122
+ self._ensure_tsv_opened()
123
+ self._ensure_lineidx_loaded()
124
+ pos = self._lineidx[idx]
125
+ self._fp.seek(pos)
126
+ return read_to_character(self._fp, '\t')
127
+
128
+ def get_key(self, idx: int):
129
+ return self.seek_first_column(idx)
130
+
131
+ def __getitem__(self, index: int):
132
+ return self.seek(index)
133
+
134
+ def __len__(self):
135
+ return self.num_rows()
136
+
137
+ def _ensure_lineidx_loaded(self):
138
+ if self._lineidx is None:
139
+ logging.debug('=> loading lineidx: {}'.format(self.lineidx))
140
+ with open(self.lineidx, 'r') as fp:
141
+ lines = fp.readlines()
142
+ lines = [line.strip() for line in lines]
143
+ self._lineidx = [int(line) for line in lines]
144
+
145
+ # read the line list if exists
146
+ linelist = None
147
+ if op.isfile(self.linelist):
148
+ with open(self.linelist, 'r') as fp:
149
+ linelist = sorted(
150
+ [
151
+ int(line.strip())
152
+ for line in fp.readlines()
153
+ ]
154
+ )
155
+
156
+ if op.isfile(self.chunks):
157
+ self._sample_indices = []
158
+ self._class_boundaries = []
159
+ class_boundaries = json.load(open(self.chunks, 'r'))
160
+ for class_name, boundary in class_boundaries.items():
161
+ start = len(self._sample_indices)
162
+ if class_name in self._class_selector:
163
+ for idx in range(boundary[0], boundary[1] + 1):
164
+ # NOTE: potentially slow when linelist is long, try to speed it up
165
+ if linelist and idx not in linelist:
166
+ continue
167
+ self._sample_indices.append(idx)
168
+ end = len(self._sample_indices)
169
+ self._class_boundaries.append((start, end))
170
+ else:
171
+ if linelist:
172
+ self._sample_indices = linelist
173
+ else:
174
+ self._sample_indices = list(range(len(self._lineidx)))
175
+
176
+ def _ensure_tsv_opened(self):
177
+ if self._fp is None:
178
+ if self._blob_storage:
179
+ self._fp = self._blob_storage.open(self.tsv_file)
180
+ else:
181
+ self._fp = open(self.tsv_file, 'r')
182
+ self.pid = os.getpid()
183
+
184
+ if self.pid != os.getpid():
185
+ logging.debug('=> re-open {} because the process id changed'.format(self.tsv_file))
186
+ self._fp = open(self.tsv_file, 'r')
187
+ self.pid = os.getpid()
188
+
189
+
190
+ class TSVWriter(object):
191
+ def __init__(self, tsv_file):
192
+ self.tsv_file = tsv_file
193
+ self.lineidx_file = op.splitext(tsv_file)[0] + '.lineidx'
194
+ self.tsv_file_tmp = self.tsv_file + '.tmp'
195
+ self.lineidx_file_tmp = self.lineidx_file + '.tmp'
196
+
197
+ self.tsv_fp = open(self.tsv_file_tmp, 'w')
198
+ self.lineidx_fp = open(self.lineidx_file_tmp, 'w')
199
+
200
+ self.idx = 0
201
+
202
+ def write(self, values, sep='\t'):
203
+ v = '{0}\n'.format(sep.join(map(str, values)))
204
+ self.tsv_fp.write(v)
205
+ self.lineidx_fp.write(str(self.idx) + '\n')
206
+ self.idx = self.idx + len(v)
207
+
208
+ def close(self):
209
+ self.tsv_fp.close()
210
+ self.lineidx_fp.close()
211
+ os.rename(self.tsv_file_tmp, self.tsv_file)
212
+ os.rename(self.lineidx_file_tmp, self.lineidx_file)
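A hedged round-trip sketch of how TSVWriter and TSVFile are meant to be used together (file names are made up; this assumes the `dataset` package is importable): the writer records one byte offset per row in the `.lineidx` sidecar, and the reader seeks to that offset for O(1) random access.

```python
# Round-trip: write a tiny TSV, then read rows back by index.
from dataset.tsv import TSVFile, TSVWriter

writer = TSVWriter("toy.tsv")            # also produces toy.lineidx
writer.write(["img_0", "payload_a"])     # one row = tab-separated columns
writer.write(["img_1", "payload_b"])
writer.close()                           # renames the *.tmp files into place

reader = TSVFile("toy.tsv")
print(len(reader))                       # -> 2
print(reader.seek(1))                    # -> ['img_1', 'payload_b']
print(reader.get_key(0))                 # -> 'img_0' (first column only)
```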
dataset/tsv_dataset.py ADDED
@@ -0,0 +1,326 @@
2
+ import torch
3
+ import json
4
+ from collections import defaultdict
5
+ from PIL import Image, ImageDraw
6
+ from copy import deepcopy
7
+ import os
8
+ import torchvision.transforms as transforms
9
+ import torchvision
10
+ from .base_dataset import BaseDataset, check_filenames_in_zipdata, recalculate_box_and_verify_if_valid
11
+ from io import BytesIO
12
+ import random
13
+
14
+ from .tsv import TSVFile
15
+
16
+ from io import BytesIO
17
+ import base64
18
+ from PIL import Image
19
+ import numpy as np
20
+
21
+
22
+ def decode_base64_to_pillow(image_b64):
23
+ return Image.open(BytesIO(base64.b64decode(image_b64))).convert('RGB')
24
+
25
+ def decode_tensor_from_string(arr_str, use_tensor=True):
26
+ arr = np.frombuffer(base64.b64decode(arr_str), dtype='float32')
27
+ if use_tensor:
28
+ arr = torch.from_numpy(arr)
29
+ return arr
30
+
31
+ def decode_item(item):
32
+ item = json.loads(item)
33
+ item['image'] = decode_base64_to_pillow(item['image'])
34
+
35
+ for anno in item['annos']:
36
+ anno['image_embedding_before'] = decode_tensor_from_string(anno['image_embedding_before'])
37
+ anno['text_embedding_before'] = decode_tensor_from_string(anno['text_embedding_before'])
38
+ anno['image_embedding_after'] = decode_tensor_from_string(anno['image_embedding_after'])
39
+ anno['text_embedding_after'] = decode_tensor_from_string(anno['text_embedding_after'])
40
+ return item
41
+
42
+ def check_unique(images, fields):
43
+ for field in fields:
44
+ temp_list = []
45
+ for img_info in images:
46
+ temp_list.append(img_info[field])
47
+ assert len(set(temp_list)) == len(temp_list), field
48
+
49
+ def clean_data(data):
50
+ for data_info in data:
51
+ data_info.pop("original_img_id", None)
52
+ data_info.pop("original_id", None)
53
+ data_info.pop("sentence_id", None) # sentence id for each image (multiple sentences for one image)
54
+ data_info.pop("dataset_name", None)
55
+ data_info.pop("data_source", None)
56
+ data_info["data_id"] = data_info.pop("id")
57
+
58
+
59
+ def clean_annotations(annotations):
60
+ for anno_info in annotations:
61
+ anno_info.pop("iscrowd", None) # I have checked that all 0 for flickr, vg, coco
62
+ anno_info.pop("category_id", None) # I have checked that all 1 for flickr vg. This is not always 1 for coco, but I do not think we need this annotation
63
+ anno_info.pop("area", None)
64
+ # anno_info.pop("id", None)
65
+ anno_info["data_id"] = anno_info.pop("image_id")
66
+
67
+
68
+ def draw_box(img, boxes):
69
+ draw = ImageDraw.Draw(img)
70
+ for box in boxes:
71
+ draw.rectangle([box[0], box[1], box[2], box[3]], outline ="red", width=2) # x0 y0 x1 y1
72
+ return img
73
+
74
+
75
+ def xyhw2xyxy(box):
76
+ x0, y0, w, h = box
77
+ return [ x0, y0, x0+w, y0+h ]
78
+
79
+
80
+ def make_a_sentence(obj_names, clean=False):
81
+
82
+ if clean:
83
+ obj_names = [ name[:-6] if ("-other" in name) else name for name in obj_names]
84
+
85
+ caption = ""
86
+ tokens_positive = []
87
+ for obj_name in obj_names:
88
+ start_len = len(caption)
89
+ caption += obj_name
90
+ end_len = len(caption)
91
+ caption += ", "
92
+ tokens_positive.append(
93
+ [[start_len, end_len]] # in real caption, positive tokens can be disjoint, thus using list of list
94
+ )
95
+ caption = caption[:-2] # remove last ", "
96
+
97
+ return caption #, tokens_positive
98
+
99
+
100
+ def mask_for_random_drop_text_or_image_feature(masks, random_drop_embedding):
101
+ """
102
+ The input masks tell how many grounding tokens are valid for this image,
103
+ e.g., 1,1,1,1,0,0,0,0,0,0...
104
+
105
+ If random_drop_embedding == 'both', we will randomly drop either the image or the
106
+ text feature for each token,
107
+ but we always make sure there is at least one feature used.
108
+ In other words, the following masks are not valid
109
+ (because for the second obj, no feature at all):
110
+ image: 1,0,1,1,0,0,0,0,0
111
+ text: 1,0,0,0,0,0,0,0,0
112
+
113
+ If random_drop_embedding == 'image', we will randomly drop the image feature
114
+ and always keep the text one.
115
+
116
+ """
117
+ N = masks.shape[0]
118
+
119
+ if random_drop_embedding=='both':
120
+ temp_mask = torch.ones(2,N)
121
+ for i in range(N):
122
+ if random.uniform(0, 1) < 0.5: # else keep both features
123
+ idx = random.sample([0,1], 1)[0] # randomly choose to drop image or text feature
124
+ temp_mask[idx,i] = 0
125
+ image_masks = temp_mask[0]*masks
126
+ text_masks = temp_mask[1]*masks
127
+
128
+ if random_drop_embedding=='image':
129
+ image_masks = masks*(torch.rand(N)>0.5)*1
130
+ text_masks = masks
131
+
132
+ return image_masks, text_masks
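A tiny usage sketch (not from the repo; it assumes the function above is in scope): with random_drop_embedding='both', every valid grounding token keeps at least one of its two modalities.

```python
# Call the masking helper on 3 valid tokens plus 2 padding slots.
import random
import torch

random.seed(0)
masks = torch.tensor([1., 1., 1., 0., 0.])
image_masks, text_masks = mask_for_random_drop_text_or_image_feature(masks, 'both')
print(image_masks.tolist(), text_masks.tolist())
assert torch.all((image_masks + text_masks) >= masks)   # a valid token never loses both features
```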
133
+
134
+
135
+
136
+
137
+
138
+ def project(x, projection_matrix):
139
+ """
140
+ x (Batch*768) should be the penultimate feature of CLIP (before projection)
141
+ projection_matrix (768*768) is the CLIP projection matrix, which should be weight.data of Linear layer
142
+ defined in CLIP (out_dim, in_dim), thus we need to apply transpose below.
143
+ this function will return the CLIP feature (without normalization)
144
+ """
145
+ return x @ torch.transpose(projection_matrix, 0, 1)
146
+
147
+
148
+ def inv_project(y, projection_matrix):
149
+ """
150
+ y (Batch*768) should be the CLIP feature (after projection)
151
+ projection_matrix (768*768) is the CLIP projection matrix, which should be weight.data of Linear layer
152
+ defined in CLIP (out_dim, in_dim).
153
+ this function will return the CLIP penultimate feature.
154
+
155
+ Note: to make sure getting the correct penultimate feature, the input y should not be normalized.
156
+ If it is normalized, then the result will be scaled by CLIP feature norm, which is unknown.
157
+ """
158
+ return y @ torch.transpose(torch.linalg.inv(projection_matrix), 0, 1)
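A quick sanity check of the pair above, using a random invertible matrix as a stand-in for the CLIP projection weights (synthetic tensors, not real CLIP features): inv_project should undo project up to numerical error.

```python
# Verify that inv_project(project(x, W), W) recovers x.
import torch

torch.manual_seed(0)
W = torch.randn(768, 768, dtype=torch.float64)   # stand-in for the CLIP projection weight (out_dim, in_dim)
x = torch.randn(4, 768, dtype=torch.float64)     # fake penultimate features (before projection)

y = project(x, W)           # "CLIP features" after projection
x_rec = inv_project(y, W)   # back to the penultimate space
print(torch.allclose(x, x_rec))   # True, up to numerical error
```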
159
+
160
+
161
+
162
+
163
+ class TSVDataset(BaseDataset):
164
+ def __init__(self,
165
+ tsv_path,
166
+ which_embedder='clip',
167
+ which_layer=['after','after'], # text and image
168
+ prob_use_caption=1,
169
+ random_drop_embedding='none',
170
+ image_size=256,
171
+ min_box_size=0.01,
172
+ max_boxes_per_data=8,
173
+ max_images=None, # set to 30K when used for evaluation
174
+ random_crop = False,
175
+ random_flip = True,
176
+ ):
177
+ image_root = "a placeholder path as we are using tsv here"
178
+ super().__init__(image_root, random_crop, random_flip, image_size)
179
+ self.tsv_path = tsv_path
180
+ self.which_embedder = which_embedder
181
+ self.prob_use_caption = prob_use_caption
182
+ self.random_drop_embedding = random_drop_embedding
183
+ self.min_box_size = min_box_size
184
+ self.max_boxes_per_data = max_boxes_per_data
185
+ self.max_images = max_images
186
+
187
+ assert which_layer in [ ['after','after'], ['before','after_renorm'], ['before','after_reproject'] ]
188
+ assert random_drop_embedding in ['none', 'both', 'image']
189
+ self.which_layer_text = which_layer[0]
190
+ self.which_layer_image = which_layer[1]
191
+
192
+ #self.projection_matrix = torch.load(os.path.join(os.path.dirname(__file__), 'projection_matrix') )
193
+ self.projection_matrix = torch.load('projection_matrix.pth')
194
+
195
+ # Load tsv data
196
+ self.tsv_file = TSVFile(self.tsv_path)
197
+
198
+
199
+ # Load preprocessed name embedding
200
+ if which_embedder == 'bert':
201
+ self.embedding_len = 1280
202
+ elif which_embedder == 'clip':
203
+ self.embedding_len = 768
204
+ else:
205
+ assert False
206
+
207
+ def total_images(self):
208
+ return len(self)
209
+
210
+ def get_item_from_tsv(self, index):
211
+ _, item = self.tsv_file[index]
212
+ item = decode_item(item)
213
+ return item
214
+
215
+
216
+ def mapping(self, image_embedding):
217
+ if self.which_layer_image == 'after':
218
+ # both use CLIP aligned feature
219
+ return image_embedding
220
+ elif self.which_layer_image == 'after_renorm':
221
+ # text uses the 'before' feature, while the image uses the projected feature renormalized to 28.7
222
+ return image_embedding*28.7
223
+ elif self.which_layer_image == 'after_reproject':
224
+ image_embedding = project( image_embedding.unsqueeze(0), self.projection_matrix.T )
225
+ image_embedding = image_embedding.squeeze(0)
226
+ image_embedding = image_embedding / image_embedding.norm()
227
+ image_embedding = image_embedding * 28.7
228
+ return image_embedding
229
+
230
+
231
+
232
+ def __getitem__(self, index):
233
+ if self.max_boxes_per_data > 99:
234
+ assert False, "Are you sure setting such large number of boxes?"
235
+
236
+ raw_item = self.get_item_from_tsv(index)
237
+ is_det = raw_item.get('is_det', False) # if it is from detection (such as o365), then we will make a caption
238
+
239
+ out = {}
240
+
241
+ # -------------------- id and image ------------------- #
242
+ out['id'] = raw_item['data_id']
243
+ image = raw_item['image']
244
+ image_tensor, trans_info = self.transform_image(image)
245
+ out["image"] = image_tensor
246
+
247
+
248
+
249
+ # -------------------- grounding token ------------------- #
250
+ annos = raw_item['annos']
251
+
252
+ areas = []
253
+ all_boxes = []
254
+ all_masks = []
255
+ all_text_embeddings = []
256
+ all_image_embeddings = []
257
+ if is_det:
258
+ all_category_names = []
259
+
260
+ text_embedding_name = 'text_embedding_before' if self.which_layer_text == 'before' else 'text_embedding_after'
261
+ image_embedding_name = 'image_embedding_after'
262
+
263
+ for anno in annos:
264
+ x, y, w, h = anno['bbox']
265
+ valid, (x0, y0, x1, y1) = recalculate_box_and_verify_if_valid(x, y, w, h, trans_info, self.image_size, self.min_box_size)
266
+
267
+ if valid:
268
+ areas.append( (x1-x0)*(y1-y0) )
269
+ all_boxes.append( torch.tensor([x0,y0,x1,y1]) / self.image_size ) # scale to 0-1
270
+ all_masks.append(1)
271
+ all_text_embeddings.append(anno[text_embedding_name])
272
+ all_image_embeddings.append( self.mapping(anno[image_embedding_name]) )
273
+ if is_det:
274
+ all_category_names.append(anno["category_name"])
275
+
276
+
277
+ wanted_idxs = torch.tensor(areas).sort(descending=True)[1]
278
+ wanted_idxs = wanted_idxs[0:self.max_boxes_per_data]
279
+
280
+ boxes = torch.zeros(self.max_boxes_per_data, 4)
281
+ masks = torch.zeros(self.max_boxes_per_data)
282
+ text_embeddings = torch.zeros(self.max_boxes_per_data, self.embedding_len)
283
+ image_embeddings = torch.zeros(self.max_boxes_per_data, self.embedding_len)
284
+ if is_det:
285
+ category_names = []
286
+ for i, idx in enumerate(wanted_idxs):
287
+ boxes[i] = all_boxes[idx]
288
+ masks[i] = all_masks[idx]
289
+ text_embeddings[i] = all_text_embeddings[idx]
290
+ image_embeddings[i] = all_image_embeddings[idx]
291
+ if is_det:
292
+ category_names.append(all_category_names[idx])
293
+
294
+ if self.random_drop_embedding != 'none':
295
+ image_masks, text_masks = mask_for_random_drop_text_or_image_feature(masks, self.random_drop_embedding)
296
+ else:
297
+ image_masks = masks
298
+ text_masks = masks
299
+
300
+
301
+ out["boxes"] = boxes
302
+ out["masks"] = masks
303
+ out["image_masks"] = image_masks
304
+ out["text_masks"] = text_masks
305
+ out["text_embeddings"] = text_embeddings
306
+ out["image_embeddings"] = image_embeddings
307
+
308
+
309
+
310
+ # -------------------- caption ------------------- #
311
+ if random.uniform(0, 1) < self.prob_use_caption:
312
+ if is_det:
313
+ out["caption"] = make_a_sentence(category_names)
314
+ else:
315
+ out["caption"] = raw_item["caption"]
316
+ else:
317
+ out["caption"] = ""
318
+
319
+ return out
320
+
321
+
322
+
323
+ def __len__(self):
324
+ return len(self.tsv_file)
325
+
326
+
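Each TSV record stores the image as a base64-encoded string; decode_item / decode_base64_to_pillow above turn it back into a PIL image. A round-trip sketch with a toy image (not a real TSV row):

```python
# Encode a small PIL image to base64 and decode it the same way decode_item expects.
import base64
from io import BytesIO
from PIL import Image

img = Image.new("RGB", (64, 64), color=(255, 0, 0))   # toy red square
buf = BytesIO()
img.save(buf, format="JPEG")
image_b64 = base64.b64encode(buf.getvalue()).decode("utf-8")   # what a TSV row would carry

decoded = Image.open(BytesIO(base64.b64decode(image_b64))).convert("RGB")
print(decoded.size)   # -> (64, 64)
```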
dataset/utils.py ADDED
@@ -0,0 +1,116 @@
1
+ #!/usr/bin/python
2
+ #
3
+ # Copyright 2018 Google LLC
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+ import PIL
18
+ import torch
19
+ import torchvision.transforms as T
20
+
21
+
22
+ IMAGENET_MEAN = [0.485, 0.456, 0.406]
23
+ IMAGENET_STD = [0.229, 0.224, 0.225]
24
+
25
+ INV_IMAGENET_MEAN = [-m for m in IMAGENET_MEAN]
26
+ INV_IMAGENET_STD = [1.0 / s for s in IMAGENET_STD]
27
+
28
+
29
+ def imagenet_preprocess():
30
+ return T.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD)
31
+
32
+
33
+ def rescale(x):
34
+ lo, hi = x.min(), x.max()
35
+ return x.sub(lo).div(hi - lo)
36
+
37
+
38
+ def imagenet_deprocess(rescale_image=True):
39
+ transforms = [
40
+ T.Normalize(mean=[0, 0, 0], std=INV_IMAGENET_STD),
41
+ T.Normalize(mean=INV_IMAGENET_MEAN, std=[1.0, 1.0, 1.0]),
42
+ ]
43
+ if rescale_image:
44
+ transforms.append(rescale)
45
+ return T.Compose(transforms)
46
+
47
+
48
+ def imagenet_deprocess_batch(imgs, rescale=True):
49
+ """
50
+ Input:
51
+ - imgs: FloatTensor of shape (N, C, H, W) giving preprocessed images
52
+
53
+ Output:
54
+ - imgs_de: ByteTensor of shape (N, C, H, W) giving deprocessed images
55
+ in the range [0, 255]
56
+ """
57
+ if isinstance(imgs, torch.autograd.Variable):
58
+ imgs = imgs.data
59
+ imgs = imgs.cpu().clone()
60
+ deprocess_fn = imagenet_deprocess(rescale_image=rescale)
61
+ imgs_de = []
62
+ for i in range(imgs.size(0)):
63
+ img_de = deprocess_fn(imgs[i])[None]
64
+ img_de = img_de.mul(255).clamp(0, 255).byte()
65
+ imgs_de.append(img_de)
66
+ imgs_de = torch.cat(imgs_de, dim=0)
67
+ return imgs_de
68
+
69
+
70
+ class Resize(object):
71
+ def __init__(self, size, interp=PIL.Image.BILINEAR):
72
+ if isinstance(size, tuple):
73
+ H, W = size
74
+ self.size = (W, H)
75
+ else:
76
+ self.size = (size, size)
77
+ self.interp = interp
78
+
79
+ def __call__(self, img):
80
+ return img.resize(self.size, self.interp)
81
+
82
+
83
+ def unpack_var(v):
84
+ if isinstance(v, torch.autograd.Variable):
85
+ return v.data
86
+ return v
87
+
88
+
89
+ def split_graph_batch(triples, obj_data, obj_to_img, triple_to_img):
90
+ triples = unpack_var(triples)
91
+ obj_data = [unpack_var(o) for o in obj_data]
92
+ obj_to_img = unpack_var(obj_to_img)
93
+ triple_to_img = unpack_var(triple_to_img)
94
+
95
+ triples_out = []
96
+ obj_data_out = [[] for _ in obj_data]
97
+ obj_offset = 0
98
+ N = obj_to_img.max() + 1
99
+ for i in range(N):
100
+ o_idxs = (obj_to_img == i).nonzero().view(-1)
101
+ t_idxs = (triple_to_img == i).nonzero().view(-1)
102
+
103
+ cur_triples = triples[t_idxs].clone()
104
+ cur_triples[:, 0] -= obj_offset
105
+ cur_triples[:, 2] -= obj_offset
106
+ triples_out.append(cur_triples)
107
+
108
+ for j, o_data in enumerate(obj_data):
109
+ cur_o_data = None
110
+ if o_data is not None:
111
+ cur_o_data = o_data[o_idxs]
112
+ obj_data_out[j].append(cur_o_data)
113
+
114
+ obj_offset += o_idxs.size(0)
115
+
116
+ return triples_out, obj_data_out
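A brief sketch (random tensors standing in for an image batch; assumes the helpers above are in scope) of pairing imagenet_preprocess() with imagenet_deprocess_batch():

```python
# Normalize a fake batch with ImageNet statistics, then undo it for visualization.
import torch

batch = torch.rand(2, 3, 64, 64)                     # fake images in [0, 1]
normalize = imagenet_preprocess()                    # Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD)
preprocessed = torch.stack([normalize(img) for img in batch])

restored = imagenet_deprocess_batch(preprocessed)    # ByteTensor back in [0, 255]
print(restored.shape, restored.dtype)                # torch.Size([2, 3, 64, 64]) torch.uint8
```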
environment.yaml ADDED
@@ -0,0 +1,29 @@
1
+ name: gligen_demo
2
+ channels:
3
+ - xformers/label/dev
4
+ - pytorch
5
+ - defaults
6
+ dependencies:
7
+ - python=3.10.8
8
+ - pip=22.2.2
9
+ - cudatoolkit=11.3
10
+ - pytorch=1.12.1
11
+ - torchvision=0.13.1
12
+ - numpy=1.23.1
13
+ - xformers
14
+ - pip:
15
+ - omegaconf==2.1.1
16
+ - albumentations==1.3.0
17
+ - opencv-python
18
+ - imageio==2.9.0
19
+ - imageio-ffmpeg==0.4.2
20
+ - pytorch-lightning==1.4.2
21
+ - test-tube>=0.7.5
22
+ - streamlit==1.12.1
23
+ - einops==0.3.0
24
+ - git+https://github.com/openai/CLIP.git
25
+ - protobuf~=3.20.1
26
+ - torchmetrics==0.6.0
27
+ - transformers==4.19.2
28
+ - kornia==0.6.0
29
+ - gradio==3.16.0
example_component.py ADDED
@@ -0,0 +1,805 @@
1
+ """
2
+ Defines helper methods useful for loading and caching Interface examples.
3
+ """
4
+ from __future__ import annotations
5
+
6
+ import ast
7
+ import csv
8
+ import inspect
9
+ import os
10
+ import subprocess
11
+ import tempfile
12
+ import threading
13
+ import warnings
14
+ from pathlib import Path
15
+ from typing import TYPE_CHECKING, Any, Callable, Dict, Iterable, List, Tuple
16
+
17
+ import matplotlib
18
+ import matplotlib.pyplot as plt
19
+ import numpy as np
20
+ import PIL
21
+ import PIL.Image
22
+
23
+ from gradio import components, processing_utils, routes, utils
24
+ from gradio.context import Context
25
+ from gradio.documentation import document, set_documentation_group
26
+ from gradio.flagging import CSVLogger
27
+
28
+ if TYPE_CHECKING: # Only import for type checking (to avoid circular imports).
29
+ from gradio.components import IOComponent
30
+
31
+ CACHED_FOLDER = "gradio_cached_examples"
32
+ LOG_FILE = "log.csv"
33
+
34
+ set_documentation_group("helpers")
35
+
36
+
37
+ def create_examples(
38
+ examples: List[Any] | List[List[Any]] | str,
39
+ inputs: IOComponent | List[IOComponent],
40
+ outputs: IOComponent | List[IOComponent] | None = None,
41
+ fn: Callable | None = None,
42
+ cache_examples: bool = False,
43
+ examples_per_page: int = 10,
44
+ _api_mode: bool = False,
45
+ label: str | None = None,
46
+ elem_id: str | None = None,
47
+ run_on_click: bool = False,
48
+ preprocess: bool = True,
49
+ postprocess: bool = True,
50
+ batch: bool = False,
51
+ ):
52
+ """Top-level synchronous function that creates Examples. Provided for backwards compatibility, i.e. so that gr.Examples(...) can be used to create the Examples component."""
53
+ examples_obj = Examples(
54
+ examples=examples,
55
+ inputs=inputs,
56
+ outputs=outputs,
57
+ fn=fn,
58
+ cache_examples=cache_examples,
59
+ examples_per_page=examples_per_page,
60
+ _api_mode=_api_mode,
61
+ label=label,
62
+ elem_id=elem_id,
63
+ run_on_click=run_on_click,
64
+ preprocess=preprocess,
65
+ postprocess=postprocess,
66
+ batch=batch,
67
+ _initiated_directly=False,
68
+ )
69
+ utils.synchronize_async(examples_obj.create)
70
+ return examples_obj
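For reference, a hedged usage sketch of the stock gr.Examples helper that this file customizes for the demo (toy function; assumes gradio >= 3.16 as pinned in environment.yaml):

```python
# Minimal Blocks app where clicking an example fills the input textbox.
import gradio as gr

def echo(text):
    return text.upper()

with gr.Blocks() as demo:
    inp = gr.Textbox(label="input")
    out = gr.Textbox(label="output")
    btn = gr.Button("Run")
    btn.click(echo, inputs=inp, outputs=out)
    gr.Examples(examples=["a cat", "a dog"], inputs=inp)   # clickable example inputs

# demo.launch()
```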
71
+
72
+
73
+ class Examples:
74
+ """
75
+ This class is a wrapper over the Dataset component and can be used to create Examples
76
+ for Blocks / Interfaces. Populates the Dataset component with examples and
77
+ assigns event listener so that clicking on an example populates the input/output
78
+ components. Optionally handles example caching for fast inference.
79
+
80
+ Demos: blocks_inputs, fake_gan
81
+ Guides: more_on_examples_and_flagging, using_hugging_face_integrations, image_classification_in_pytorch, image_classification_in_tensorflow, image_classification_with_vision_transformers, create_your_own_friends_with_a_gan
82
+ """
83
+
84
+ def __init__(
85
+ self,
86
+ examples: List[Any] | List[List[Any]] | str,
87
+ inputs: IOComponent | List[IOComponent],
88
+ outputs: IOComponent | List[IOComponent] | None = None,
89
+ fn: Callable | None = None,
90
+ cache_examples: bool = False,
91
+ examples_per_page: int = 10,
92
+ _api_mode: bool = False,
93
+ label: str | None = "Examples",
94
+ elem_id: str | None = None,
95
+ run_on_click: bool = False,
96
+ preprocess: bool = True,
97
+ postprocess: bool = True,
98
+ batch: bool = False,
99
+ _initiated_directly: bool = True,
100
+ ):
101
+ """
102
+ Parameters:
103
+ examples: example inputs that can be clicked to populate specific components. Should be nested list, in which the outer list consists of samples and each inner list consists of an input corresponding to each input component. A string path to a directory of examples can also be provided but it should be within the directory with the python file running the gradio app. If there are multiple input components and a directory is provided, a log.csv file must be present in the directory to link corresponding inputs.
104
+ inputs: the component or list of components corresponding to the examples
105
+ outputs: optionally, provide the component or list of components corresponding to the output of the examples. Required if `cache` is True.
106
+ fn: optionally, provide the function to run to generate the outputs corresponding to the examples. Required if `cache` is True.
107
+ cache_examples: if True, caches examples for fast runtime. If True, then `fn` and `outputs` need to be provided
108
+ examples_per_page: how many examples to show per page.
109
+ label: the label to use for the examples component (by default, "Examples")
110
+ elem_id: an optional string that is assigned as the id of this component in the HTML DOM.
111
+ run_on_click: if cache_examples is False, clicking on an example does not run the function when an example is clicked. Set this to True to run the function when an example is clicked. Has no effect if cache_examples is True.
112
+ preprocess: if True, preprocesses the example input before running the prediction function and caching the output. Only applies if cache_examples is True.
113
+ postprocess: if True, postprocesses the example output after running the prediction function and before caching. Only applies if cache_examples is True.
114
+ batch: If True, then the function should process a batch of inputs, meaning that it should accept a list of input values for each parameter. Used only if cache_examples is True.
115
+ """
116
+ if _initiated_directly:
117
+ warnings.warn(
118
+ "Please use gr.Examples(...) instead of gr.examples.Examples(...) to create the Examples.",
119
+ )
120
+
121
+ if cache_examples and (fn is None or outputs is None):
122
+ raise ValueError("If caching examples, `fn` and `outputs` must be provided")
123
+
124
+ if not isinstance(inputs, list):
125
+ inputs = [inputs]
126
+ if outputs and not isinstance(outputs, list):
127
+ outputs = [outputs]
128
+
129
+ working_directory = Path().absolute()
130
+
131
+ if examples is None:
132
+ raise ValueError("The parameter `examples` cannot be None")
133
+ elif isinstance(examples, list) and (
134
+ len(examples) == 0 or isinstance(examples[0], list)
135
+ ):
136
+ pass
137
+ elif (
138
+ isinstance(examples, list) and len(inputs) == 1
139
+ ): # If there is only one input component, examples can be provided as a regular list instead of a list of lists
140
+ examples = [[e] for e in examples]
141
+ elif isinstance(examples, str):
142
+ if not Path(examples).exists():
143
+ raise FileNotFoundError(
144
+ "Could not find examples directory: " + examples
145
+ )
146
+ working_directory = examples
147
+ if not (Path(examples) / LOG_FILE).exists():
148
+ if len(inputs) == 1:
149
+ examples = [[e] for e in os.listdir(examples)]
150
+ else:
151
+ raise FileNotFoundError(
152
+ "Could not find log file (required for multiple inputs): "
153
+ + LOG_FILE
154
+ )
155
+ else:
156
+ with open(Path(examples) / LOG_FILE) as logs:
157
+ examples = list(csv.reader(logs))
158
+ examples = [
159
+ examples[i][: len(inputs)] for i in range(1, len(examples))
160
+ ] # remove header and unnecessary columns
161
+
162
+ else:
163
+ raise ValueError(
164
+ "The parameter `examples` must either be a string directory or a list"
165
+ "(if there is only 1 input component) or (more generally), a nested "
166
+ "list, where each sublist represents a set of inputs."
167
+ )
168
+
169
+ input_has_examples = [False] * len(inputs)
170
+ for example in examples:
171
+ for idx, example_for_input in enumerate(example):
172
+ if not (example_for_input is None):
173
+ try:
174
+ input_has_examples[idx] = True
175
+ except IndexError:
176
+ pass # If there are more example components than inputs, ignore. This can sometimes be intentional (e.g. loading from a log file where outputs and timestamps are also logged)
177
+
178
+ inputs_with_examples = [
179
+ inp for (inp, keep) in zip(inputs, input_has_examples) if keep
180
+ ]
181
+ non_none_examples = [
182
+ [ex for (ex, keep) in zip(example, input_has_examples) if keep]
183
+ for example in examples
184
+ ]
185
+
186
+ self.examples = examples
187
+ self.non_none_examples = non_none_examples
188
+ self.inputs = inputs
189
+ self.inputs_with_examples = inputs_with_examples
190
+ self.outputs = outputs
191
+ self.fn = fn
192
+ self.cache_examples = cache_examples
193
+ self._api_mode = _api_mode
194
+ self.preprocess = preprocess
195
+ self.postprocess = postprocess
196
+ self.batch = batch
197
+
198
+ with utils.set_directory(working_directory):
199
+ self.processed_examples = [
200
+ [
201
+ component.postprocess(sample)
202
+ for component, sample in zip(inputs, example)
203
+ ]
204
+ for example in examples
205
+ ]
206
+ self.non_none_processed_examples = [
207
+ [ex for (ex, keep) in zip(example, input_has_examples) if keep]
208
+ for example in self.processed_examples
209
+ ]
210
+ if cache_examples:
211
+ for example in self.examples:
212
+ if len([ex for ex in example if ex is not None]) != len(self.inputs):
213
+ warnings.warn(
214
+ "Examples are being cached but not all input components have "
215
+ "example values. This may result in an exception being thrown by "
216
+ "your function. If you do get an error while caching examples, make "
217
+ "sure all of your inputs have example values for all of your examples "
218
+ "or you provide default values for those particular parameters in your function."
219
+ )
220
+ break
221
+
222
+ with utils.set_directory(working_directory):
223
+ self.dataset = components.Dataset(
224
+ components=inputs_with_examples,
225
+ samples=non_none_examples,
226
+ type="index",
227
+ label=label,
228
+ samples_per_page=examples_per_page,
229
+ elem_id=elem_id,
230
+ )
231
+
232
+ self.cached_folder = Path(CACHED_FOLDER) / str(self.dataset._id)
233
+ self.cached_file = Path(self.cached_folder) / "log.csv"
234
+ self.cache_examples = cache_examples
235
+ self.run_on_click = run_on_click
236
+
237
+ async def create(self) -> None:
238
+ """Caches the examples if self.cache_examples is True and creates the Dataset
239
+ component to hold the examples"""
240
+
241
+ async def load_example(example_id):
242
+ # import pdb; pdb.set_trace()
243
+ if self.cache_examples:
244
+ processed_example = self.non_none_processed_examples[
245
+ example_id
246
+ ] + await self.load_from_cache(example_id)
247
+ else:
248
+ processed_example = self.non_none_processed_examples[example_id]
249
+ return utils.resolve_singleton(processed_example)
250
+
251
+ if Context.root_block:
252
+ if self.cache_examples and self.outputs:
253
+ targets = self.inputs_with_examples + self.outputs
254
+ else:
255
+ targets = self.inputs_with_examples
256
+ self.dataset.click(
257
+ load_example,
258
+ inputs=[self.dataset],
259
+ outputs=targets, # type: ignore
260
+ postprocess=False,
261
+ queue=False,
262
+ )
263
+ self.dataset.click(
264
+ self.fn,
265
+ inputs=[self.dataset],
266
+ outputs=targets, # type: ignore
267
+ postprocess=False,
268
+ queue=False,
269
+ )
270
+ # if self.run_on_click and not self.cache_examples:
271
+ # if self.fn is None:
272
+ # raise ValueError("Cannot run_on_click if no function is provided")
273
+ # self.dataset.click(
274
+ # self.fn,
275
+ # inputs=self.inputs, # type: ignore
276
+ # outputs=self.outputs, # type: ignore
277
+ # )
278
+
279
+ if self.cache_examples:
280
+ await self.cache()
281
+
282
+ async def cache(self) -> None:
283
+ """
284
+ Caches all of the examples so that their predictions can be shown immediately.
285
+ """
286
+ if Path(self.cached_file).exists():
287
+ print(
288
+ f"Using cache from '{utils.abspath(self.cached_folder)}' directory. If method or examples have changed since last caching, delete this folder to clear cache."
289
+ )
290
+ else:
291
+ if Context.root_block is None:
292
+ raise ValueError("Cannot cache examples if not in a Blocks context")
293
+
294
+ print(f"Caching examples at: '{utils.abspath(self.cached_folder)}'")
295
+ cache_logger = CSVLogger()
296
+
297
+ # create a fake dependency to process the examples and get the predictions
298
+ dependency = Context.root_block.set_event_trigger(
299
+ event_name="fake_event",
300
+ fn=self.fn,
301
+ inputs=self.inputs_with_examples, # type: ignore
302
+ outputs=self.outputs, # type: ignore
303
+ preprocess=self.preprocess and not self._api_mode,
304
+ postprocess=self.postprocess and not self._api_mode,
305
+ batch=self.batch,
306
+ )
307
+
308
+ fn_index = Context.root_block.dependencies.index(dependency)
309
+ assert self.outputs is not None
310
+ cache_logger.setup(self.outputs, self.cached_folder)
311
+ for example_id, _ in enumerate(self.examples):
312
+ processed_input = self.processed_examples[example_id]
313
+ if self.batch:
314
+ processed_input = [[value] for value in processed_input]
315
+ prediction = await Context.root_block.process_api(
316
+ fn_index=fn_index, inputs=processed_input, request=None, state={}
317
+ )
318
+ output = prediction["data"]
319
+ if self.batch:
320
+ output = [value[0] for value in output]
321
+ cache_logger.flag(output)
322
+ # Remove the "fake_event" to prevent bugs in loading interfaces from spaces
323
+ Context.root_block.dependencies.remove(dependency)
324
+ Context.root_block.fns.pop(fn_index)
325
+
326
+ async def load_from_cache(self, example_id: int) -> List[Any]:
327
+ """Loads a particular cached example for the interface.
328
+ Parameters:
329
+ example_id: The id of the example to process (zero-indexed).
330
+ """
331
+ # import pdb; pdb.set_trace()
332
+ with open(self.cached_file, encoding="utf-8") as cache:
333
+ examples = list(csv.reader(cache))
334
+ example = examples[example_id + 1] # +1 to adjust for header
335
+ output = []
336
+ assert self.outputs is not None
337
+ for component, value in zip(self.outputs, example):
338
+ try:
339
+ value_as_dict = ast.literal_eval(value)
340
+ assert utils.is_update(value_as_dict)
341
+ output.append(value_as_dict)
342
+ except (ValueError, TypeError, SyntaxError, AssertionError):
343
+ output.append(component.serialize(value, self.cached_folder))
344
+ return output
345
+
346
+
347
+ class TrackedIterable:
348
+ def __init__(
349
+ self,
350
+ iterable: Iterable | None,
351
+ index: int | None,
352
+ length: int | None,
353
+ desc: str | None,
354
+ unit: str | None,
355
+ _tqdm=None,
356
+ progress: float | None = None,
357
+ ) -> None:
358
+ self.iterable = iterable
359
+ self.index = index
360
+ self.length = length
361
+ self.desc = desc
362
+ self.unit = unit
363
+ self._tqdm = _tqdm
364
+ self.progress = progress
365
+
366
+
367
+ @document("__call__", "tqdm")
368
+ class Progress(Iterable):
369
+ """
370
+ The Progress class provides a custom progress tracker that is used in a function signature.
371
+ To attach a Progress tracker to a function, simply add a parameter right after the input parameters that has a default value set to a `gradio.Progress()` instance.
372
+ The Progress tracker can then be updated in the function by calling the Progress object or using the `tqdm` method on an Iterable.
373
+ The Progress tracker is currently only available with `queue()`.
374
+ Example:
375
+ import gradio as gr
376
+ import time
377
+ def my_function(x, progress=gr.Progress()):
378
+ progress(0, desc="Starting...")
379
+ time.sleep(1)
380
+ for i in progress.tqdm(range(100)):
381
+ time.sleep(0.1)
382
+ return x
383
+ gr.Interface(my_function, gr.Textbox(), gr.Textbox()).queue().launch()
384
+ Demos: progress
385
+ """
386
+
387
+ def __init__(
388
+ self,
389
+ track_tqdm: bool = False,
390
+ _callback: Callable | None = None, # for internal use only
391
+ _event_id: str | None = None,
392
+ ):
393
+ """
394
+ Parameters:
395
+ track_tqdm: If True, the Progress object will track any tqdm.tqdm iterations with the tqdm library in the function.
396
+ """
397
+ self.track_tqdm = track_tqdm
398
+ self._callback = _callback
399
+ self._event_id = _event_id
400
+ self.iterables: List[TrackedIterable] = []
401
+
402
+ def __len__(self):
403
+ return self.iterables[-1].length
404
+
405
+ def __iter__(self):
406
+ return self
407
+
408
+ def __next__(self):
409
+ """
410
+ Updates progress tracker with next item in iterable.
411
+ """
412
+ if self._callback:
413
+ current_iterable = self.iterables[-1]
414
+ while (
415
+ not hasattr(current_iterable.iterable, "__next__")
416
+ and len(self.iterables) > 0
417
+ ):
418
+ current_iterable = self.iterables.pop()
419
+ self._callback(
420
+ event_id=self._event_id,
421
+ iterables=self.iterables,
422
+ )
423
+ assert current_iterable.index is not None, "Index not set."
424
+ current_iterable.index += 1
425
+ try:
426
+ return next(current_iterable.iterable) # type: ignore
427
+ except StopIteration:
428
+ self.iterables.pop()
429
+ raise StopIteration
430
+ else:
431
+ return self
432
+
433
+ def __call__(
434
+ self,
435
+ progress: float | Tuple[int, int | None] | None,
436
+ desc: str | None = None,
437
+ total: int | None = None,
438
+ unit: str = "steps",
439
+ _tqdm=None,
440
+ ):
441
+ """
442
+ Updates progress tracker with progress and message text.
443
+ Parameters:
444
+ progress: If float, should be between 0 and 1 representing completion. If Tuple, first number represents steps completed, and second value represents total steps or None if unknown. If None, hides progress bar.
445
+ desc: description to display.
446
+ total: estimated total number of steps.
447
+ unit: unit of iterations.
448
+ """
449
+ if self._callback:
450
+ if isinstance(progress, tuple):
451
+ index, total = progress
452
+ progress = None
453
+ else:
454
+ index = None
455
+ self._callback(
456
+ event_id=self._event_id,
457
+ iterables=self.iterables
458
+ + [TrackedIterable(None, index, total, desc, unit, _tqdm, progress)],
459
+ )
460
+ else:
461
+ return progress
462
+
463
+ def tqdm(
464
+ self,
465
+ iterable: Iterable | None,
466
+ desc: str | None = None,
467
+ total: int | None = None,
468
+ unit: str = "steps",
469
+ _tqdm=None,
470
+ *args,
471
+ **kwargs,
472
+ ):
473
+ """
474
+ Attaches progress tracker to iterable, like tqdm.
475
+ Parameters:
476
+ iterable: iterable to attach progress tracker to.
477
+ desc: description to display.
478
+ total: estimated total number of steps.
479
+ unit: unit of iterations.
480
+ """
481
+ if self._callback:
482
+ if iterable is None:
483
+ new_iterable = TrackedIterable(None, 0, total, desc, unit, _tqdm)
484
+ self.iterables.append(new_iterable)
485
+ self._callback(event_id=self._event_id, iterables=self.iterables)
486
+ return self
487
+ length = len(iterable) if hasattr(iterable, "__len__") else None # type: ignore
488
+ self.iterables.append(
489
+ TrackedIterable(iter(iterable), 0, length, desc, unit, _tqdm)
490
+ )
491
+ return self
492
+
493
+ def update(self, n=1):
494
+ """
495
+ Increases latest iterable with specified number of steps.
496
+ Parameters:
497
+ n: number of steps completed.
498
+ """
499
+ if self._callback and len(self.iterables) > 0:
500
+ current_iterable = self.iterables[-1]
501
+ assert current_iterable.index is not None, "Index not set."
502
+ current_iterable.index += n
503
+ self._callback(
504
+ event_id=self._event_id,
505
+ iterables=self.iterables,
506
+ )
507
+ else:
508
+ return
509
+
510
+ def close(self, _tqdm):
511
+ """
512
+ Removes iterable with given _tqdm.
513
+ """
514
+ if self._callback:
515
+ for i in range(len(self.iterables)):
516
+ if id(self.iterables[i]._tqdm) == id(_tqdm):
517
+ self.iterables.pop(i)
518
+ break
519
+ self._callback(
520
+ event_id=self._event_id,
521
+ iterables=self.iterables,
522
+ )
523
+ else:
524
+ return
525
+
526
+
527
+ def create_tracker(root_blocks, event_id, fn, track_tqdm):
528
+
529
+ progress = Progress(_callback=root_blocks._queue.set_progress, _event_id=event_id)
530
+ if not track_tqdm:
531
+ return progress, fn
532
+
533
+ try:
534
+ _tqdm = __import__("tqdm")
535
+ except ModuleNotFoundError:
536
+ return progress, fn
537
+ if not hasattr(root_blocks, "_progress_tracker_per_thread"):
538
+ root_blocks._progress_tracker_per_thread = {}
539
+
540
+ def init_tqdm(self, iterable=None, desc=None, *args, **kwargs):
541
+ self._progress = root_blocks._progress_tracker_per_thread.get(
542
+ threading.get_ident()
543
+ )
544
+ if self._progress is not None:
545
+ self._progress.event_id = event_id
546
+ self._progress.tqdm(iterable, desc, _tqdm=self, *args, **kwargs)
547
+ kwargs["file"] = open(os.devnull, "w")
548
+ self.__init__orig__(iterable, desc, *args, **kwargs)
549
+
550
+ def iter_tqdm(self):
551
+ if self._progress is not None:
552
+ return self._progress
553
+ else:
554
+ return self.__iter__orig__()
555
+
556
+ def update_tqdm(self, n=1):
557
+ if self._progress is not None:
558
+ self._progress.update(n)
559
+ return self.__update__orig__(n)
560
+
561
+ def close_tqdm(self):
562
+ if self._progress is not None:
563
+ self._progress.close(self)
564
+ return self.__close__orig__()
565
+
566
+ def exit_tqdm(self, exc_type, exc_value, traceback):
567
+ if self._progress is not None:
568
+ self._progress.close(self)
569
+ return self.__exit__orig__(exc_type, exc_value, traceback)
570
+
571
+ if not hasattr(_tqdm.tqdm, "__init__orig__"):
572
+ _tqdm.tqdm.__init__orig__ = _tqdm.tqdm.__init__
573
+ _tqdm.tqdm.__init__ = init_tqdm
574
+ if not hasattr(_tqdm.tqdm, "__update__orig__"):
575
+ _tqdm.tqdm.__update__orig__ = _tqdm.tqdm.update
576
+ _tqdm.tqdm.update = update_tqdm
577
+ if not hasattr(_tqdm.tqdm, "__close__orig__"):
578
+ _tqdm.tqdm.__close__orig__ = _tqdm.tqdm.close
579
+ _tqdm.tqdm.close = close_tqdm
580
+ if not hasattr(_tqdm.tqdm, "__exit__orig__"):
581
+ _tqdm.tqdm.__exit__orig__ = _tqdm.tqdm.__exit__
582
+ _tqdm.tqdm.__exit__ = exit_tqdm
583
+ if not hasattr(_tqdm.tqdm, "__iter__orig__"):
584
+ _tqdm.tqdm.__iter__orig__ = _tqdm.tqdm.__iter__
585
+ _tqdm.tqdm.__iter__ = iter_tqdm
586
+ if hasattr(_tqdm, "auto") and hasattr(_tqdm.auto, "tqdm"):
587
+ _tqdm.auto.tqdm = _tqdm.tqdm
588
+
589
+ def tracked_fn(*args):
590
+ thread_id = threading.get_ident()
591
+ root_blocks._progress_tracker_per_thread[thread_id] = progress
592
+ response = fn(*args)
593
+ del root_blocks._progress_tracker_per_thread[thread_id]
594
+ return response
595
+
596
+ return progress, tracked_fn
597
+
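# A hedged usage sketch for the tracker above (handler name and labels are made up): with
# track_tqdm=True, an ordinary tqdm loop inside the handler is mirrored to the Gradio progress
# bar, because tqdm.__init__/update/close/__iter__ are patched per worker thread.
import time
from tqdm import tqdm
import gradio as gr

def slow_fn(x, progress=gr.Progress(track_tqdm=True)):
    for _ in tqdm(range(20), desc="working"):  # forwarded to the UI via the patched tqdm
        time.sleep(0.05)
    return x

# gr.Interface(slow_fn, gr.Textbox(), gr.Textbox()).queue().launch()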
598
+
599
+ def special_args(
600
+ fn: Callable,
601
+ inputs: List[Any] | None = None,
602
+ request: routes.Request | None = None,
603
+ ):
604
+ """
605
+ Checks if function has special arguments Request (via annotation) or Progress (via default value).
606
+ If inputs is provided, these values will be loaded into the inputs array.
607
+ Parameters:
608
+ fn: function to check.
609
+ inputs: array to load special arguments into.
610
+ request: request to load into inputs.
611
+ Returns:
612
+ updated inputs, progress index
613
+ """
614
+ signature = inspect.signature(fn)
615
+ positional_args = []
616
+ for i, param in enumerate(signature.parameters.values()):
617
+ if param.kind not in (param.POSITIONAL_ONLY, param.POSITIONAL_OR_KEYWORD):
618
+ break
619
+ positional_args.append(param)
620
+ progress_index = None
621
+ for i, param in enumerate(positional_args):
622
+ if isinstance(param.default, Progress):
623
+ progress_index = i
624
+ if inputs is not None:
625
+ inputs.insert(i, param.default)
626
+ elif param.annotation == routes.Request:
627
+ if inputs is not None:
628
+ inputs.insert(i, request)
629
+ if inputs is not None:
630
+ while len(inputs) < len(positional_args):
631
+ i = len(inputs)
632
+ param = positional_args[i]
633
+ if param.default == param.empty:
634
+ warnings.warn("Unexpected argument. Filling with None.")
635
+ inputs.append(None)
636
+ else:
637
+ inputs.append(param.default)
638
+ return inputs or [], progress_index
639
+
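# A simplified, self-contained sketch of the detection idea used in special_args (not the
# gradio implementation): walk the positional parameters and record which one carries a
# Progress-like default value.
import inspect

class _ProgressLike:  # stand-in for gr.Progress in this sketch
    pass

def find_progress_index(fn):
    for i, p in enumerate(inspect.signature(fn).parameters.values()):
        if isinstance(p.default, _ProgressLike):
            return i
    return None

def _handler(text, progress=_ProgressLike()):
    return text

assert find_progress_index(_handler) == 1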
640
+
641
+ @document()
642
+ def update(**kwargs) -> dict:
643
+ """
644
+ Updates component properties. When a function passed into a Gradio Interface or a Blocks event returns a typical value, it updates the value of the output component. But it is also possible to update the properties of an output component (such as the number of lines of a `Textbox` or the visibility of an `Image`) by returning the component's `update()` function, which takes as parameters any of the constructor parameters for that component.
645
+ This is a shorthand for using the update method on a component.
646
+ For example, rather than using gr.Number.update(...) you can just use gr.update(...).
647
+ Note that your editor's autocompletion will suggest proper parameters
648
+ if you use the update method on the component.
649
+ Demos: blocks_essay, blocks_update, blocks_essay_update
650
+
651
+ Parameters:
652
+ kwargs: Key-word arguments used to update the component's properties.
653
+ Example:
654
+ # Blocks Example
655
+ import gradio as gr
656
+ with gr.Blocks() as demo:
657
+ radio = gr.Radio([1, 2, 4], label="Set the value of the number")
658
+ number = gr.Number(value=2, interactive=True)
659
+ radio.change(fn=lambda value: gr.update(value=value), inputs=radio, outputs=number)
660
+ demo.launch()
661
+
662
+ # Interface example
663
+ import gradio as gr
664
+ def change_textbox(choice):
665
+ if choice == "short":
666
+ return gr.Textbox.update(lines=2, visible=True)
667
+ elif choice == "long":
668
+ return gr.Textbox.update(lines=8, visible=True)
669
+ else:
670
+ return gr.Textbox.update(visible=False)
671
+ gr.Interface(
672
+ change_textbox,
673
+ gr.Radio(
674
+ ["short", "long", "none"], label="What kind of essay would you like to write?"
675
+ ),
676
+ gr.Textbox(lines=2),
677
+ live=True,
678
+ ).launch()
679
+ """
680
+ kwargs["__type__"] = "generic_update"
681
+ return kwargs
682
+
683
+
684
+ def skip() -> dict:
685
+ return update()
686
+
687
+
688
+ @document()
689
+ def make_waveform(
690
+ audio: str | Tuple[int, np.ndarray],
691
+ *,
692
+ bg_color: str = "#f3f4f6",
693
+ bg_image: str | None = None,
694
+ fg_alpha: float = 0.75,
695
+ bars_color: str | Tuple[str, str] = ("#fbbf24", "#ea580c"),
696
+ bar_count: int = 50,
697
+ bar_width: float = 0.6,
698
+ ):
699
+ """
700
+ Generates a waveform video from an audio file. Useful for creating an easy to share audio visualization. The output should be passed into a `gr.Video` component.
701
+ Parameters:
702
+ audio: Audio file path or tuple of (sample_rate, audio_data)
703
+ bg_color: Background color of waveform (ignored if bg_image is provided)
704
+ bg_image: Background image of waveform
705
+ fg_alpha: Opacity of foreground waveform
706
+ bars_color: Color of waveform bars. Can be a single color or a tuple of (start_color, end_color) of gradient
707
+ bar_count: Number of bars in waveform
708
+ bar_width: Width of bars in waveform. 1 represents full width, 0.5 represents half width, etc.
709
+ Returns:
710
+ A filepath to the output video.
711
+ """
712
+ if isinstance(audio, str):
713
+ audio_file = audio
714
+ audio = processing_utils.audio_from_file(audio)
715
+ else:
716
+ tmp_wav = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
717
+ processing_utils.audio_to_file(audio[0], audio[1], tmp_wav.name)
718
+ audio_file = tmp_wav.name
719
+ duration = round(len(audio[1]) / audio[0], 4)
720
+
721
+ # Helper methods to create waveform
722
+ def hex_to_RGB(hex_str):
723
+ return [int(hex_str[i : i + 2], 16) for i in range(1, 6, 2)]
724
+
725
+ def get_color_gradient(c1, c2, n):
726
+ assert n > 1
727
+ c1_rgb = np.array(hex_to_RGB(c1)) / 255
728
+ c2_rgb = np.array(hex_to_RGB(c2)) / 255
729
+ mix_pcts = [x / (n - 1) for x in range(n)]
730
+ rgb_colors = [((1 - mix) * c1_rgb + (mix * c2_rgb)) for mix in mix_pcts]
731
+ return [
732
+ "#" + "".join([format(int(round(val * 255)), "02x") for val in item])
733
+ for item in rgb_colors
734
+ ]
735
+
736
+ # Reshape audio to have a fixed number of bars
737
+ samples = audio[1]
738
+ if len(samples.shape) > 1:
739
+ samples = np.mean(samples, 1)
740
+ bins_to_pad = bar_count - (len(samples) % bar_count)
741
+ samples = np.pad(samples, [(0, bins_to_pad)])
742
+ samples = np.reshape(samples, (bar_count, -1))
743
+ samples = np.abs(samples)
744
+ samples = np.max(samples, 1)
745
+
746
+ matplotlib.use("Agg")
747
+ plt.clf()
748
+ # Plot waveform
749
+ color = (
750
+ bars_color
751
+ if isinstance(bars_color, str)
752
+ else get_color_gradient(bars_color[0], bars_color[1], bar_count)
753
+ )
754
+ plt.bar(
755
+ np.arange(0, bar_count),
756
+ samples * 2,
757
+ bottom=(-1 * samples),
758
+ width=bar_width,
759
+ color=color,
760
+ )
761
+ plt.axis("off")
762
+ plt.margins(x=0)
763
+ tmp_img = tempfile.NamedTemporaryFile(suffix=".png", delete=False)
764
+ savefig_kwargs: Dict[str, Any] = {"bbox_inches": "tight"}
765
+ if bg_image is not None:
766
+ savefig_kwargs["transparent"] = True
767
+ else:
768
+ savefig_kwargs["facecolor"] = bg_color
769
+ plt.savefig(tmp_img.name, **savefig_kwargs)
770
+ waveform_img = PIL.Image.open(tmp_img.name)
771
+ waveform_img = waveform_img.resize((1000, 200))
772
+
773
+ # Composite waveform with background image
774
+ if bg_image is not None:
775
+ waveform_array = np.array(waveform_img)
776
+ waveform_array[:, :, 3] = waveform_array[:, :, 3] * fg_alpha
777
+ waveform_img = PIL.Image.fromarray(waveform_array)
778
+
779
+ bg_img = PIL.Image.open(bg_image)
780
+ waveform_width, waveform_height = waveform_img.size
781
+ bg_width, bg_height = bg_img.size
782
+ if waveform_width != bg_width:
783
+ bg_img = bg_img.resize(
784
+ (waveform_width, 2 * int(bg_height * waveform_width / bg_width / 2))
785
+ )
786
+ bg_width, bg_height = bg_img.size
787
+ composite_height = max(bg_height, waveform_height)
788
+ composite = PIL.Image.new("RGBA", (waveform_width, composite_height), "#FFFFFF")
789
+ composite.paste(bg_img, (0, composite_height - bg_height))
790
+ composite.paste(
791
+ waveform_img, (0, composite_height - waveform_height), waveform_img
792
+ )
793
+ composite.save(tmp_img.name)
794
+ img_width, img_height = composite.size
795
+ else:
796
+ img_width, img_height = waveform_img.size
797
+ waveform_img.save(tmp_img.name)
798
+
799
+ # Convert waveform to video with ffmpeg
800
+ output_mp4 = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False)
801
+
802
+ ffmpeg_cmd = f"""ffmpeg -loop 1 -i {tmp_img.name} -i {audio_file} -vf "color=c=#FFFFFF77:s={img_width}x{img_height}[bar];[0][bar]overlay=-w+(w/{duration})*t:H-h:shortest=1" -t {duration} -y {output_mp4.name}"""
803
+
804
+ subprocess.call(ffmpeg_cmd, shell=True)
805
+ return output_mp4.name
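# A self-contained sketch of the bar-color gradient math used in make_waveform above (same
# interpolation, simplified; the two hex colors are just example endpoints). The .mp4 path
# returned by make_waveform is meant to be passed to a gr.Video output component.
import numpy as np

def _hex_to_rgb(h):
    return [int(h[i:i + 2], 16) for i in range(1, 6, 2)]

def _color_gradient(c1, c2, n):
    a, b = np.array(_hex_to_rgb(c1)) / 255, np.array(_hex_to_rgb(c2)) / 255
    return ["#" + "".join(format(int(round(v * 255)), "02x") for v in (1 - t) * a + t * b)
            for t in (x / (n - 1) for x in range(n))]

print(_color_gradient("#fbbf24", "#ea580c", 4))  # 4 bar colors from the default gradient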
gligen/.DS_Store ADDED
Binary file (6.15 kB). View file
 
gligen/SD_input_conv_weight_bias.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b5a0efad69747a766158304f39091c2b6a24cafb5f833d174f32bee8e864a562
3
+ size 130
gligen/__init__.py ADDED
@@ -0,0 +1,10 @@
1
+
2
+ import os, sys
3
+ sys.path.append(os.path.dirname(__file__))
4
+ sys.path.append(os.path.join(os.path.dirname(__file__), "ldm"))
5
+
6
+ import gligen.evaluator as evaluator
7
+ import gligen.trainer as trainer
8
+
9
+
10
+ # import gligen.ldm as ldm
gligen/__pycache__/__init__.cpython-38.pyc ADDED
Binary file (345 Bytes). View file
 
gligen/__pycache__/distributed.cpython-38.pyc ADDED
Binary file (2.91 kB). View file
 
gligen/__pycache__/evaluator.cpython-38.pyc ADDED
Binary file (5.9 kB). View file
 
gligen/__pycache__/task_grounded_generation.cpython-38.pyc ADDED
Binary file (9.11 kB). View file
 
gligen/__pycache__/trainer.cpython-38.pyc ADDED
Binary file (11.7 kB). View file
 
gligen/create_meta.py ADDED
@@ -0,0 +1,170 @@
1
+ CKPTS = [
2
+
3
+ dict(
4
+ path="/home/chunyl/azure_mount/yuhengdb/fine_tune_ldm/version5_branch6_output/GoldG+SBU+CC3M+CC12M+O365/second_stage_drop_both/tag01/checkpoint_00450001.pth",
5
+ feature_type=['before','after_reproject'],
6
+ save_folder_name="v5b6_drop_both",
7
+ ),
8
+
9
+
10
+ # dict(
11
+ # path="/home/v-yuhengli/blobfuse/output/fine_tune_ldm/version5_branch6_output/GoldG+SBU+CC3M+CC12M+O365/second_stage_drop_none/tag00/checkpoint_00165001.pth",
12
+ # feature_type=['before','after_reproject'],
13
+ # save_folder_name="v5b6_drop_none",
14
+ # ),
15
+
16
+
17
+
18
+
19
+
20
+ ]
21
+
22
+
23
+
24
+ # = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = #
25
+
26
+
27
+
28
+
29
+
30
+
31
+
32
+
33
+ # if meta["has_image_mask"] == 0:
34
+ # image_embeddings = text_embeddings
35
+ # if meta["has_text_mask"] == 0:
36
+ # text_embeddings = image_embeddings
37
+
38
+ # out = {
39
+ # "boxes" : boxes.unsqueeze(0).repeat(batch,1,1),
40
+ # "masks" : masks.unsqueeze(0).repeat(batch,1),
41
+ # "text_masks" : masks.unsqueeze(0).repeat(batch,1),
42
+ # "image_masks" : masks.unsqueeze(0).repeat(batch,1),
43
+ # "text_embeddings" : text_embeddings.unsqueeze(0).repeat(batch,1,1),
44
+ # "image_embeddings" : image_embeddings.unsqueeze(0).repeat(batch,1,1)
45
+ # }
46
+
47
+
48
+
49
+
50
+
51
+
52
+
53
+ META = [
54
+
55
+
56
+ dict(
57
+ prompt = "a teddy bear sitting next to a red bird",
58
+ phrases = ['a teddy bear', 'a red bird'],
59
+ images = ['images/teddy.jpg', 'images/red_bird.jpg'],
60
+ locations = [ [0.0,0.09,0.33,0.76], [0.55,0.11,1.0,0.8] ],
61
+ alpha_type = [1.0, 0, 0.0],
62
+ has_text_mask = 1,
63
+ has_image_mask = 0,
64
+ save_folder_name="teddy_bird_1_1"
65
+ ),
66
+
67
+
68
+ # dict(
69
+ # prompt = "a teddy bear sitting next to a bird",
70
+ # phrases = ['a teddy bear', 'a bird'],
71
+ # images = ['images/teddy.jpg', 'images/red_bird.jpg'],
72
+ # locations = [ [0.0,0.09,0.33,0.76], [0.55,0.11,1.0,0.8] ],
73
+ # alpha_type = [1.0, 0, 0.0],
74
+ # has_text_mask = 1,
75
+ # has_image_mask = 1,
76
+ # save_folder_name="teddy_bird_1_1"
77
+ # ),
78
+
79
+
80
+ # dict(
81
+ # prompt = "a teddy bear sitting next to a bird",
82
+ # phrases = ['a teddy bear', 'a bird'],
83
+ # images = ['images/teddy.jpg', 'images/red_bird.jpg'],
84
+ # locations = [ [0.0,0.09,0.33,0.76], [0.55,0.11,1.0,0.8] ],
85
+ # alpha_type = [0.5, 0, 0.5],
86
+ # has_text_mask = 1,
87
+ # has_image_mask = 0,
88
+ # save_folder_name="teddy_bird_1_0"
89
+ # ),
90
+
91
+ # dict(
92
+ # prompt = "",
93
+ # phrases = ['a teddy bear', 'an umbrella'],
94
+ # images = ['images/teddy.jpg', 'images/umbrella.png'],
95
+ # locations = [ [0.0,0.09,0.33,0.76], [0.55,0.11,1.0,0.8] ],
96
+ # alpha_type = [1.0, 0, 0.0],
97
+ # has_text_mask = 1,
98
+ # has_image_mask = 1,
99
+ # save_folder_name="empty_teddy_umbrella_1_1"
100
+ # ),
101
+
102
+ # dict(
103
+ # prompt = "hello kitty and bird hybrid",
104
+ # phrases = ['a hello kitty', 'a hello kitty'],
105
+ # images = ['images/red_bird.jpg', 'images/red_bird.jpg'],
106
+ # locations = [ [0.0,0.09,0.33,0.76], [0.55,0.11,1.0,0.8] ],
107
+ # has_text_mask = 1,
108
+ # has_image_mask = 1,
109
+ # save_folder_name="hello+bird_1_1"
110
+ # ),
111
+
112
+ # dict(
113
+ # prompt = "hello kitty and teddy bear hybrid",
114
+ # phrases = ['a hello kitty', 'a hello kitty'],
115
+ # images = ['images/teddy.jpg', 'images/teddy.jpg'],
116
+ # locations = [ [0.0,0.09,0.33,0.76], [0.55,0.11,1.0,0.8] ],
117
+ # has_text_mask = 1,
118
+ # has_image_mask = 1,
119
+ # save_folder_name="hello+teddy_1_1"
120
+ # ),
121
+
122
+ # dict(
123
+ # prompt = "bird and hello kitty hybrid",
124
+ # phrases = ['a bird', 'a bird'],
125
+ # images = ['images/hello.jpg', 'images/hello.jpg'],
126
+ # locations = [ [0.0,0.09,0.33,0.76], [0.55,0.11,1.0,0.8] ],
127
+ # alpha_type = [1.0, 0, 0.0],
128
+ # has_text_mask = 1,
129
+ # has_image_mask = 0.5,
130
+ # save_folder_name="bird+hello_1_1"
131
+ # ),
132
+
133
+
134
+
135
+ # dict(
136
+ # prompt = "a deer standing in front of a brick house in the woods, anime, oil painting, high resolution, cottagecore, ghibli inspired, 4k",
137
+ # phrases = ['a deer'],
138
+ # images = ['images/sky.jpg'],
139
+ # locations = [ [0.0,0.5,0.5,0.9] ],
140
+ # alpha_type = [1, 0, 0],
141
+ # has_text_mask = 1,
142
+ # has_image_mask = 1,
143
+ # save_folder_name="deer_sky"
144
+ # ),
145
+
146
+
147
+ # dict(
148
+ # prompt = "A woman sitting in a restaurant with a slice of pizza in front of her",
149
+ # phrases = ['dining table', 'pizza', 'person', 'wall', 'car', 'paper', 'chair', 'window', 'bottle', 'cup'],
150
+ # images = ['images/hello.jpg','images/hello.jpg','images/hello.jpg','images/hello.jpg','images/hello.jpg','images/hello.jpg','images/hello.jpg','images/hello.jpg','images/hello.jpg','images/hello.jpg'],
151
+ # locations = [ [0.0030, 0.3589, 1.0000, 1.0000],
152
+ # [0.0779, 0.6744, 0.9768, 1.0000],
153
+ # [0.2236, 0.0000, 0.7809, 0.4352],
154
+ # [0.0000, 0.0000, 0.4313, 0.4505],
155
+ # [0.6275, 0.1050, 0.9444, 0.2497],
156
+ # [0.0000, 0.3859, 0.1250, 0.6922],
157
+ # [0.7137, 0.2389, 0.8540, 0.4549],
158
+ # [0.0000, 0.0000, 0.4667, 0.0630],
159
+ # [0.3822, 0.4235, 0.4932, 0.6575],
160
+ # [0.6616, 0.3617, 0.7880, 0.5165] ],
161
+ # alpha_type = [0.0, 0, 1.0],
162
+ # has_text_mask = 1,
163
+ # has_image_mask = 0,
164
+ # save_folder_name="pizza_1_0"
165
+ # ),
166
+
167
+
168
+
169
+
170
+ ]
gligen/distributed.py ADDED
@@ -0,0 +1,122 @@
1
+ import math
2
+ import pickle
3
+
4
+ import torch
5
+ from torch import distributed as dist
6
+ from torch.utils.data.sampler import Sampler
7
+
8
+
9
+ def get_rank():
10
+ if not dist.is_available():
11
+ return 0
12
+
13
+ if not dist.is_initialized():
14
+ return 0
15
+
16
+ return dist.get_rank()
17
+
18
+
19
+ def synchronize():
20
+ if not dist.is_available():
21
+ return
22
+ if not dist.is_initialized():
23
+ return
24
+
25
+ world_size = dist.get_world_size()
26
+ if world_size == 1:
27
+ return
28
+
29
+ dist.barrier()
30
+
31
+
32
+ def get_world_size():
33
+ if not dist.is_available():
34
+ return 1
35
+ if not dist.is_initialized():
36
+ return 1
37
+ return dist.get_world_size()
38
+
39
+
40
+ def reduce_sum(tensor):
41
+ if not dist.is_available():
42
+ return tensor
43
+
44
+ if not dist.is_initialized():
45
+ return tensor
46
+
47
+ tensor = tensor.clone()
48
+ dist.all_reduce(tensor, op=dist.ReduceOp.SUM)
49
+
50
+ return tensor
51
+
52
+
53
+ def gather_grad(params):
54
+ world_size = get_world_size()
55
+
56
+ if world_size == 1:
57
+ return
58
+
59
+ for param in params:
60
+ if param.grad is not None:
61
+ dist.all_reduce(param.grad.data, op=dist.ReduceOp.SUM)
62
+ param.grad.data.div_(world_size)
63
+
64
+
65
+ def all_gather(data):
66
+ world_size = get_world_size()
67
+
68
+ if world_size == 1:
69
+ return [data]
70
+
71
+ buffer = pickle.dumps(data)
72
+ storage = torch.ByteStorage.from_buffer(buffer)
73
+ tensor = torch.ByteTensor(storage).to('cuda')
74
+
75
+ local_size = torch.IntTensor([tensor.numel()]).to('cuda')
76
+ size_list = [torch.IntTensor([0]).to('cuda') for _ in range(world_size)]
77
+ dist.all_gather(size_list, local_size)
78
+ size_list = [int(size.item()) for size in size_list]
79
+ max_size = max(size_list)
80
+
81
+ tensor_list = []
82
+ for _ in size_list:
83
+ tensor_list.append(torch.ByteTensor(size=(max_size,)).to('cuda'))
84
+
85
+ if local_size != max_size:
86
+ padding = torch.ByteTensor(size=(max_size - local_size,)).to('cuda')
87
+ tensor = torch.cat((tensor, padding), 0)
88
+
89
+ dist.all_gather(tensor_list, tensor)
90
+
91
+ data_list = []
92
+
93
+ for size, tensor in zip(size_list, tensor_list):
94
+ buffer = tensor.cpu().numpy().tobytes()[:size]
95
+ data_list.append(pickle.loads(buffer))
96
+
97
+ return data_list
98
+
99
+
100
+ def reduce_loss_dict(loss_dict):
101
+ world_size = get_world_size()
102
+
103
+ if world_size < 2:
104
+ return loss_dict
105
+
106
+ with torch.no_grad():
107
+ keys = []
108
+ losses = []
109
+
110
+ for k in sorted(loss_dict.keys()):
111
+ keys.append(k)
112
+ losses.append(loss_dict[k])
113
+
114
+ losses = torch.stack(losses, 0)
115
+ dist.reduce(losses, dst=0)
116
+
117
+ if dist.get_rank() == 0:
118
+ losses /= world_size
119
+
120
+ reduced_losses = {k: v for k, v in zip(keys, losses)}
121
+
122
+ return reduced_losses
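# A hedged usage sketch for the helpers above (loss names are illustrative). With a single
# process (world_size < 2) reduce_loss_dict is a no-op; under torch.distributed it averages
# each entry onto rank 0.
import torch
losses = {"loss_total": torch.tensor(0.42), "loss_kl": torch.tensor(0.01)}
reduced = reduce_loss_dict(losses)
print({k: float(v) for k, v in reduced.items()})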
gligen/evaluator.py ADDED
@@ -0,0 +1,225 @@
1
+ import torch
2
+ from ldm.models.diffusion.ddim import DDIMSampler
3
+ from ldm.models.diffusion.plms import PLMSSampler
4
+ from ldm.util import instantiate_from_config
5
+ import numpy as np
6
+ import random
7
+ from dataset.concat_dataset import ConCatDataset #, collate_fn
8
+ from torch.utils.data import DataLoader
9
+ from torch.utils.data.distributed import DistributedSampler
10
+ import os
11
+ from tqdm import tqdm
12
+ from distributed import get_rank, synchronize, get_world_size
13
+ from trainer import read_official_ckpt, batch_to_device, ImageCaptionSaver, wrap_loader #, get_padded_boxes
14
+ from PIL import Image
15
+ import math
16
+ import json
17
+ #hello
18
+
19
+ def draw_masks_from_boxes(boxes,size):
20
+
21
+ image_masks = []
22
+ for box in boxes:
23
+ image_mask = torch.ones(size[0],size[1])
24
+ for bx in box:
25
+ x0, x1 = bx[0]*size[0], bx[2]*size[0]
26
+ y0, y1 = bx[1]*size[1], bx[3]*size[1]
27
+ image_mask[int(y0):int(y1), int(x0):int(x1)] = 0
28
+ image_masks.append(image_mask)
29
+ return torch.stack(image_masks).unsqueeze(1)
30
+
31
+
32
+
33
+ def set_alpha_scale(model, alpha_scale):
34
+ from ldm.modules.attention import GatedCrossAttentionDense, GatedSelfAttentionDense
35
+ for module in model.modules():
36
+ if type(module) == GatedCrossAttentionDense or type(module) == GatedSelfAttentionDense:
37
+ module.scale = alpha_scale
38
+ # print("scale: ", alpha_scale)
39
+ # print("attn: ", module.alpha_attn)
40
+ # print("dense: ", module.alpha_dense)
41
+ # print(' ')
42
+ # print(' ')
43
+
44
+
45
+ def save_images(samples, image_ids, folder, to256):
46
+ for sample, image_id in zip(samples, image_ids):
47
+ sample = torch.clamp(sample, min=-1, max=1) * 0.5 + 0.5
48
+ sample = sample.cpu().numpy().transpose(1,2,0) * 255
49
+ img_name = str(int(image_id))+'.png'
50
+ img = Image.fromarray(sample.astype(np.uint8))
51
+ if to256:
52
+ img = img.resize( (256,256), Image.BICUBIC)
53
+ img.save(os.path.join(folder,img_name))
54
+
55
+
56
+ def ckpt_to_folder_name(basename):
57
+ name=""
58
+ for s in basename:
59
+ if s.isdigit():
60
+ name+=s
61
+ seen = round( int(name)/1000, 1 )
62
+ return str(seen).ljust(4,'0')+'k'
63
+
64
+
65
+ class Evaluator:
66
+ def __init__(self, config):
67
+
68
+ self.config = config
69
+ self.device = torch.device("cuda")
70
+
71
+
72
+ # = = = = = create model and diffusion = = = = = #
73
+ if self.config.ckpt != "real":
74
+
75
+ self.model = instantiate_from_config(config.model).to(self.device)
76
+ self.autoencoder = instantiate_from_config(config.autoencoder).to(self.device)
77
+ self.text_encoder = instantiate_from_config(config.text_encoder).to(self.device)
78
+ self.diffusion = instantiate_from_config(config.diffusion).to(self.device)
79
+
80
+ # donot need to load official_ckpt for self.model here, since we will load from our ckpt
81
+ state_dict = read_official_ckpt( os.path.join(config.DATA_ROOT, config.official_ckpt_name) )
82
+ self.autoencoder.load_state_dict( state_dict["autoencoder"] )
83
+ self.text_encoder.load_state_dict( state_dict["text_encoder"] )
84
+ self.diffusion.load_state_dict( state_dict["diffusion"] )
85
+
86
+
87
+ # = = = = = load from our ckpt = = = = = #
88
+ if self.config.ckpt == "real":
89
+ print("Saving all real images...")
90
+ self.just_save_real = True
91
+ else:
92
+ checkpoint = torch.load(self.config.ckpt, map_location="cpu")
93
+ which_state = 'ema' if 'ema' in checkpoint else "model"
94
+ which_state = which_state if config.which_state is None else config.which_state
95
+ self.model.load_state_dict(checkpoint[which_state])
96
+ print("ckpt is loaded")
97
+ self.just_save_real = False
98
+ set_alpha_scale(self.model, self.config.alpha_scale)
99
+
100
+ self.autoencoder.eval()
101
+ self.model.eval()
102
+ self.text_encoder.eval()
103
+
104
+
105
+ # = = = = = create data = = = = = #
106
+ self.dataset_eval = ConCatDataset(config.val_dataset_names, config.DATA_ROOT, config.which_embedder, train=False)
107
+ print("total eval images: ", len(self.dataset_eval))
108
+ sampler = DistributedSampler(self.dataset_eval,shuffle=False) if config.distributed else None
109
+ loader_eval = DataLoader( self.dataset_eval,batch_size=config.batch_size,
110
+ num_workers=config.workers,
111
+ pin_memory=True,
112
+ sampler=sampler,
113
+ drop_last=False) # shuffle default is False
114
+ self.loader_eval = loader_eval
115
+
116
+
117
+ # = = = = = create output folder = = = = = #
118
+ folder_name = ckpt_to_folder_name(os.path.basename(config.ckpt))
119
+ self.outdir = os.path.join(config.OUTPUT_ROOT, folder_name)
120
+ self.outdir_real = os.path.join(self.outdir,'real')
121
+ self.outdir_fake = os.path.join(self.outdir,'fake')
122
+ if config.to256:
123
+ self.outdir_real256 = os.path.join(self.outdir,'real256')
124
+ self.outdir_fake256 = os.path.join(self.outdir,'fake256')
125
+ synchronize() # if rank0 is faster, it may mkdir before the other rank call os.listdir()
126
+ if get_rank() == 0:
127
+ os.makedirs(self.outdir, exist_ok=True)
128
+ os.makedirs(self.outdir_real, exist_ok=True)
129
+ os.makedirs(self.outdir_fake, exist_ok=True)
130
+ if config.to256:
131
+ os.makedirs(self.outdir_real256, exist_ok=True)
132
+ os.makedirs(self.outdir_fake256, exist_ok=True)
133
+ print(self.outdir) # double check
134
+
135
+ self.evaluation_finished = False
136
+ if os.path.exists( os.path.join(self.outdir,'score.txt') ):
137
+ self.evaluation_finished = True
138
+
139
+
140
+ def alread_saved_this_batch(self, batch):
141
+ existing_real_files = os.listdir( self.outdir_real )
142
+ existing_fake_files = os.listdir( self.outdir_fake )
143
+ status = []
144
+ for image_id in batch["id"]:
145
+ img_name = str(int(image_id))+'.png'
146
+ status.append(img_name in existing_real_files)
147
+ status.append(img_name in existing_fake_files)
148
+ return all(status)
149
+
150
+
151
+ @torch.no_grad()
152
+ def start_evaluating(self):
153
+
154
+ iterator = tqdm( self.loader_eval, desc='Evaluating progress')
155
+ for batch in iterator:
156
+
157
+ #if not self.alread_saved_this_batch(batch):
158
+ if True:
159
+
160
+ batch_to_device(batch, self.device)
161
+ batch_size = batch["image"].shape[0]
162
+ samples_real = batch["image"]
163
+
164
+ if self.just_save_real:
165
+ samples_fake = None
166
+ else:
167
+ uc = self.text_encoder.encode( batch_size*[""] )
168
+ context = self.text_encoder.encode( batch["caption"] )
169
+
170
+ image_mask = x0 = None
171
+ if self.config.inpaint:
172
+ image_mask = draw_masks_from_boxes( batch['boxes'], self.model.image_size ).cuda()
173
+ x0 = self.autoencoder.encode( batch["image"] )
174
+
175
+ shape = (batch_size, self.model.in_channels, self.model.image_size, self.model.image_size)
176
+ if self.config.no_plms:
177
+ sampler = DDIMSampler(self.diffusion, self.model)
178
+ steps = 250
179
+ else:
180
+ sampler = PLMSSampler(self.diffusion, self.model)
181
+ steps = 50
182
+
183
+ input = dict( x=None, timesteps=None, context=context, boxes=batch['boxes'], masks=batch['masks'], positive_embeddings=batch["positive_embeddings"] )
184
+ samples_fake = sampler.sample(S=steps, shape=shape, input=input, uc=uc, guidance_scale=self.config.guidance_scale, mask=image_mask, x0=x0)
185
+ samples_fake = self.autoencoder.decode(samples_fake)
186
+
187
+
188
+ save_images(samples_real, batch['id'], self.outdir_real, to256=False )
189
+ if self.config.to256:
190
+ save_images(samples_real, batch['id'], self.outdir_real256, to256=True )
191
+
192
+ if samples_fake is not None:
193
+ save_images(samples_fake, batch['id'], self.outdir_fake, to256=False )
194
+ if self.config.to256:
195
+ save_images(samples_fake, batch['id'], self.outdir_fake256, to256=True )
196
+
197
+
198
+ def fire_fid(self):
199
+ paths = [self.outdir_real, self.outdir_fake]
200
+ if self.config.to256:
201
+ paths = [self.outdir_real256, self.outdir_fake256]
202
+
203
+
204
+
205
+
206
+
207
+
208
+
209
+
210
+
211
+
212
+
213
+
214
+
215
+
216
+
217
+
218
+
219
+
220
+
221
+
222
+
223
+
224
+
225
+
gligen/ldm/.DS_Store ADDED
Binary file (6.15 kB). View file
 
gligen/ldm/__pycache__/util.cpython-38.pyc ADDED
Binary file (3.2 kB). View file
 
gligen/ldm/data/.DS_Store ADDED
Binary file (6.15 kB). View file
 
gligen/ldm/data/__init__.py ADDED
File without changes
gligen/ldm/data/base.py ADDED
@@ -0,0 +1,23 @@
1
+ from abc import abstractmethod
2
+ from torch.utils.data import Dataset, ConcatDataset, ChainDataset, IterableDataset
3
+
4
+
5
+ class Txt2ImgIterableBaseDataset(IterableDataset):
6
+ '''
7
+ Define an interface to make the IterableDatasets for text2img data chainable
8
+ '''
9
+ def __init__(self, num_records=0, valid_ids=None, size=256):
10
+ super().__init__()
11
+ self.num_records = num_records
12
+ self.valid_ids = valid_ids
13
+ self.sample_ids = valid_ids
14
+ self.size = size
15
+
16
+ print(f'{self.__class__.__name__} dataset contains {self.__len__()} examples.')
17
+
18
+ def __len__(self):
19
+ return self.num_records
20
+
21
+ @abstractmethod
22
+ def __iter__(self):
23
+ pass
gligen/ldm/data/imagenet.py ADDED
@@ -0,0 +1,394 @@
1
+ import os, yaml, pickle, shutil, tarfile, glob
2
+ import cv2
3
+ import albumentations
4
+ import PIL
5
+ import numpy as np
6
+ import torchvision.transforms.functional as TF
7
+ from omegaconf import OmegaConf
8
+ from functools import partial
9
+ from PIL import Image
10
+ from tqdm import tqdm
11
+ from torch.utils.data import Dataset, Subset
12
+
13
+ import taming.data.utils as tdu
14
+ from taming.data.imagenet import str_to_indices, give_synsets_from_indices, download, retrieve
15
+ from taming.data.imagenet import ImagePaths
16
+
17
+ from ldm.modules.image_degradation import degradation_fn_bsr, degradation_fn_bsr_light
18
+
19
+
20
+ def synset2idx(path_to_yaml="ldm/data/index_synset.yaml"):
21
+ with open(path_to_yaml) as f:
22
+ di2s = yaml.load(f)
23
+ return dict((v,k) for k,v in di2s.items())
24
+
25
+
26
+ class ImageNetBase(Dataset):
27
+ def __init__(self, config=None):
28
+ self.config = config or OmegaConf.create()
29
+ if not type(self.config)==dict:
30
+ self.config = OmegaConf.to_container(self.config)
31
+ self.keep_orig_class_label = self.config.get("keep_orig_class_label", False)
32
+ self.process_images = True # if False we skip loading & processing images and self.data contains filepaths
33
+ self._prepare()
34
+ self._prepare_synset_to_human()
35
+ self._prepare_idx_to_synset()
36
+ self._prepare_human_to_integer_label()
37
+ self._load()
38
+
39
+ def __len__(self):
40
+ return len(self.data)
41
+
42
+ def __getitem__(self, i):
43
+ return self.data[i]
44
+
45
+ def _prepare(self):
46
+ raise NotImplementedError()
47
+
48
+ def _filter_relpaths(self, relpaths):
49
+ ignore = set([
50
+ "n06596364_9591.JPEG",
51
+ ])
52
+ relpaths = [rpath for rpath in relpaths if not rpath.split("/")[-1] in ignore]
53
+ if "sub_indices" in self.config:
54
+ indices = str_to_indices(self.config["sub_indices"])
55
+ synsets = give_synsets_from_indices(indices, path_to_yaml=self.idx2syn) # returns a list of strings
56
+ self.synset2idx = synset2idx(path_to_yaml=self.idx2syn)
57
+ files = []
58
+ for rpath in relpaths:
59
+ syn = rpath.split("/")[0]
60
+ if syn in synsets:
61
+ files.append(rpath)
62
+ return files
63
+ else:
64
+ return relpaths
65
+
66
+ def _prepare_synset_to_human(self):
67
+ SIZE = 2655750
68
+ URL = "https://heibox.uni-heidelberg.de/f/9f28e956cd304264bb82/?dl=1"
69
+ self.human_dict = os.path.join(self.root, "synset_human.txt")
70
+ if (not os.path.exists(self.human_dict) or
71
+ not os.path.getsize(self.human_dict)==SIZE):
72
+ download(URL, self.human_dict)
73
+
74
+ def _prepare_idx_to_synset(self):
75
+ URL = "https://heibox.uni-heidelberg.de/f/d835d5b6ceda4d3aa910/?dl=1"
76
+ self.idx2syn = os.path.join(self.root, "index_synset.yaml")
77
+ if (not os.path.exists(self.idx2syn)):
78
+ download(URL, self.idx2syn)
79
+
80
+ def _prepare_human_to_integer_label(self):
81
+ URL = "https://heibox.uni-heidelberg.de/f/2362b797d5be43b883f6/?dl=1"
82
+ self.human2integer = os.path.join(self.root, "imagenet1000_clsidx_to_labels.txt")
83
+ if (not os.path.exists(self.human2integer)):
84
+ download(URL, self.human2integer)
85
+ with open(self.human2integer, "r") as f:
86
+ lines = f.read().splitlines()
87
+ assert len(lines) == 1000
88
+ self.human2integer_dict = dict()
89
+ for line in lines:
90
+ value, key = line.split(":")
91
+ self.human2integer_dict[key] = int(value)
92
+
93
+ def _load(self):
94
+ with open(self.txt_filelist, "r") as f:
95
+ self.relpaths = f.read().splitlines()
96
+ l1 = len(self.relpaths)
97
+ self.relpaths = self._filter_relpaths(self.relpaths)
98
+ print("Removed {} files from filelist during filtering.".format(l1 - len(self.relpaths)))
99
+
100
+ self.synsets = [p.split("/")[0] for p in self.relpaths]
101
+ self.abspaths = [os.path.join(self.datadir, p) for p in self.relpaths]
102
+
103
+ unique_synsets = np.unique(self.synsets)
104
+ class_dict = dict((synset, i) for i, synset in enumerate(unique_synsets))
105
+ if not self.keep_orig_class_label:
106
+ self.class_labels = [class_dict[s] for s in self.synsets]
107
+ else:
108
+ self.class_labels = [self.synset2idx[s] for s in self.synsets]
109
+
110
+ with open(self.human_dict, "r") as f:
111
+ human_dict = f.read().splitlines()
112
+ human_dict = dict(line.split(maxsplit=1) for line in human_dict)
113
+
114
+ self.human_labels = [human_dict[s] for s in self.synsets]
115
+
116
+ labels = {
117
+ "relpath": np.array(self.relpaths),
118
+ "synsets": np.array(self.synsets),
119
+ "class_label": np.array(self.class_labels),
120
+ "human_label": np.array(self.human_labels),
121
+ }
122
+
123
+ if self.process_images:
124
+ self.size = retrieve(self.config, "size", default=256)
125
+ self.data = ImagePaths(self.abspaths,
126
+ labels=labels,
127
+ size=self.size,
128
+ random_crop=self.random_crop,
129
+ )
130
+ else:
131
+ self.data = self.abspaths
132
+
133
+
134
+ class ImageNetTrain(ImageNetBase):
135
+ NAME = "ILSVRC2012_train"
136
+ URL = "http://www.image-net.org/challenges/LSVRC/2012/"
137
+ AT_HASH = "a306397ccf9c2ead27155983c254227c0fd938e2"
138
+ FILES = [
139
+ "ILSVRC2012_img_train.tar",
140
+ ]
141
+ SIZES = [
142
+ 147897477120,
143
+ ]
144
+
145
+ def __init__(self, process_images=True, data_root=None, **kwargs):
146
+ self.process_images = process_images
147
+ self.data_root = data_root
148
+ super().__init__(**kwargs)
149
+
150
+ def _prepare(self):
151
+ if self.data_root:
152
+ self.root = os.path.join(self.data_root, self.NAME)
153
+ else:
154
+ cachedir = os.environ.get("XDG_CACHE_HOME", os.path.expanduser("~/.cache"))
155
+ self.root = os.path.join(cachedir, "autoencoders/data", self.NAME)
156
+
157
+ self.datadir = os.path.join(self.root, "data")
158
+ self.txt_filelist = os.path.join(self.root, "filelist.txt")
159
+ self.expected_length = 1281167
160
+ self.random_crop = retrieve(self.config, "ImageNetTrain/random_crop",
161
+ default=True)
162
+ if not tdu.is_prepared(self.root):
163
+ # prep
164
+ print("Preparing dataset {} in {}".format(self.NAME, self.root))
165
+
166
+ datadir = self.datadir
167
+ if not os.path.exists(datadir):
168
+ path = os.path.join(self.root, self.FILES[0])
169
+ if not os.path.exists(path) or not os.path.getsize(path)==self.SIZES[0]:
170
+ import academictorrents as at
171
+ atpath = at.get(self.AT_HASH, datastore=self.root)
172
+ assert atpath == path
173
+
174
+ print("Extracting {} to {}".format(path, datadir))
175
+ os.makedirs(datadir, exist_ok=True)
176
+ with tarfile.open(path, "r:") as tar:
177
+ tar.extractall(path=datadir)
178
+
179
+ print("Extracting sub-tars.")
180
+ subpaths = sorted(glob.glob(os.path.join(datadir, "*.tar")))
181
+ for subpath in tqdm(subpaths):
182
+ subdir = subpath[:-len(".tar")]
183
+ os.makedirs(subdir, exist_ok=True)
184
+ with tarfile.open(subpath, "r:") as tar:
185
+ tar.extractall(path=subdir)
186
+
187
+ filelist = glob.glob(os.path.join(datadir, "**", "*.JPEG"))
188
+ filelist = [os.path.relpath(p, start=datadir) for p in filelist]
189
+ filelist = sorted(filelist)
190
+ filelist = "\n".join(filelist)+"\n"
191
+ with open(self.txt_filelist, "w") as f:
192
+ f.write(filelist)
193
+
194
+ tdu.mark_prepared(self.root)
195
+
196
+
197
+ class ImageNetValidation(ImageNetBase):
198
+ NAME = "ILSVRC2012_validation"
199
+ URL = "http://www.image-net.org/challenges/LSVRC/2012/"
200
+ AT_HASH = "5d6d0df7ed81efd49ca99ea4737e0ae5e3a5f2e5"
201
+ VS_URL = "https://heibox.uni-heidelberg.de/f/3e0f6e9c624e45f2bd73/?dl=1"
202
+ FILES = [
203
+ "ILSVRC2012_img_val.tar",
204
+ "validation_synset.txt",
205
+ ]
206
+ SIZES = [
207
+ 6744924160,
208
+ 1950000,
209
+ ]
210
+
211
+ def __init__(self, process_images=True, data_root=None, **kwargs):
212
+ self.data_root = data_root
213
+ self.process_images = process_images
214
+ super().__init__(**kwargs)
215
+
216
+ def _prepare(self):
217
+ if self.data_root:
218
+ self.root = os.path.join(self.data_root, self.NAME)
219
+ else:
220
+ cachedir = os.environ.get("XDG_CACHE_HOME", os.path.expanduser("~/.cache"))
221
+ self.root = os.path.join(cachedir, "autoencoders/data", self.NAME)
222
+ self.datadir = os.path.join(self.root, "data")
223
+ self.txt_filelist = os.path.join(self.root, "filelist.txt")
224
+ self.expected_length = 50000
225
+ self.random_crop = retrieve(self.config, "ImageNetValidation/random_crop",
226
+ default=False)
227
+ if not tdu.is_prepared(self.root):
228
+ # prep
229
+ print("Preparing dataset {} in {}".format(self.NAME, self.root))
230
+
231
+ datadir = self.datadir
232
+ if not os.path.exists(datadir):
233
+ path = os.path.join(self.root, self.FILES[0])
234
+ if not os.path.exists(path) or not os.path.getsize(path)==self.SIZES[0]:
235
+ import academictorrents as at
236
+ atpath = at.get(self.AT_HASH, datastore=self.root)
237
+ assert atpath == path
238
+
239
+ print("Extracting {} to {}".format(path, datadir))
240
+ os.makedirs(datadir, exist_ok=True)
241
+ with tarfile.open(path, "r:") as tar:
242
+ tar.extractall(path=datadir)
243
+
244
+ vspath = os.path.join(self.root, self.FILES[1])
245
+ if not os.path.exists(vspath) or not os.path.getsize(vspath)==self.SIZES[1]:
246
+ download(self.VS_URL, vspath)
247
+
248
+ with open(vspath, "r") as f:
249
+ synset_dict = f.read().splitlines()
250
+ synset_dict = dict(line.split() for line in synset_dict)
251
+
252
+ print("Reorganizing into synset folders")
253
+ synsets = np.unique(list(synset_dict.values()))
254
+ for s in synsets:
255
+ os.makedirs(os.path.join(datadir, s), exist_ok=True)
256
+ for k, v in synset_dict.items():
257
+ src = os.path.join(datadir, k)
258
+ dst = os.path.join(datadir, v)
259
+ shutil.move(src, dst)
260
+
261
+ filelist = glob.glob(os.path.join(datadir, "**", "*.JPEG"))
262
+ filelist = [os.path.relpath(p, start=datadir) for p in filelist]
263
+ filelist = sorted(filelist)
264
+ filelist = "\n".join(filelist)+"\n"
265
+ with open(self.txt_filelist, "w") as f:
266
+ f.write(filelist)
267
+
268
+ tdu.mark_prepared(self.root)
269
+
270
+
271
+
272
+ class ImageNetSR(Dataset):
273
+ def __init__(self, size=None,
274
+ degradation=None, downscale_f=4, min_crop_f=0.5, max_crop_f=1.,
275
+ random_crop=True):
276
+ """
277
+ Imagenet Superresolution Dataloader
278
+ Performs following ops in order:
279
+ 1. crops a crop of size s from image either as random or center crop
280
+ 2. resizes crop to size with cv2.area_interpolation
281
+ 3. degrades resized crop with degradation_fn
282
+
283
+ :param size: resizing to size after cropping
284
+ :param degradation: degradation_fn, e.g. cv_bicubic or bsrgan_light
285
+ :param downscale_f: Low Resolution Downsample factor
286
+ :param min_crop_f: determines crop size s,
287
+ where s = c * min_img_side_len with c sampled from interval (min_crop_f, max_crop_f)
288
+ :param max_crop_f: ""
289
+ :param data_root:
290
+ :param random_crop:
291
+ """
292
+ self.base = self.get_base()
293
+ assert size
294
+ assert (size / downscale_f).is_integer()
295
+ self.size = size
296
+ self.LR_size = int(size / downscale_f)
297
+ self.min_crop_f = min_crop_f
298
+ self.max_crop_f = max_crop_f
299
+ assert(max_crop_f <= 1.)
300
+ self.center_crop = not random_crop
301
+
302
+ self.image_rescaler = albumentations.SmallestMaxSize(max_size=size, interpolation=cv2.INTER_AREA)
303
+
304
+ self.pil_interpolation = False # gets reset later if incase interp_op is from pillow
305
+
306
+ if degradation == "bsrgan":
307
+ self.degradation_process = partial(degradation_fn_bsr, sf=downscale_f)
308
+
309
+ elif degradation == "bsrgan_light":
310
+ self.degradation_process = partial(degradation_fn_bsr_light, sf=downscale_f)
311
+
312
+ else:
313
+ interpolation_fn = {
314
+ "cv_nearest": cv2.INTER_NEAREST,
315
+ "cv_bilinear": cv2.INTER_LINEAR,
316
+ "cv_bicubic": cv2.INTER_CUBIC,
317
+ "cv_area": cv2.INTER_AREA,
318
+ "cv_lanczos": cv2.INTER_LANCZOS4,
319
+ "pil_nearest": PIL.Image.NEAREST,
320
+ "pil_bilinear": PIL.Image.BILINEAR,
321
+ "pil_bicubic": PIL.Image.BICUBIC,
322
+ "pil_box": PIL.Image.BOX,
323
+ "pil_hamming": PIL.Image.HAMMING,
324
+ "pil_lanczos": PIL.Image.LANCZOS,
325
+ }[degradation]
326
+
327
+ self.pil_interpolation = degradation.startswith("pil_")
328
+
329
+ if self.pil_interpolation:
330
+ self.degradation_process = partial(TF.resize, size=self.LR_size, interpolation=interpolation_fn)
331
+
332
+ else:
333
+ self.degradation_process = albumentations.SmallestMaxSize(max_size=self.LR_size,
334
+ interpolation=interpolation_fn)
335
+
336
+ def __len__(self):
337
+ return len(self.base)
338
+
339
+ def __getitem__(self, i):
340
+ example = self.base[i]
341
+ image = Image.open(example["file_path_"])
342
+
343
+ if not image.mode == "RGB":
344
+ image = image.convert("RGB")
345
+
346
+ image = np.array(image).astype(np.uint8)
347
+
348
+ min_side_len = min(image.shape[:2])
349
+ crop_side_len = min_side_len * np.random.uniform(self.min_crop_f, self.max_crop_f, size=None)
350
+ crop_side_len = int(crop_side_len)
351
+
352
+ if self.center_crop:
353
+ self.cropper = albumentations.CenterCrop(height=crop_side_len, width=crop_side_len)
354
+
355
+ else:
356
+ self.cropper = albumentations.RandomCrop(height=crop_side_len, width=crop_side_len)
357
+
358
+ image = self.cropper(image=image)["image"]
359
+ image = self.image_rescaler(image=image)["image"]
360
+
361
+ if self.pil_interpolation:
362
+ image_pil = PIL.Image.fromarray(image)
363
+ LR_image = self.degradation_process(image_pil)
364
+ LR_image = np.array(LR_image).astype(np.uint8)
365
+
366
+ else:
367
+ LR_image = self.degradation_process(image=image)["image"]
368
+
369
+ example["image"] = (image/127.5 - 1.0).astype(np.float32)
370
+ example["LR_image"] = (LR_image/127.5 - 1.0).astype(np.float32)
371
+
372
+ return example
373
+
374
+
375
+ class ImageNetSRTrain(ImageNetSR):
376
+ def __init__(self, **kwargs):
377
+ super().__init__(**kwargs)
378
+
379
+ def get_base(self):
380
+ with open("ldm/data/imagenet_train_hr_indices.p", "rb") as f:
381
+ indices = pickle.load(f)
382
+ dset = ImageNetTrain(process_images=False,)
383
+ return Subset(dset, indices)
384
+
385
+
386
+ class ImageNetSRValidation(ImageNetSR):
387
+ def __init__(self, **kwargs):
388
+ super().__init__(**kwargs)
389
+
390
+ def get_base(self):
391
+ with open("ldm/data/imagenet_val_hr_indices.p", "rb") as f:
392
+ indices = pickle.load(f)
393
+ dset = ImageNetValidation(process_images=False,)
394
+ return Subset(dset, indices)
gligen/ldm/data/imagenet_clsidx_to_label.txt ADDED
@@ -0,0 +1,1000 @@
1
+ 0: 'tench, Tinca tinca',
2
+ 1: 'goldfish, Carassius auratus',
3
+ 2: 'great white shark, white shark, man-eater, man-eating shark, Carcharodon carcharias',
4
+ 3: 'tiger shark, Galeocerdo cuvieri',
5
+ 4: 'hammerhead, hammerhead shark',
6
+ 5: 'electric ray, crampfish, numbfish, torpedo',
7
+ 6: 'stingray',
8
+ 7: 'cock',
9
+ 8: 'hen',
10
+ 9: 'ostrich, Struthio camelus',
11
+ 10: 'brambling, Fringilla montifringilla',
12
+ 11: 'goldfinch, Carduelis carduelis',
13
+ 12: 'house finch, linnet, Carpodacus mexicanus',
14
+ 13: 'junco, snowbird',
15
+ 14: 'indigo bunting, indigo finch, indigo bird, Passerina cyanea',
16
+ 15: 'robin, American robin, Turdus migratorius',
17
+ 16: 'bulbul',
18
+ 17: 'jay',
19
+ 18: 'magpie',
20
+ 19: 'chickadee',
21
+ 20: 'water ouzel, dipper',
22
+ 21: 'kite',
23
+ 22: 'bald eagle, American eagle, Haliaeetus leucocephalus',
24
+ 23: 'vulture',
25
+ 24: 'great grey owl, great gray owl, Strix nebulosa',
26
+ 25: 'European fire salamander, Salamandra salamandra',
27
+ 26: 'common newt, Triturus vulgaris',
28
+ 27: 'eft',
29
+ 28: 'spotted salamander, Ambystoma maculatum',
30
+ 29: 'axolotl, mud puppy, Ambystoma mexicanum',
31
+ 30: 'bullfrog, Rana catesbeiana',
32
+ 31: 'tree frog, tree-frog',
33
+ 32: 'tailed frog, bell toad, ribbed toad, tailed toad, Ascaphus trui',
34
+ 33: 'loggerhead, loggerhead turtle, Caretta caretta',
35
+ 34: 'leatherback turtle, leatherback, leathery turtle, Dermochelys coriacea',
36
+ 35: 'mud turtle',
37
+ 36: 'terrapin',
38
+ 37: 'box turtle, box tortoise',
39
+ 38: 'banded gecko',
40
+ 39: 'common iguana, iguana, Iguana iguana',
41
+ 40: 'American chameleon, anole, Anolis carolinensis',
42
+ 41: 'whiptail, whiptail lizard',
43
+ 42: 'agama',
44
+ 43: 'frilled lizard, Chlamydosaurus kingi',
45
+ 44: 'alligator lizard',
46
+ 45: 'Gila monster, Heloderma suspectum',
47
+ 46: 'green lizard, Lacerta viridis',
48
+ 47: 'African chameleon, Chamaeleo chamaeleon',
49
+ 48: 'Komodo dragon, Komodo lizard, dragon lizard, giant lizard, Varanus komodoensis',
50
+ 49: 'African crocodile, Nile crocodile, Crocodylus niloticus',
51
+ 50: 'American alligator, Alligator mississipiensis',
52
+ 51: 'triceratops',
53
+ 52: 'thunder snake, worm snake, Carphophis amoenus',
54
+ 53: 'ringneck snake, ring-necked snake, ring snake',
55
+ 54: 'hognose snake, puff adder, sand viper',
56
+ 55: 'green snake, grass snake',
57
+ 56: 'king snake, kingsnake',
58
+ 57: 'garter snake, grass snake',
59
+ 58: 'water snake',
60
+ 59: 'vine snake',
61
+ 60: 'night snake, Hypsiglena torquata',
62
+ 61: 'boa constrictor, Constrictor constrictor',
63
+ 62: 'rock python, rock snake, Python sebae',
64
+ 63: 'Indian cobra, Naja naja',
65
+ 64: 'green mamba',
66
+ 65: 'sea snake',
67
+ 66: 'horned viper, cerastes, sand viper, horned asp, Cerastes cornutus',
68
+ 67: 'diamondback, diamondback rattlesnake, Crotalus adamanteus',
69
+ 68: 'sidewinder, horned rattlesnake, Crotalus cerastes',
70
+ 69: 'trilobite',
71
+ 70: 'harvestman, daddy longlegs, Phalangium opilio',
72
+ 71: 'scorpion',
73
+ 72: 'black and gold garden spider, Argiope aurantia',
74
+ 73: 'barn spider, Araneus cavaticus',
75
+ 74: 'garden spider, Aranea diademata',
76
+ 75: 'black widow, Latrodectus mactans',
77
+ 76: 'tarantula',
78
+ 77: 'wolf spider, hunting spider',
79
+ 78: 'tick',
80
+ 79: 'centipede',
81
+ 80: 'black grouse',
82
+ 81: 'ptarmigan',
83
+ 82: 'ruffed grouse, partridge, Bonasa umbellus',
84
+ 83: 'prairie chicken, prairie grouse, prairie fowl',
85
+ 84: 'peacock',
86
+ 85: 'quail',
87
+ 86: 'partridge',
88
+ 87: 'African grey, African gray, Psittacus erithacus',
89
+ 88: 'macaw',
90
+ 89: 'sulphur-crested cockatoo, Kakatoe galerita, Cacatua galerita',
91
+ 90: 'lorikeet',
92
+ 91: 'coucal',
93
+ 92: 'bee eater',
94
+ 93: 'hornbill',
95
+ 94: 'hummingbird',
96
+ 95: 'jacamar',
97
+ 96: 'toucan',
98
+ 97: 'drake',
99
+ 98: 'red-breasted merganser, Mergus serrator',
100
+ 99: 'goose',
101
+ 100: 'black swan, Cygnus atratus',
102
+ 101: 'tusker',
103
+ 102: 'echidna, spiny anteater, anteater',
104
+ 103: 'platypus, duckbill, duckbilled platypus, duck-billed platypus, Ornithorhynchus anatinus',
105
+ 104: 'wallaby, brush kangaroo',
106
+ 105: 'koala, koala bear, kangaroo bear, native bear, Phascolarctos cinereus',
107
+ 106: 'wombat',
108
+ 107: 'jellyfish',
109
+ 108: 'sea anemone, anemone',
110
+ 109: 'brain coral',
111
+ 110: 'flatworm, platyhelminth',
112
+ 111: 'nematode, nematode worm, roundworm',
113
+ 112: 'conch',
114
+ 113: 'snail',
115
+ 114: 'slug',
116
+ 115: 'sea slug, nudibranch',
117
+ 116: 'chiton, coat-of-mail shell, sea cradle, polyplacophore',
118
+ 117: 'chambered nautilus, pearly nautilus, nautilus',
119
+ 118: 'Dungeness crab, Cancer magister',
120
+ 119: 'rock crab, Cancer irroratus',
121
+ 120: 'fiddler crab',
122
+ 121: 'king crab, Alaska crab, Alaskan king crab, Alaska king crab, Paralithodes camtschatica',
123
+ 122: 'American lobster, Northern lobster, Maine lobster, Homarus americanus',
124
+ 123: 'spiny lobster, langouste, rock lobster, crawfish, crayfish, sea crawfish',
125
+ 124: 'crayfish, crawfish, crawdad, crawdaddy',
126
+ 125: 'hermit crab',
127
+ 126: 'isopod',
128
+ 127: 'white stork, Ciconia ciconia',
129
+ 128: 'black stork, Ciconia nigra',
130
+ 129: 'spoonbill',
131
+ 130: 'flamingo',
132
+ 131: 'little blue heron, Egretta caerulea',
133
+ 132: 'American egret, great white heron, Egretta albus',
134
+ 133: 'bittern',
135
+ 134: 'crane',
136
+ 135: 'limpkin, Aramus pictus',
137
+ 136: 'European gallinule, Porphyrio porphyrio',
138
+ 137: 'American coot, marsh hen, mud hen, water hen, Fulica americana',
139
+ 138: 'bustard',
140
+ 139: 'ruddy turnstone, Arenaria interpres',
141
+ 140: 'red-backed sandpiper, dunlin, Erolia alpina',
142
+ 141: 'redshank, Tringa totanus',
143
+ 142: 'dowitcher',
144
+ 143: 'oystercatcher, oyster catcher',
145
+ 144: 'pelican',
146
+ 145: 'king penguin, Aptenodytes patagonica',
147
+ 146: 'albatross, mollymawk',
148
+ 147: 'grey whale, gray whale, devilfish, Eschrichtius gibbosus, Eschrichtius robustus',
149
+ 148: 'killer whale, killer, orca, grampus, sea wolf, Orcinus orca',
150
+ 149: 'dugong, Dugong dugon',
151
+ 150: 'sea lion',
152
+ 151: 'Chihuahua',
153
+ 152: 'Japanese spaniel',
154
+ 153: 'Maltese dog, Maltese terrier, Maltese',
155
+ 154: 'Pekinese, Pekingese, Peke',
156
+ 155: 'Shih-Tzu',
157
+ 156: 'Blenheim spaniel',
158
+ 157: 'papillon',
159
+ 158: 'toy terrier',
160
+ 159: 'Rhodesian ridgeback',
161
+ 160: 'Afghan hound, Afghan',
162
+ 161: 'basset, basset hound',
163
+ 162: 'beagle',
164
+ 163: 'bloodhound, sleuthhound',
165
+ 164: 'bluetick',
166
+ 165: 'black-and-tan coonhound',
167
+ 166: 'Walker hound, Walker foxhound',
168
+ 167: 'English foxhound',
169
+ 168: 'redbone',
170
+ 169: 'borzoi, Russian wolfhound',
171
+ 170: 'Irish wolfhound',
172
+ 171: 'Italian greyhound',
173
+ 172: 'whippet',
174
+ 173: 'Ibizan hound, Ibizan Podenco',
175
+ 174: 'Norwegian elkhound, elkhound',
176
+ 175: 'otterhound, otter hound',
177
+ 176: 'Saluki, gazelle hound',
178
+ 177: 'Scottish deerhound, deerhound',
179
+ 178: 'Weimaraner',
180
+ 179: 'Staffordshire bullterrier, Staffordshire bull terrier',
181
+ 180: 'American Staffordshire terrier, Staffordshire terrier, American pit bull terrier, pit bull terrier',
182
+ 181: 'Bedlington terrier',
183
+ 182: 'Border terrier',
184
+ 183: 'Kerry blue terrier',
185
+ 184: 'Irish terrier',
186
+ 185: 'Norfolk terrier',
187
+ 186: 'Norwich terrier',
188
+ 187: 'Yorkshire terrier',
189
+ 188: 'wire-haired fox terrier',
190
+ 189: 'Lakeland terrier',
191
+ 190: 'Sealyham terrier, Sealyham',
192
+ 191: 'Airedale, Airedale terrier',
193
+ 192: 'cairn, cairn terrier',
194
+ 193: 'Australian terrier',
195
+ 194: 'Dandie Dinmont, Dandie Dinmont terrier',
196
+ 195: 'Boston bull, Boston terrier',
197
+ 196: 'miniature schnauzer',
198
+ 197: 'giant schnauzer',
199
+ 198: 'standard schnauzer',
200
+ 199: 'Scotch terrier, Scottish terrier, Scottie',
201
+ 200: 'Tibetan terrier, chrysanthemum dog',
202
+ 201: 'silky terrier, Sydney silky',
203
+ 202: 'soft-coated wheaten terrier',
204
+ 203: 'West Highland white terrier',
205
+ 204: 'Lhasa, Lhasa apso',
206
+ 205: 'flat-coated retriever',
207
+ 206: 'curly-coated retriever',
208
+ 207: 'golden retriever',
209
+ 208: 'Labrador retriever',
210
+ 209: 'Chesapeake Bay retriever',
211
+ 210: 'German short-haired pointer',
212
+ 211: 'vizsla, Hungarian pointer',
213
+ 212: 'English setter',
214
+ 213: 'Irish setter, red setter',
215
+ 214: 'Gordon setter',
216
+ 215: 'Brittany spaniel',
217
+ 216: 'clumber, clumber spaniel',
218
+ 217: 'English springer, English springer spaniel',
219
+ 218: 'Welsh springer spaniel',
220
+ 219: 'cocker spaniel, English cocker spaniel, cocker',
221
+ 220: 'Sussex spaniel',
222
+ 221: 'Irish water spaniel',
223
+ 222: 'kuvasz',
224
+ 223: 'schipperke',
225
+ 224: 'groenendael',
226
+ 225: 'malinois',
227
+ 226: 'briard',
228
+ 227: 'kelpie',
229
+ 228: 'komondor',
230
+ 229: 'Old English sheepdog, bobtail',
231
+ 230: 'Shetland sheepdog, Shetland sheep dog, Shetland',
232
+ 231: 'collie',
233
+ 232: 'Border collie',
234
+ 233: 'Bouvier des Flandres, Bouviers des Flandres',
235
+ 234: 'Rottweiler',
236
+ 235: 'German shepherd, German shepherd dog, German police dog, alsatian',
237
+ 236: 'Doberman, Doberman pinscher',
238
+ 237: 'miniature pinscher',
239
+ 238: 'Greater Swiss Mountain dog',
240
+ 239: 'Bernese mountain dog',
241
+ 240: 'Appenzeller',
242
+ 241: 'EntleBucher',
243
+ 242: 'boxer',
244
+ 243: 'bull mastiff',
245
+ 244: 'Tibetan mastiff',
246
+ 245: 'French bulldog',
247
+ 246: 'Great Dane',
248
+ 247: 'Saint Bernard, St Bernard',
249
+ 248: 'Eskimo dog, husky',
250
+ 249: 'malamute, malemute, Alaskan malamute',
251
+ 250: 'Siberian husky',
252
+ 251: 'dalmatian, coach dog, carriage dog',
253
+ 252: 'affenpinscher, monkey pinscher, monkey dog',
254
+ 253: 'basenji',
255
+ 254: 'pug, pug-dog',
256
+ 255: 'Leonberg',
257
+ 256: 'Newfoundland, Newfoundland dog',
258
+ 257: 'Great Pyrenees',
259
+ 258: 'Samoyed, Samoyede',
260
+ 259: 'Pomeranian',
261
+ 260: 'chow, chow chow',
262
+ 261: 'keeshond',
263
+ 262: 'Brabancon griffon',
264
+ 263: 'Pembroke, Pembroke Welsh corgi',
265
+ 264: 'Cardigan, Cardigan Welsh corgi',
266
+ 265: 'toy poodle',
267
+ 266: 'miniature poodle',
268
+ 267: 'standard poodle',
269
+ 268: 'Mexican hairless',
270
+ 269: 'timber wolf, grey wolf, gray wolf, Canis lupus',
271
+ 270: 'white wolf, Arctic wolf, Canis lupus tundrarum',
272
+ 271: 'red wolf, maned wolf, Canis rufus, Canis niger',
273
+ 272: 'coyote, prairie wolf, brush wolf, Canis latrans',
274
+ 273: 'dingo, warrigal, warragal, Canis dingo',
275
+ 274: 'dhole, Cuon alpinus',
276
+ 275: 'African hunting dog, hyena dog, Cape hunting dog, Lycaon pictus',
277
+ 276: 'hyena, hyaena',
278
+ 277: 'red fox, Vulpes vulpes',
279
+ 278: 'kit fox, Vulpes macrotis',
280
+ 279: 'Arctic fox, white fox, Alopex lagopus',
281
+ 280: 'grey fox, gray fox, Urocyon cinereoargenteus',
282
+ 281: 'tabby, tabby cat',
283
+ 282: 'tiger cat',
284
+ 283: 'Persian cat',
285
+ 284: 'Siamese cat, Siamese',
286
+ 285: 'Egyptian cat',
287
+ 286: 'cougar, puma, catamount, mountain lion, painter, panther, Felis concolor',
288
+ 287: 'lynx, catamount',
289
+ 288: 'leopard, Panthera pardus',
290
+ 289: 'snow leopard, ounce, Panthera uncia',
291
+ 290: 'jaguar, panther, Panthera onca, Felis onca',
292
+ 291: 'lion, king of beasts, Panthera leo',
293
+ 292: 'tiger, Panthera tigris',
294
+ 293: 'cheetah, chetah, Acinonyx jubatus',
295
+ 294: 'brown bear, bruin, Ursus arctos',
296
+ 295: 'American black bear, black bear, Ursus americanus, Euarctos americanus',
297
+ 296: 'ice bear, polar bear, Ursus Maritimus, Thalarctos maritimus',
298
+ 297: 'sloth bear, Melursus ursinus, Ursus ursinus',
299
+ 298: 'mongoose',
300
+ 299: 'meerkat, mierkat',
301
+ 300: 'tiger beetle',
302
+ 301: 'ladybug, ladybeetle, lady beetle, ladybird, ladybird beetle',
303
+ 302: 'ground beetle, carabid beetle',
304
+ 303: 'long-horned beetle, longicorn, longicorn beetle',
305
+ 304: 'leaf beetle, chrysomelid',
306
+ 305: 'dung beetle',
307
+ 306: 'rhinoceros beetle',
308
+ 307: 'weevil',
309
+ 308: 'fly',
310
+ 309: 'bee',
311
+ 310: 'ant, emmet, pismire',
312
+ 311: 'grasshopper, hopper',
313
+ 312: 'cricket',
314
+ 313: 'walking stick, walkingstick, stick insect',
315
+ 314: 'cockroach, roach',
316
+ 315: 'mantis, mantid',
317
+ 316: 'cicada, cicala',
318
+ 317: 'leafhopper',
319
+ 318: 'lacewing, lacewing fly',
320
+ 319: "dragonfly, darning needle, devil's darning needle, sewing needle, snake feeder, snake doctor, mosquito hawk, skeeter hawk",
321
+ 320: 'damselfly',
322
+ 321: 'admiral',
323
+ 322: 'ringlet, ringlet butterfly',
324
+ 323: 'monarch, monarch butterfly, milkweed butterfly, Danaus plexippus',
325
+ 324: 'cabbage butterfly',
326
+ 325: 'sulphur butterfly, sulfur butterfly',
327
+ 326: 'lycaenid, lycaenid butterfly',
328
+ 327: 'starfish, sea star',
329
+ 328: 'sea urchin',
330
+ 329: 'sea cucumber, holothurian',
331
+ 330: 'wood rabbit, cottontail, cottontail rabbit',
332
+ 331: 'hare',
333
+ 332: 'Angora, Angora rabbit',
334
+ 333: 'hamster',
335
+ 334: 'porcupine, hedgehog',
336
+ 335: 'fox squirrel, eastern fox squirrel, Sciurus niger',
337
+ 336: 'marmot',
338
+ 337: 'beaver',
339
+ 338: 'guinea pig, Cavia cobaya',
340
+ 339: 'sorrel',
341
+ 340: 'zebra',
342
+ 341: 'hog, pig, grunter, squealer, Sus scrofa',
343
+ 342: 'wild boar, boar, Sus scrofa',
344
+ 343: 'warthog',
345
+ 344: 'hippopotamus, hippo, river horse, Hippopotamus amphibius',
346
+ 345: 'ox',
347
+ 346: 'water buffalo, water ox, Asiatic buffalo, Bubalus bubalis',
348
+ 347: 'bison',
349
+ 348: 'ram, tup',
350
+ 349: 'bighorn, bighorn sheep, cimarron, Rocky Mountain bighorn, Rocky Mountain sheep, Ovis canadensis',
351
+ 350: 'ibex, Capra ibex',
352
+ 351: 'hartebeest',
353
+ 352: 'impala, Aepyceros melampus',
354
+ 353: 'gazelle',
355
+ 354: 'Arabian camel, dromedary, Camelus dromedarius',
356
+ 355: 'llama',
357
+ 356: 'weasel',
358
+ 357: 'mink',
359
+ 358: 'polecat, fitch, foulmart, foumart, Mustela putorius',
360
+ 359: 'black-footed ferret, ferret, Mustela nigripes',
361
+ 360: 'otter',
362
+ 361: 'skunk, polecat, wood pussy',
363
+ 362: 'badger',
364
+ 363: 'armadillo',
365
+ 364: 'three-toed sloth, ai, Bradypus tridactylus',
366
+ 365: 'orangutan, orang, orangutang, Pongo pygmaeus',
367
+ 366: 'gorilla, Gorilla gorilla',
368
+ 367: 'chimpanzee, chimp, Pan troglodytes',
369
+ 368: 'gibbon, Hylobates lar',
370
+ 369: 'siamang, Hylobates syndactylus, Symphalangus syndactylus',
371
+ 370: 'guenon, guenon monkey',
372
+ 371: 'patas, hussar monkey, Erythrocebus patas',
373
+ 372: 'baboon',
374
+ 373: 'macaque',
375
+ 374: 'langur',
376
+ 375: 'colobus, colobus monkey',
377
+ 376: 'proboscis monkey, Nasalis larvatus',
378
+ 377: 'marmoset',
379
+ 378: 'capuchin, ringtail, Cebus capucinus',
380
+ 379: 'howler monkey, howler',
381
+ 380: 'titi, titi monkey',
382
+ 381: 'spider monkey, Ateles geoffroyi',
383
+ 382: 'squirrel monkey, Saimiri sciureus',
384
+ 383: 'Madagascar cat, ring-tailed lemur, Lemur catta',
385
+ 384: 'indri, indris, Indri indri, Indri brevicaudatus',
386
+ 385: 'Indian elephant, Elephas maximus',
387
+ 386: 'African elephant, Loxodonta africana',
388
+ 387: 'lesser panda, red panda, panda, bear cat, cat bear, Ailurus fulgens',
389
+ 388: 'giant panda, panda, panda bear, coon bear, Ailuropoda melanoleuca',
390
+ 389: 'barracouta, snoek',
391
+ 390: 'eel',
392
+ 391: 'coho, cohoe, coho salmon, blue jack, silver salmon, Oncorhynchus kisutch',
393
+ 392: 'rock beauty, Holocanthus tricolor',
394
+ 393: 'anemone fish',
395
+ 394: 'sturgeon',
396
+ 395: 'gar, garfish, garpike, billfish, Lepisosteus osseus',
397
+ 396: 'lionfish',
398
+ 397: 'puffer, pufferfish, blowfish, globefish',
399
+ 398: 'abacus',
400
+ 399: 'abaya',
401
+ 400: "academic gown, academic robe, judge's robe",
402
+ 401: 'accordion, piano accordion, squeeze box',
403
+ 402: 'acoustic guitar',
404
+ 403: 'aircraft carrier, carrier, flattop, attack aircraft carrier',
405
+ 404: 'airliner',
406
+ 405: 'airship, dirigible',
407
+ 406: 'altar',
408
+ 407: 'ambulance',
409
+ 408: 'amphibian, amphibious vehicle',
410
+ 409: 'analog clock',
411
+ 410: 'apiary, bee house',
412
+ 411: 'apron',
413
+ 412: 'ashcan, trash can, garbage can, wastebin, ash bin, ash-bin, ashbin, dustbin, trash barrel, trash bin',
414
+ 413: 'assault rifle, assault gun',
415
+ 414: 'backpack, back pack, knapsack, packsack, rucksack, haversack',
416
+ 415: 'bakery, bakeshop, bakehouse',
417
+ 416: 'balance beam, beam',
418
+ 417: 'balloon',
419
+ 418: 'ballpoint, ballpoint pen, ballpen, Biro',
420
+ 419: 'Band Aid',
421
+ 420: 'banjo',
422
+ 421: 'bannister, banister, balustrade, balusters, handrail',
423
+ 422: 'barbell',
424
+ 423: 'barber chair',
425
+ 424: 'barbershop',
426
+ 425: 'barn',
427
+ 426: 'barometer',
428
+ 427: 'barrel, cask',
429
+ 428: 'barrow, garden cart, lawn cart, wheelbarrow',
430
+ 429: 'baseball',
431
+ 430: 'basketball',
432
+ 431: 'bassinet',
433
+ 432: 'bassoon',
434
+ 433: 'bathing cap, swimming cap',
435
+ 434: 'bath towel',
436
+ 435: 'bathtub, bathing tub, bath, tub',
437
+ 436: 'beach wagon, station wagon, wagon, estate car, beach waggon, station waggon, waggon',
438
+ 437: 'beacon, lighthouse, beacon light, pharos',
439
+ 438: 'beaker',
440
+ 439: 'bearskin, busby, shako',
441
+ 440: 'beer bottle',
442
+ 441: 'beer glass',
443
+ 442: 'bell cote, bell cot',
444
+ 443: 'bib',
445
+ 444: 'bicycle-built-for-two, tandem bicycle, tandem',
446
+ 445: 'bikini, two-piece',
447
+ 446: 'binder, ring-binder',
448
+ 447: 'binoculars, field glasses, opera glasses',
449
+ 448: 'birdhouse',
450
+ 449: 'boathouse',
451
+ 450: 'bobsled, bobsleigh, bob',
452
+ 451: 'bolo tie, bolo, bola tie, bola',
453
+ 452: 'bonnet, poke bonnet',
454
+ 453: 'bookcase',
455
+ 454: 'bookshop, bookstore, bookstall',
456
+ 455: 'bottlecap',
457
+ 456: 'bow',
458
+ 457: 'bow tie, bow-tie, bowtie',
459
+ 458: 'brass, memorial tablet, plaque',
460
+ 459: 'brassiere, bra, bandeau',
461
+ 460: 'breakwater, groin, groyne, mole, bulwark, seawall, jetty',
462
+ 461: 'breastplate, aegis, egis',
463
+ 462: 'broom',
464
+ 463: 'bucket, pail',
465
+ 464: 'buckle',
466
+ 465: 'bulletproof vest',
467
+ 466: 'bullet train, bullet',
468
+ 467: 'butcher shop, meat market',
469
+ 468: 'cab, hack, taxi, taxicab',
470
+ 469: 'caldron, cauldron',
471
+ 470: 'candle, taper, wax light',
472
+ 471: 'cannon',
473
+ 472: 'canoe',
474
+ 473: 'can opener, tin opener',
475
+ 474: 'cardigan',
476
+ 475: 'car mirror',
477
+ 476: 'carousel, carrousel, merry-go-round, roundabout, whirligig',
478
+ 477: "carpenter's kit, tool kit",
479
+ 478: 'carton',
480
+ 479: 'car wheel',
481
+ 480: 'cash machine, cash dispenser, automated teller machine, automatic teller machine, automated teller, automatic teller, ATM',
482
+ 481: 'cassette',
483
+ 482: 'cassette player',
484
+ 483: 'castle',
485
+ 484: 'catamaran',
486
+ 485: 'CD player',
487
+ 486: 'cello, violoncello',
488
+ 487: 'cellular telephone, cellular phone, cellphone, cell, mobile phone',
489
+ 488: 'chain',
490
+ 489: 'chainlink fence',
491
+ 490: 'chain mail, ring mail, mail, chain armor, chain armour, ring armor, ring armour',
492
+ 491: 'chain saw, chainsaw',
493
+ 492: 'chest',
494
+ 493: 'chiffonier, commode',
495
+ 494: 'chime, bell, gong',
496
+ 495: 'china cabinet, china closet',
497
+ 496: 'Christmas stocking',
498
+ 497: 'church, church building',
499
+ 498: 'cinema, movie theater, movie theatre, movie house, picture palace',
500
+ 499: 'cleaver, meat cleaver, chopper',
501
+ 500: 'cliff dwelling',
502
+ 501: 'cloak',
503
+ 502: 'clog, geta, patten, sabot',
504
+ 503: 'cocktail shaker',
505
+ 504: 'coffee mug',
506
+ 505: 'coffeepot',
507
+ 506: 'coil, spiral, volute, whorl, helix',
508
+ 507: 'combination lock',
509
+ 508: 'computer keyboard, keypad',
510
+ 509: 'confectionery, confectionary, candy store',
511
+ 510: 'container ship, containership, container vessel',
512
+ 511: 'convertible',
513
+ 512: 'corkscrew, bottle screw',
514
+ 513: 'cornet, horn, trumpet, trump',
515
+ 514: 'cowboy boot',
516
+ 515: 'cowboy hat, ten-gallon hat',
517
+ 516: 'cradle',
518
+ 517: 'crane',
519
+ 518: 'crash helmet',
520
+ 519: 'crate',
521
+ 520: 'crib, cot',
522
+ 521: 'Crock Pot',
523
+ 522: 'croquet ball',
524
+ 523: 'crutch',
525
+ 524: 'cuirass',
526
+ 525: 'dam, dike, dyke',
527
+ 526: 'desk',
528
+ 527: 'desktop computer',
529
+ 528: 'dial telephone, dial phone',
530
+ 529: 'diaper, nappy, napkin',
531
+ 530: 'digital clock',
532
+ 531: 'digital watch',
533
+ 532: 'dining table, board',
534
+ 533: 'dishrag, dishcloth',
535
+ 534: 'dishwasher, dish washer, dishwashing machine',
536
+ 535: 'disk brake, disc brake',
537
+ 536: 'dock, dockage, docking facility',
538
+ 537: 'dogsled, dog sled, dog sleigh',
539
+ 538: 'dome',
540
+ 539: 'doormat, welcome mat',
541
+ 540: 'drilling platform, offshore rig',
542
+ 541: 'drum, membranophone, tympan',
543
+ 542: 'drumstick',
544
+ 543: 'dumbbell',
545
+ 544: 'Dutch oven',
546
+ 545: 'electric fan, blower',
547
+ 546: 'electric guitar',
548
+ 547: 'electric locomotive',
549
+ 548: 'entertainment center',
550
+ 549: 'envelope',
551
+ 550: 'espresso maker',
552
+ 551: 'face powder',
553
+ 552: 'feather boa, boa',
554
+ 553: 'file, file cabinet, filing cabinet',
555
+ 554: 'fireboat',
556
+ 555: 'fire engine, fire truck',
557
+ 556: 'fire screen, fireguard',
558
+ 557: 'flagpole, flagstaff',
559
+ 558: 'flute, transverse flute',
560
+ 559: 'folding chair',
561
+ 560: 'football helmet',
562
+ 561: 'forklift',
563
+ 562: 'fountain',
564
+ 563: 'fountain pen',
565
+ 564: 'four-poster',
566
+ 565: 'freight car',
567
+ 566: 'French horn, horn',
568
+ 567: 'frying pan, frypan, skillet',
569
+ 568: 'fur coat',
570
+ 569: 'garbage truck, dustcart',
571
+ 570: 'gasmask, respirator, gas helmet',
572
+ 571: 'gas pump, gasoline pump, petrol pump, island dispenser',
573
+ 572: 'goblet',
574
+ 573: 'go-kart',
575
+ 574: 'golf ball',
576
+ 575: 'golfcart, golf cart',
577
+ 576: 'gondola',
578
+ 577: 'gong, tam-tam',
579
+ 578: 'gown',
580
+ 579: 'grand piano, grand',
581
+ 580: 'greenhouse, nursery, glasshouse',
582
+ 581: 'grille, radiator grille',
583
+ 582: 'grocery store, grocery, food market, market',
584
+ 583: 'guillotine',
585
+ 584: 'hair slide',
586
+ 585: 'hair spray',
587
+ 586: 'half track',
588
+ 587: 'hammer',
589
+ 588: 'hamper',
590
+ 589: 'hand blower, blow dryer, blow drier, hair dryer, hair drier',
591
+ 590: 'hand-held computer, hand-held microcomputer',
592
+ 591: 'handkerchief, hankie, hanky, hankey',
593
+ 592: 'hard disc, hard disk, fixed disk',
594
+ 593: 'harmonica, mouth organ, harp, mouth harp',
595
+ 594: 'harp',
596
+ 595: 'harvester, reaper',
597
+ 596: 'hatchet',
598
+ 597: 'holster',
599
+ 598: 'home theater, home theatre',
600
+ 599: 'honeycomb',
601
+ 600: 'hook, claw',
602
+ 601: 'hoopskirt, crinoline',
603
+ 602: 'horizontal bar, high bar',
604
+ 603: 'horse cart, horse-cart',
605
+ 604: 'hourglass',
606
+ 605: 'iPod',
607
+ 606: 'iron, smoothing iron',
608
+ 607: "jack-o'-lantern",
609
+ 608: 'jean, blue jean, denim',
610
+ 609: 'jeep, landrover',
611
+ 610: 'jersey, T-shirt, tee shirt',
612
+ 611: 'jigsaw puzzle',
613
+ 612: 'jinrikisha, ricksha, rickshaw',
614
+ 613: 'joystick',
615
+ 614: 'kimono',
616
+ 615: 'knee pad',
617
+ 616: 'knot',
618
+ 617: 'lab coat, laboratory coat',
619
+ 618: 'ladle',
620
+ 619: 'lampshade, lamp shade',
621
+ 620: 'laptop, laptop computer',
622
+ 621: 'lawn mower, mower',
623
+ 622: 'lens cap, lens cover',
624
+ 623: 'letter opener, paper knife, paperknife',
625
+ 624: 'library',
626
+ 625: 'lifeboat',
627
+ 626: 'lighter, light, igniter, ignitor',
628
+ 627: 'limousine, limo',
629
+ 628: 'liner, ocean liner',
630
+ 629: 'lipstick, lip rouge',
631
+ 630: 'Loafer',
632
+ 631: 'lotion',
633
+ 632: 'loudspeaker, speaker, speaker unit, loudspeaker system, speaker system',
634
+ 633: "loupe, jeweler's loupe",
635
+ 634: 'lumbermill, sawmill',
636
+ 635: 'magnetic compass',
637
+ 636: 'mailbag, postbag',
638
+ 637: 'mailbox, letter box',
639
+ 638: 'maillot',
640
+ 639: 'maillot, tank suit',
641
+ 640: 'manhole cover',
642
+ 641: 'maraca',
643
+ 642: 'marimba, xylophone',
644
+ 643: 'mask',
645
+ 644: 'matchstick',
646
+ 645: 'maypole',
647
+ 646: 'maze, labyrinth',
648
+ 647: 'measuring cup',
649
+ 648: 'medicine chest, medicine cabinet',
650
+ 649: 'megalith, megalithic structure',
651
+ 650: 'microphone, mike',
652
+ 651: 'microwave, microwave oven',
653
+ 652: 'military uniform',
654
+ 653: 'milk can',
655
+ 654: 'minibus',
656
+ 655: 'miniskirt, mini',
657
+ 656: 'minivan',
658
+ 657: 'missile',
659
+ 658: 'mitten',
660
+ 659: 'mixing bowl',
661
+ 660: 'mobile home, manufactured home',
662
+ 661: 'Model T',
663
+ 662: 'modem',
664
+ 663: 'monastery',
665
+ 664: 'monitor',
666
+ 665: 'moped',
667
+ 666: 'mortar',
668
+ 667: 'mortarboard',
669
+ 668: 'mosque',
670
+ 669: 'mosquito net',
671
+ 670: 'motor scooter, scooter',
672
+ 671: 'mountain bike, all-terrain bike, off-roader',
673
+ 672: 'mountain tent',
674
+ 673: 'mouse, computer mouse',
675
+ 674: 'mousetrap',
676
+ 675: 'moving van',
677
+ 676: 'muzzle',
678
+ 677: 'nail',
679
+ 678: 'neck brace',
680
+ 679: 'necklace',
681
+ 680: 'nipple',
682
+ 681: 'notebook, notebook computer',
683
+ 682: 'obelisk',
684
+ 683: 'oboe, hautboy, hautbois',
685
+ 684: 'ocarina, sweet potato',
686
+ 685: 'odometer, hodometer, mileometer, milometer',
687
+ 686: 'oil filter',
688
+ 687: 'organ, pipe organ',
689
+ 688: 'oscilloscope, scope, cathode-ray oscilloscope, CRO',
690
+ 689: 'overskirt',
691
+ 690: 'oxcart',
692
+ 691: 'oxygen mask',
693
+ 692: 'packet',
694
+ 693: 'paddle, boat paddle',
695
+ 694: 'paddlewheel, paddle wheel',
696
+ 695: 'padlock',
697
+ 696: 'paintbrush',
698
+ 697: "pajama, pyjama, pj's, jammies",
699
+ 698: 'palace',
700
+ 699: 'panpipe, pandean pipe, syrinx',
701
+ 700: 'paper towel',
702
+ 701: 'parachute, chute',
703
+ 702: 'parallel bars, bars',
704
+ 703: 'park bench',
705
+ 704: 'parking meter',
706
+ 705: 'passenger car, coach, carriage',
707
+ 706: 'patio, terrace',
708
+ 707: 'pay-phone, pay-station',
709
+ 708: 'pedestal, plinth, footstall',
710
+ 709: 'pencil box, pencil case',
711
+ 710: 'pencil sharpener',
712
+ 711: 'perfume, essence',
713
+ 712: 'Petri dish',
714
+ 713: 'photocopier',
715
+ 714: 'pick, plectrum, plectron',
716
+ 715: 'pickelhaube',
717
+ 716: 'picket fence, paling',
718
+ 717: 'pickup, pickup truck',
719
+ 718: 'pier',
720
+ 719: 'piggy bank, penny bank',
721
+ 720: 'pill bottle',
722
+ 721: 'pillow',
723
+ 722: 'ping-pong ball',
724
+ 723: 'pinwheel',
725
+ 724: 'pirate, pirate ship',
726
+ 725: 'pitcher, ewer',
727
+ 726: "plane, carpenter's plane, woodworking plane",
728
+ 727: 'planetarium',
729
+ 728: 'plastic bag',
730
+ 729: 'plate rack',
731
+ 730: 'plow, plough',
732
+ 731: "plunger, plumber's helper",
733
+ 732: 'Polaroid camera, Polaroid Land camera',
734
+ 733: 'pole',
735
+ 734: 'police van, police wagon, paddy wagon, patrol wagon, wagon, black Maria',
736
+ 735: 'poncho',
737
+ 736: 'pool table, billiard table, snooker table',
738
+ 737: 'pop bottle, soda bottle',
739
+ 738: 'pot, flowerpot',
740
+ 739: "potter's wheel",
741
+ 740: 'power drill',
742
+ 741: 'prayer rug, prayer mat',
743
+ 742: 'printer',
744
+ 743: 'prison, prison house',
745
+ 744: 'projectile, missile',
746
+ 745: 'projector',
747
+ 746: 'puck, hockey puck',
748
+ 747: 'punching bag, punch bag, punching ball, punchball',
749
+ 748: 'purse',
750
+ 749: 'quill, quill pen',
751
+ 750: 'quilt, comforter, comfort, puff',
752
+ 751: 'racer, race car, racing car',
753
+ 752: 'racket, racquet',
754
+ 753: 'radiator',
755
+ 754: 'radio, wireless',
756
+ 755: 'radio telescope, radio reflector',
757
+ 756: 'rain barrel',
758
+ 757: 'recreational vehicle, RV, R.V.',
759
+ 758: 'reel',
760
+ 759: 'reflex camera',
761
+ 760: 'refrigerator, icebox',
762
+ 761: 'remote control, remote',
763
+ 762: 'restaurant, eating house, eating place, eatery',
764
+ 763: 'revolver, six-gun, six-shooter',
765
+ 764: 'rifle',
766
+ 765: 'rocking chair, rocker',
767
+ 766: 'rotisserie',
768
+ 767: 'rubber eraser, rubber, pencil eraser',
769
+ 768: 'rugby ball',
770
+ 769: 'rule, ruler',
771
+ 770: 'running shoe',
772
+ 771: 'safe',
773
+ 772: 'safety pin',
774
+ 773: 'saltshaker, salt shaker',
775
+ 774: 'sandal',
776
+ 775: 'sarong',
777
+ 776: 'sax, saxophone',
778
+ 777: 'scabbard',
779
+ 778: 'scale, weighing machine',
780
+ 779: 'school bus',
781
+ 780: 'schooner',
782
+ 781: 'scoreboard',
783
+ 782: 'screen, CRT screen',
784
+ 783: 'screw',
785
+ 784: 'screwdriver',
786
+ 785: 'seat belt, seatbelt',
787
+ 786: 'sewing machine',
788
+ 787: 'shield, buckler',
789
+ 788: 'shoe shop, shoe-shop, shoe store',
790
+ 789: 'shoji',
791
+ 790: 'shopping basket',
792
+ 791: 'shopping cart',
793
+ 792: 'shovel',
794
+ 793: 'shower cap',
795
+ 794: 'shower curtain',
796
+ 795: 'ski',
797
+ 796: 'ski mask',
798
+ 797: 'sleeping bag',
799
+ 798: 'slide rule, slipstick',
800
+ 799: 'sliding door',
801
+ 800: 'slot, one-armed bandit',
802
+ 801: 'snorkel',
803
+ 802: 'snowmobile',
804
+ 803: 'snowplow, snowplough',
805
+ 804: 'soap dispenser',
806
+ 805: 'soccer ball',
807
+ 806: 'sock',
808
+ 807: 'solar dish, solar collector, solar furnace',
809
+ 808: 'sombrero',
810
+ 809: 'soup bowl',
811
+ 810: 'space bar',
812
+ 811: 'space heater',
813
+ 812: 'space shuttle',
814
+ 813: 'spatula',
815
+ 814: 'speedboat',
816
+ 815: "spider web, spider's web",
817
+ 816: 'spindle',
818
+ 817: 'sports car, sport car',
819
+ 818: 'spotlight, spot',
820
+ 819: 'stage',
821
+ 820: 'steam locomotive',
822
+ 821: 'steel arch bridge',
823
+ 822: 'steel drum',
824
+ 823: 'stethoscope',
825
+ 824: 'stole',
826
+ 825: 'stone wall',
827
+ 826: 'stopwatch, stop watch',
828
+ 827: 'stove',
829
+ 828: 'strainer',
830
+ 829: 'streetcar, tram, tramcar, trolley, trolley car',
831
+ 830: 'stretcher',
832
+ 831: 'studio couch, day bed',
833
+ 832: 'stupa, tope',
834
+ 833: 'submarine, pigboat, sub, U-boat',
835
+ 834: 'suit, suit of clothes',
836
+ 835: 'sundial',
837
+ 836: 'sunglass',
838
+ 837: 'sunglasses, dark glasses, shades',
839
+ 838: 'sunscreen, sunblock, sun blocker',
840
+ 839: 'suspension bridge',
841
+ 840: 'swab, swob, mop',
842
+ 841: 'sweatshirt',
843
+ 842: 'swimming trunks, bathing trunks',
844
+ 843: 'swing',
845
+ 844: 'switch, electric switch, electrical switch',
846
+ 845: 'syringe',
847
+ 846: 'table lamp',
848
+ 847: 'tank, army tank, armored combat vehicle, armoured combat vehicle',
849
+ 848: 'tape player',
850
+ 849: 'teapot',
851
+ 850: 'teddy, teddy bear',
852
+ 851: 'television, television system',
853
+ 852: 'tennis ball',
854
+ 853: 'thatch, thatched roof',
855
+ 854: 'theater curtain, theatre curtain',
856
+ 855: 'thimble',
857
+ 856: 'thresher, thrasher, threshing machine',
858
+ 857: 'throne',
859
+ 858: 'tile roof',
860
+ 859: 'toaster',
861
+ 860: 'tobacco shop, tobacconist shop, tobacconist',
862
+ 861: 'toilet seat',
863
+ 862: 'torch',
864
+ 863: 'totem pole',
865
+ 864: 'tow truck, tow car, wrecker',
866
+ 865: 'toyshop',
867
+ 866: 'tractor',
868
+ 867: 'trailer truck, tractor trailer, trucking rig, rig, articulated lorry, semi',
869
+ 868: 'tray',
870
+ 869: 'trench coat',
871
+ 870: 'tricycle, trike, velocipede',
872
+ 871: 'trimaran',
873
+ 872: 'tripod',
874
+ 873: 'triumphal arch',
875
+ 874: 'trolleybus, trolley coach, trackless trolley',
876
+ 875: 'trombone',
877
+ 876: 'tub, vat',
878
+ 877: 'turnstile',
879
+ 878: 'typewriter keyboard',
880
+ 879: 'umbrella',
881
+ 880: 'unicycle, monocycle',
882
+ 881: 'upright, upright piano',
883
+ 882: 'vacuum, vacuum cleaner',
884
+ 883: 'vase',
885
+ 884: 'vault',
886
+ 885: 'velvet',
887
+ 886: 'vending machine',
888
+ 887: 'vestment',
889
+ 888: 'viaduct',
890
+ 889: 'violin, fiddle',
891
+ 890: 'volleyball',
892
+ 891: 'waffle iron',
893
+ 892: 'wall clock',
894
+ 893: 'wallet, billfold, notecase, pocketbook',
895
+ 894: 'wardrobe, closet, press',
896
+ 895: 'warplane, military plane',
897
+ 896: 'washbasin, handbasin, washbowl, lavabo, wash-hand basin',
898
+ 897: 'washer, automatic washer, washing machine',
899
+ 898: 'water bottle',
900
+ 899: 'water jug',
901
+ 900: 'water tower',
902
+ 901: 'whiskey jug',
903
+ 902: 'whistle',
904
+ 903: 'wig',
905
+ 904: 'window screen',
906
+ 905: 'window shade',
907
+ 906: 'Windsor tie',
908
+ 907: 'wine bottle',
909
+ 908: 'wing',
910
+ 909: 'wok',
911
+ 910: 'wooden spoon',
912
+ 911: 'wool, woolen, woollen',
913
+ 912: 'worm fence, snake fence, snake-rail fence, Virginia fence',
914
+ 913: 'wreck',
915
+ 914: 'yawl',
916
+ 915: 'yurt',
917
+ 916: 'web site, website, internet site, site',
918
+ 917: 'comic book',
919
+ 918: 'crossword puzzle, crossword',
920
+ 919: 'street sign',
921
+ 920: 'traffic light, traffic signal, stoplight',
922
+ 921: 'book jacket, dust cover, dust jacket, dust wrapper',
923
+ 922: 'menu',
924
+ 923: 'plate',
925
+ 924: 'guacamole',
926
+ 925: 'consomme',
927
+ 926: 'hot pot, hotpot',
928
+ 927: 'trifle',
929
+ 928: 'ice cream, icecream',
930
+ 929: 'ice lolly, lolly, lollipop, popsicle',
931
+ 930: 'French loaf',
932
+ 931: 'bagel, beigel',
933
+ 932: 'pretzel',
934
+ 933: 'cheeseburger',
935
+ 934: 'hotdog, hot dog, red hot',
936
+ 935: 'mashed potato',
937
+ 936: 'head cabbage',
938
+ 937: 'broccoli',
939
+ 938: 'cauliflower',
940
+ 939: 'zucchini, courgette',
941
+ 940: 'spaghetti squash',
942
+ 941: 'acorn squash',
943
+ 942: 'butternut squash',
944
+ 943: 'cucumber, cuke',
945
+ 944: 'artichoke, globe artichoke',
946
+ 945: 'bell pepper',
947
+ 946: 'cardoon',
948
+ 947: 'mushroom',
949
+ 948: 'Granny Smith',
950
+ 949: 'strawberry',
951
+ 950: 'orange',
952
+ 951: 'lemon',
953
+ 952: 'fig',
954
+ 953: 'pineapple, ananas',
955
+ 954: 'banana',
956
+ 955: 'jackfruit, jak, jack',
957
+ 956: 'custard apple',
958
+ 957: 'pomegranate',
959
+ 958: 'hay',
960
+ 959: 'carbonara',
961
+ 960: 'chocolate sauce, chocolate syrup',
962
+ 961: 'dough',
963
+ 962: 'meat loaf, meatloaf',
964
+ 963: 'pizza, pizza pie',
965
+ 964: 'potpie',
966
+ 965: 'burrito',
967
+ 966: 'red wine',
968
+ 967: 'espresso',
969
+ 968: 'cup',
970
+ 969: 'eggnog',
971
+ 970: 'alp',
972
+ 971: 'bubble',
973
+ 972: 'cliff, drop, drop-off',
974
+ 973: 'coral reef',
975
+ 974: 'geyser',
976
+ 975: 'lakeside, lakeshore',
977
+ 976: 'promontory, headland, head, foreland',
978
+ 977: 'sandbar, sand bar',
979
+ 978: 'seashore, coast, seacoast, sea-coast',
980
+ 979: 'valley, vale',
981
+ 980: 'volcano',
982
+ 981: 'ballplayer, baseball player',
983
+ 982: 'groom, bridegroom',
984
+ 983: 'scuba diver',
985
+ 984: 'rapeseed',
986
+ 985: 'daisy',
987
+ 986: "yellow lady's slipper, yellow lady-slipper, Cypripedium calceolus, Cypripedium parviflorum",
988
+ 987: 'corn',
989
+ 988: 'acorn',
990
+ 989: 'hip, rose hip, rosehip',
991
+ 990: 'buckeye, horse chestnut, conker',
992
+ 991: 'coral fungus',
993
+ 992: 'agaric',
994
+ 993: 'gyromitra',
995
+ 994: 'stinkhorn, carrion fungus',
996
+ 995: 'earthstar',
997
+ 996: 'hen-of-the-woods, hen of the woods, Polyporus frondosus, Grifola frondosa',
998
+ 997: 'bolete',
999
+ 998: 'ear, spike, capitulum',
1000
+ 999: 'toilet tissue, toilet paper, bathroom tissue'
gligen/ldm/data/imagenet_train_hr_indices.p ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:f86ea1924a1522b20bc0f709a069cc65f09d5fc617a7a31af7aaa3839a5a4d73
size 132
gligen/ldm/data/imagenet_val_hr_indices.p ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:ff1f5eb275a93c0fb53e227679f323ea1d024c87db296453296cebeef86fc0f4
size 131
gligen/ldm/data/index_synset.yaml ADDED
@@ -0,0 +1,1000 @@
1
+ 0: n01440764
2
+ 1: n01443537
3
+ 2: n01484850
4
+ 3: n01491361
5
+ 4: n01494475
6
+ 5: n01496331
7
+ 6: n01498041
8
+ 7: n01514668
9
+ 8: n07646067
10
+ 9: n01518878
11
+ 10: n01530575
12
+ 11: n01531178
13
+ 12: n01532829
14
+ 13: n01534433
15
+ 14: n01537544
16
+ 15: n01558993
17
+ 16: n01560419
18
+ 17: n01580077
19
+ 18: n01582220
20
+ 19: n01592084
21
+ 20: n01601694
22
+ 21: n13382471
23
+ 22: n01614925
24
+ 23: n01616318
25
+ 24: n01622779
26
+ 25: n01629819
27
+ 26: n01630670
28
+ 27: n01631663
29
+ 28: n01632458
30
+ 29: n01632777
31
+ 30: n01641577
32
+ 31: n01644373
33
+ 32: n01644900
34
+ 33: n01664065
35
+ 34: n01665541
36
+ 35: n01667114
37
+ 36: n01667778
38
+ 37: n01669191
39
+ 38: n01675722
40
+ 39: n01677366
41
+ 40: n01682714
42
+ 41: n01685808
43
+ 42: n01687978
44
+ 43: n01688243
45
+ 44: n01689811
46
+ 45: n01692333
47
+ 46: n01693334
48
+ 47: n01694178
49
+ 48: n01695060
50
+ 49: n01697457
51
+ 50: n01698640
52
+ 51: n01704323
53
+ 52: n01728572
54
+ 53: n01728920
55
+ 54: n01729322
56
+ 55: n01729977
57
+ 56: n01734418
58
+ 57: n01735189
59
+ 58: n01737021
60
+ 59: n01739381
61
+ 60: n01740131
62
+ 61: n01742172
63
+ 62: n01744401
64
+ 63: n01748264
65
+ 64: n01749939
66
+ 65: n01751748
67
+ 66: n01753488
68
+ 67: n01755581
69
+ 68: n01756291
70
+ 69: n01768244
71
+ 70: n01770081
72
+ 71: n01770393
73
+ 72: n01773157
74
+ 73: n01773549
75
+ 74: n01773797
76
+ 75: n01774384
77
+ 76: n01774750
78
+ 77: n01775062
79
+ 78: n04432308
80
+ 79: n01784675
81
+ 80: n01795545
82
+ 81: n01796340
83
+ 82: n01797886
84
+ 83: n01798484
85
+ 84: n01806143
86
+ 85: n07647321
87
+ 86: n07647496
88
+ 87: n01817953
89
+ 88: n01818515
90
+ 89: n01819313
91
+ 90: n01820546
92
+ 91: n01824575
93
+ 92: n01828970
94
+ 93: n01829413
95
+ 94: n01833805
96
+ 95: n01843065
97
+ 96: n01843383
98
+ 97: n01847000
99
+ 98: n01855032
100
+ 99: n07646821
101
+ 100: n01860187
102
+ 101: n01871265
103
+ 102: n01872772
104
+ 103: n01873310
105
+ 104: n01877812
106
+ 105: n01882714
107
+ 106: n01883070
108
+ 107: n01910747
109
+ 108: n01914609
110
+ 109: n01917289
111
+ 110: n01924916
112
+ 111: n01930112
113
+ 112: n01943899
114
+ 113: n01944390
115
+ 114: n13719102
116
+ 115: n01950731
117
+ 116: n01955084
118
+ 117: n01968897
119
+ 118: n01978287
120
+ 119: n01978455
121
+ 120: n01980166
122
+ 121: n01981276
123
+ 122: n01983481
124
+ 123: n01984695
125
+ 124: n01985128
126
+ 125: n01986214
127
+ 126: n01990800
128
+ 127: n02002556
129
+ 128: n02002724
130
+ 129: n02006656
131
+ 130: n02007558
132
+ 131: n02009229
133
+ 132: n02009912
134
+ 133: n02011460
135
+ 134: n03126707
136
+ 135: n02013706
137
+ 136: n02017213
138
+ 137: n02018207
139
+ 138: n02018795
140
+ 139: n02025239
141
+ 140: n02027492
142
+ 141: n02028035
143
+ 142: n02033041
144
+ 143: n02037110
145
+ 144: n02051845
146
+ 145: n02056570
147
+ 146: n02058221
148
+ 147: n02066245
149
+ 148: n02071294
150
+ 149: n02074367
151
+ 150: n02077923
152
+ 151: n08742578
153
+ 152: n02085782
154
+ 153: n02085936
155
+ 154: n02086079
156
+ 155: n02086240
157
+ 156: n02086646
158
+ 157: n02086910
159
+ 158: n02087046
160
+ 159: n02087394
161
+ 160: n02088094
162
+ 161: n02088238
163
+ 162: n02088364
164
+ 163: n02088466
165
+ 164: n02088632
166
+ 165: n02089078
167
+ 166: n02089867
168
+ 167: n02089973
169
+ 168: n02090379
170
+ 169: n02090622
171
+ 170: n02090721
172
+ 171: n02091032
173
+ 172: n02091134
174
+ 173: n02091244
175
+ 174: n02091467
176
+ 175: n02091635
177
+ 176: n02091831
178
+ 177: n02092002
179
+ 178: n02092339
180
+ 179: n02093256
181
+ 180: n02093428
182
+ 181: n02093647
183
+ 182: n02093754
184
+ 183: n02093859
185
+ 184: n02093991
186
+ 185: n02094114
187
+ 186: n02094258
188
+ 187: n02094433
189
+ 188: n02095314
190
+ 189: n02095570
191
+ 190: n02095889
192
+ 191: n02096051
193
+ 192: n02096177
194
+ 193: n02096294
195
+ 194: n02096437
196
+ 195: n02096585
197
+ 196: n02097047
198
+ 197: n02097130
199
+ 198: n02097209
200
+ 199: n02097298
201
+ 200: n02097474
202
+ 201: n02097658
203
+ 202: n02098105
204
+ 203: n02098286
205
+ 204: n02098413
206
+ 205: n02099267
207
+ 206: n02099429
208
+ 207: n02099601
209
+ 208: n02099712
210
+ 209: n02099849
211
+ 210: n02100236
212
+ 211: n02100583
213
+ 212: n02100735
214
+ 213: n02100877
215
+ 214: n02101006
216
+ 215: n02101388
217
+ 216: n02101556
218
+ 217: n02102040
219
+ 218: n02102177
220
+ 219: n02102318
221
+ 220: n02102480
222
+ 221: n02102973
223
+ 222: n02104029
224
+ 223: n02104365
225
+ 224: n02105056
226
+ 225: n02105162
227
+ 226: n02105251
228
+ 227: n02105412
229
+ 228: n02105505
230
+ 229: n02105641
231
+ 230: n02105855
232
+ 231: n02106030
233
+ 232: n02106166
234
+ 233: n02106382
235
+ 234: n02106550
236
+ 235: n02106662
237
+ 236: n02107142
238
+ 237: n02107312
239
+ 238: n02107574
240
+ 239: n02107683
241
+ 240: n02107908
242
+ 241: n02108000
243
+ 242: n02108089
244
+ 243: n02108422
245
+ 244: n02108551
246
+ 245: n02108915
247
+ 246: n02109047
248
+ 247: n02109525
249
+ 248: n02109961
250
+ 249: n02110063
251
+ 250: n02110185
252
+ 251: n02110341
253
+ 252: n02110627
254
+ 253: n02110806
255
+ 254: n02110958
256
+ 255: n02111129
257
+ 256: n02111277
258
+ 257: n02111500
259
+ 258: n02111889
260
+ 259: n02112018
261
+ 260: n02112137
262
+ 261: n02112350
263
+ 262: n02112706
264
+ 263: n02113023
265
+ 264: n02113186
266
+ 265: n02113624
267
+ 266: n02113712
268
+ 267: n02113799
269
+ 268: n02113978
270
+ 269: n02114367
271
+ 270: n02114548
272
+ 271: n02114712
273
+ 272: n02114855
274
+ 273: n02115641
275
+ 274: n02115913
276
+ 275: n02116738
277
+ 276: n02117135
278
+ 277: n02119022
279
+ 278: n02119789
280
+ 279: n02120079
281
+ 280: n02120505
282
+ 281: n02123045
283
+ 282: n02123159
284
+ 283: n02123394
285
+ 284: n02123597
286
+ 285: n02124075
287
+ 286: n02125311
288
+ 287: n02127052
289
+ 288: n02128385
290
+ 289: n02128757
291
+ 290: n02128925
292
+ 291: n02129165
293
+ 292: n02129604
294
+ 293: n02130308
295
+ 294: n02132136
296
+ 295: n02133161
297
+ 296: n02134084
298
+ 297: n02134418
299
+ 298: n02137549
300
+ 299: n02138441
301
+ 300: n02165105
302
+ 301: n02165456
303
+ 302: n02167151
304
+ 303: n02168699
305
+ 304: n02169497
306
+ 305: n02172182
307
+ 306: n02174001
308
+ 307: n02177972
309
+ 308: n03373237
310
+ 309: n07975909
311
+ 310: n02219486
312
+ 311: n02226429
313
+ 312: n02229544
314
+ 313: n02231487
315
+ 314: n02233338
316
+ 315: n02236044
317
+ 316: n02256656
318
+ 317: n02259212
319
+ 318: n02264363
320
+ 319: n02268443
321
+ 320: n02268853
322
+ 321: n02276258
323
+ 322: n02277742
324
+ 323: n02279972
325
+ 324: n02280649
326
+ 325: n02281406
327
+ 326: n02281787
328
+ 327: n02317335
329
+ 328: n02319095
330
+ 329: n02321529
331
+ 330: n02325366
332
+ 331: n02326432
333
+ 332: n02328150
334
+ 333: n02342885
335
+ 334: n02346627
336
+ 335: n02356798
337
+ 336: n02361337
338
+ 337: n05262120
339
+ 338: n02364673
340
+ 339: n02389026
341
+ 340: n02391049
342
+ 341: n02395406
343
+ 342: n02396427
344
+ 343: n02397096
345
+ 344: n02398521
346
+ 345: n02403003
347
+ 346: n02408429
348
+ 347: n02410509
349
+ 348: n02412080
350
+ 349: n02415577
351
+ 350: n02417914
352
+ 351: n02422106
353
+ 352: n02422699
354
+ 353: n02423022
355
+ 354: n02437312
356
+ 355: n02437616
357
+ 356: n10771990
358
+ 357: n14765497
359
+ 358: n02443114
360
+ 359: n02443484
361
+ 360: n14765785
362
+ 361: n02445715
363
+ 362: n02447366
364
+ 363: n02454379
365
+ 364: n02457408
366
+ 365: n02480495
367
+ 366: n02480855
368
+ 367: n02481823
369
+ 368: n02483362
370
+ 369: n02483708
371
+ 370: n02484975
372
+ 371: n02486261
373
+ 372: n02486410
374
+ 373: n02487347
375
+ 374: n02488291
376
+ 375: n02488702
377
+ 376: n02489166
378
+ 377: n02490219
379
+ 378: n02492035
380
+ 379: n02492660
381
+ 380: n02493509
382
+ 381: n02493793
383
+ 382: n02494079
384
+ 383: n02497673
385
+ 384: n02500267
386
+ 385: n02504013
387
+ 386: n02504458
388
+ 387: n02509815
389
+ 388: n02510455
390
+ 389: n02514041
391
+ 390: n07783967
392
+ 391: n02536864
393
+ 392: n02606052
394
+ 393: n02607072
395
+ 394: n02640242
396
+ 395: n02641379
397
+ 396: n02643566
398
+ 397: n02655020
399
+ 398: n02666347
400
+ 399: n02667093
401
+ 400: n02669723
402
+ 401: n02672831
403
+ 402: n02676566
404
+ 403: n02687172
405
+ 404: n02690373
406
+ 405: n02692877
407
+ 406: n02699494
408
+ 407: n02701002
409
+ 408: n02704792
410
+ 409: n02708093
411
+ 410: n02727426
412
+ 411: n08496334
413
+ 412: n02747177
414
+ 413: n02749479
415
+ 414: n02769748
416
+ 415: n02776631
417
+ 416: n02777292
418
+ 417: n02782329
419
+ 418: n02783161
420
+ 419: n02786058
421
+ 420: n02787622
422
+ 421: n02788148
423
+ 422: n02790996
424
+ 423: n02791124
425
+ 424: n02791270
426
+ 425: n02793495
427
+ 426: n02794156
428
+ 427: n02795169
429
+ 428: n02797295
430
+ 429: n02799071
431
+ 430: n02802426
432
+ 431: n02804515
433
+ 432: n02804610
434
+ 433: n02807133
435
+ 434: n02808304
436
+ 435: n02808440
437
+ 436: n02814533
438
+ 437: n02814860
439
+ 438: n02815834
440
+ 439: n02817516
441
+ 440: n02823428
442
+ 441: n02823750
443
+ 442: n02825657
444
+ 443: n02834397
445
+ 444: n02835271
446
+ 445: n02837789
447
+ 446: n02840245
448
+ 447: n02841315
449
+ 448: n02843684
450
+ 449: n02859443
451
+ 450: n02860847
452
+ 451: n02865351
453
+ 452: n02869837
454
+ 453: n02870880
455
+ 454: n02871525
456
+ 455: n02877765
457
+ 456: n02880308
458
+ 457: n02883205
459
+ 458: n02892201
460
+ 459: n02892767
461
+ 460: n02894605
462
+ 461: n02895154
463
+ 462: n12520864
464
+ 463: n02909870
465
+ 464: n02910353
466
+ 465: n02916936
467
+ 466: n02917067
468
+ 467: n02927161
469
+ 468: n02930766
470
+ 469: n02939185
471
+ 470: n02948072
472
+ 471: n02950826
473
+ 472: n02951358
474
+ 473: n02951585
475
+ 474: n02963159
476
+ 475: n02965783
477
+ 476: n02966193
478
+ 477: n02966687
479
+ 478: n02971356
480
+ 479: n02974003
481
+ 480: n02977058
482
+ 481: n02978881
483
+ 482: n02979186
484
+ 483: n02980441
485
+ 484: n02981792
486
+ 485: n02988304
487
+ 486: n02992211
488
+ 487: n02992529
489
+ 488: n13652994
490
+ 489: n03000134
491
+ 490: n03000247
492
+ 491: n03000684
493
+ 492: n03014705
494
+ 493: n03016953
495
+ 494: n03017168
496
+ 495: n03018349
497
+ 496: n03026506
498
+ 497: n03028079
499
+ 498: n03032252
500
+ 499: n03041632
501
+ 500: n03042490
502
+ 501: n03045698
503
+ 502: n03047690
504
+ 503: n03062245
505
+ 504: n03063599
506
+ 505: n03063689
507
+ 506: n03065424
508
+ 507: n03075370
509
+ 508: n03085013
510
+ 509: n03089624
511
+ 510: n03095699
512
+ 511: n03100240
513
+ 512: n03109150
514
+ 513: n03110669
515
+ 514: n03124043
516
+ 515: n03124170
517
+ 516: n15142452
518
+ 517: n03126707
519
+ 518: n03127747
520
+ 519: n03127925
521
+ 520: n03131574
522
+ 521: n03133878
523
+ 522: n03134739
524
+ 523: n03141823
525
+ 524: n03146219
526
+ 525: n03160309
527
+ 526: n03179701
528
+ 527: n03180011
529
+ 528: n03187595
530
+ 529: n03188531
531
+ 530: n03196217
532
+ 531: n03197337
533
+ 532: n03201208
534
+ 533: n03207743
535
+ 534: n03207941
536
+ 535: n03208938
537
+ 536: n03216828
538
+ 537: n03218198
539
+ 538: n13872072
540
+ 539: n03223299
541
+ 540: n03240683
542
+ 541: n03249569
543
+ 542: n07647870
544
+ 543: n03255030
545
+ 544: n03259401
546
+ 545: n03271574
547
+ 546: n03272010
548
+ 547: n03272562
549
+ 548: n03290653
550
+ 549: n13869788
551
+ 550: n03297495
552
+ 551: n03314780
553
+ 552: n03325584
554
+ 553: n03337140
555
+ 554: n03344393
556
+ 555: n03345487
557
+ 556: n03347037
558
+ 557: n03355925
559
+ 558: n03372029
560
+ 559: n03376595
561
+ 560: n03379051
562
+ 561: n03384352
563
+ 562: n03388043
564
+ 563: n03388183
565
+ 564: n03388549
566
+ 565: n03393912
567
+ 566: n03394916
568
+ 567: n03400231
569
+ 568: n03404251
570
+ 569: n03417042
571
+ 570: n03424325
572
+ 571: n03425413
573
+ 572: n03443371
574
+ 573: n03444034
575
+ 574: n03445777
576
+ 575: n03445924
577
+ 576: n03447447
578
+ 577: n03447721
579
+ 578: n08286342
580
+ 579: n03452741
581
+ 580: n03457902
582
+ 581: n03459775
583
+ 582: n03461385
584
+ 583: n03467068
585
+ 584: n03476684
586
+ 585: n03476991
587
+ 586: n03478589
588
+ 587: n03482001
589
+ 588: n03482405
590
+ 589: n03483316
591
+ 590: n03485407
592
+ 591: n03485794
593
+ 592: n03492542
594
+ 593: n03494278
595
+ 594: n03495570
596
+ 595: n10161363
597
+ 596: n03498962
598
+ 597: n03527565
599
+ 598: n03529860
600
+ 599: n09218315
601
+ 600: n03532672
602
+ 601: n03534580
603
+ 602: n03535780
604
+ 603: n03538406
605
+ 604: n03544143
606
+ 605: n03584254
607
+ 606: n03584829
608
+ 607: n03590841
609
+ 608: n03594734
610
+ 609: n03594945
611
+ 610: n03595614
612
+ 611: n03598930
613
+ 612: n03599486
614
+ 613: n03602883
615
+ 614: n03617480
616
+ 615: n03623198
617
+ 616: n15102712
618
+ 617: n03630383
619
+ 618: n03633091
620
+ 619: n03637318
621
+ 620: n03642806
622
+ 621: n03649909
623
+ 622: n03657121
624
+ 623: n03658185
625
+ 624: n07977870
626
+ 625: n03662601
627
+ 626: n03666591
628
+ 627: n03670208
629
+ 628: n03673027
630
+ 629: n03676483
631
+ 630: n03680355
632
+ 631: n03690938
633
+ 632: n03691459
634
+ 633: n03692522
635
+ 634: n03697007
636
+ 635: n03706229
637
+ 636: n03709823
638
+ 637: n03710193
639
+ 638: n03710637
640
+ 639: n03710721
641
+ 640: n03717622
642
+ 641: n03720891
643
+ 642: n03721384
644
+ 643: n03725035
645
+ 644: n03729826
646
+ 645: n03733131
647
+ 646: n03733281
648
+ 647: n03733805
649
+ 648: n03742115
650
+ 649: n03743016
651
+ 650: n03759954
652
+ 651: n03761084
653
+ 652: n03763968
654
+ 653: n03764736
655
+ 654: n03769881
656
+ 655: n03770439
657
+ 656: n03770679
658
+ 657: n03773504
659
+ 658: n03775071
660
+ 659: n03775546
661
+ 660: n03776460
662
+ 661: n03777568
663
+ 662: n03777754
664
+ 663: n03781244
665
+ 664: n03782006
666
+ 665: n03785016
667
+ 666: n14955889
668
+ 667: n03787032
669
+ 668: n03788195
670
+ 669: n03788365
671
+ 670: n03791053
672
+ 671: n03792782
673
+ 672: n03792972
674
+ 673: n03793489
675
+ 674: n03794056
676
+ 675: n03796401
677
+ 676: n03803284
678
+ 677: n13652335
679
+ 678: n03814639
680
+ 679: n03814906
681
+ 680: n03825788
682
+ 681: n03832673
683
+ 682: n03837869
684
+ 683: n03838899
685
+ 684: n03840681
686
+ 685: n03841143
687
+ 686: n03843555
688
+ 687: n03854065
689
+ 688: n03857828
690
+ 689: n03866082
691
+ 690: n03868242
692
+ 691: n03868863
693
+ 692: n07281099
694
+ 693: n03873416
695
+ 694: n03874293
696
+ 695: n03874599
697
+ 696: n03876231
698
+ 697: n03877472
699
+ 698: n08053121
700
+ 699: n03884397
701
+ 700: n03887697
702
+ 701: n03888257
703
+ 702: n03888605
704
+ 703: n03891251
705
+ 704: n03891332
706
+ 705: n03895866
707
+ 706: n03899768
708
+ 707: n03902125
709
+ 708: n03903868
710
+ 709: n03908618
711
+ 710: n03908714
712
+ 711: n03916031
713
+ 712: n03920288
714
+ 713: n03924679
715
+ 714: n03929660
716
+ 715: n03929855
717
+ 716: n03930313
718
+ 717: n03930630
719
+ 718: n03934042
720
+ 719: n03935335
721
+ 720: n03937543
722
+ 721: n03938244
723
+ 722: n03942813
724
+ 723: n03944341
725
+ 724: n03947888
726
+ 725: n03950228
727
+ 726: n03954731
728
+ 727: n03956157
729
+ 728: n03958227
730
+ 729: n03961711
731
+ 730: n03967562
732
+ 731: n03970156
733
+ 732: n03976467
734
+ 733: n08620881
735
+ 734: n03977966
736
+ 735: n03980874
737
+ 736: n03982430
738
+ 737: n03983396
739
+ 738: n03991062
740
+ 739: n03992509
741
+ 740: n03995372
742
+ 741: n03998194
743
+ 742: n04004767
744
+ 743: n13937284
745
+ 744: n04008634
746
+ 745: n04009801
747
+ 746: n04019541
748
+ 747: n04023962
749
+ 748: n13413294
750
+ 749: n04033901
751
+ 750: n04033995
752
+ 751: n04037443
753
+ 752: n04039381
754
+ 753: n09403211
755
+ 754: n04041544
756
+ 755: n04044716
757
+ 756: n04049303
758
+ 757: n04065272
759
+ 758: n07056680
760
+ 759: n04069434
761
+ 760: n04070727
762
+ 761: n04074963
763
+ 762: n04081281
764
+ 763: n04086273
765
+ 764: n04090263
766
+ 765: n04099969
767
+ 766: n04111531
768
+ 767: n04116512
769
+ 768: n04118538
770
+ 769: n04118776
771
+ 770: n04120489
772
+ 771: n04125116
773
+ 772: n04127249
774
+ 773: n04131690
775
+ 774: n04133789
776
+ 775: n04136333
777
+ 776: n04141076
778
+ 777: n04141327
779
+ 778: n04141975
780
+ 779: n04146614
781
+ 780: n04147291
782
+ 781: n04149813
783
+ 782: n04152593
784
+ 783: n04154340
785
+ 784: n07917272
786
+ 785: n04162706
787
+ 786: n04179913
788
+ 787: n04192698
789
+ 788: n04200800
790
+ 789: n04201297
791
+ 790: n04204238
792
+ 791: n04204347
793
+ 792: n04208427
794
+ 793: n04209133
795
+ 794: n04209239
796
+ 795: n04228054
797
+ 796: n04229816
798
+ 797: n04235860
799
+ 798: n04238763
800
+ 799: n04239074
801
+ 800: n04243546
802
+ 801: n04251144
803
+ 802: n04252077
804
+ 803: n04252225
805
+ 804: n04254120
806
+ 805: n04254680
807
+ 806: n04254777
808
+ 807: n04258138
809
+ 808: n04259630
810
+ 809: n04263257
811
+ 810: n04264628
812
+ 811: n04265275
813
+ 812: n04266014
814
+ 813: n04270147
815
+ 814: n04273569
816
+ 815: n04275363
817
+ 816: n05605498
818
+ 817: n04285008
819
+ 818: n04286575
820
+ 819: n08646566
821
+ 820: n04310018
822
+ 821: n04311004
823
+ 822: n04311174
824
+ 823: n04317175
825
+ 824: n04325704
826
+ 825: n04326547
827
+ 826: n04328186
828
+ 827: n04330267
829
+ 828: n04332243
830
+ 829: n04335435
831
+ 830: n04337157
832
+ 831: n04344873
833
+ 832: n04346328
834
+ 833: n04347754
835
+ 834: n04350905
836
+ 835: n04355338
837
+ 836: n04355933
838
+ 837: n04356056
839
+ 838: n04357314
840
+ 839: n04366367
841
+ 840: n04367480
842
+ 841: n04370456
843
+ 842: n04371430
844
+ 843: n14009946
845
+ 844: n04372370
846
+ 845: n04376876
847
+ 846: n04380533
848
+ 847: n04389033
849
+ 848: n04392985
850
+ 849: n04398044
851
+ 850: n04399382
852
+ 851: n04404412
853
+ 852: n04409515
854
+ 853: n04417672
855
+ 854: n04418357
856
+ 855: n04423845
857
+ 856: n04428191
858
+ 857: n04429376
859
+ 858: n04435653
860
+ 859: n04442312
861
+ 860: n04443257
862
+ 861: n04447861
863
+ 862: n04456115
864
+ 863: n04458633
865
+ 864: n04461696
866
+ 865: n04462240
867
+ 866: n04465666
868
+ 867: n04467665
869
+ 868: n04476259
870
+ 869: n04479046
871
+ 870: n04482393
872
+ 871: n04483307
873
+ 872: n04485082
874
+ 873: n04486054
875
+ 874: n04487081
876
+ 875: n04487394
877
+ 876: n04493381
878
+ 877: n04501370
879
+ 878: n04505470
880
+ 879: n04507155
881
+ 880: n04509417
882
+ 881: n04515003
883
+ 882: n04517823
884
+ 883: n04522168
885
+ 884: n04523525
886
+ 885: n04525038
887
+ 886: n04525305
888
+ 887: n04532106
889
+ 888: n04532670
890
+ 889: n04536866
891
+ 890: n04540053
892
+ 891: n04542943
893
+ 892: n04548280
894
+ 893: n04548362
895
+ 894: n04550184
896
+ 895: n04552348
897
+ 896: n04553703
898
+ 897: n04554684
899
+ 898: n04557648
900
+ 899: n04560804
901
+ 900: n04562935
902
+ 901: n04579145
903
+ 902: n04579667
904
+ 903: n04584207
905
+ 904: n04589890
906
+ 905: n04590129
907
+ 906: n04591157
908
+ 907: n04591713
909
+ 908: n10782135
910
+ 909: n04596742
911
+ 910: n04598010
912
+ 911: n04599235
913
+ 912: n04604644
914
+ 913: n14423870
915
+ 914: n04612504
916
+ 915: n04613696
917
+ 916: n06359193
918
+ 917: n06596364
919
+ 918: n06785654
920
+ 919: n06794110
921
+ 920: n06874185
922
+ 921: n07248320
923
+ 922: n07565083
924
+ 923: n07657664
925
+ 924: n07583066
926
+ 925: n07584110
927
+ 926: n07590611
928
+ 927: n07613480
929
+ 928: n07614500
930
+ 929: n07615774
931
+ 930: n07684084
932
+ 931: n07693725
933
+ 932: n07695742
934
+ 933: n07697313
935
+ 934: n07697537
936
+ 935: n07711569
937
+ 936: n07714571
938
+ 937: n07714990
939
+ 938: n07715103
940
+ 939: n12159804
941
+ 940: n12160303
942
+ 941: n12160857
943
+ 942: n07717556
944
+ 943: n07718472
945
+ 944: n07718747
946
+ 945: n07720875
947
+ 946: n07730033
948
+ 947: n13001041
949
+ 948: n07742313
950
+ 949: n12630144
951
+ 950: n14991210
952
+ 951: n07749582
953
+ 952: n07753113
954
+ 953: n07753275
955
+ 954: n07753592
956
+ 955: n07754684
957
+ 956: n07760859
958
+ 957: n07768694
959
+ 958: n07802026
960
+ 959: n07831146
961
+ 960: n07836838
962
+ 961: n07860988
963
+ 962: n07871810
964
+ 963: n07873807
965
+ 964: n07875152
966
+ 965: n07880968
967
+ 966: n07892512
968
+ 967: n07920052
969
+ 968: n13904665
970
+ 969: n07932039
971
+ 970: n09193705
972
+ 971: n09229709
973
+ 972: n09246464
974
+ 973: n09256479
975
+ 974: n09288635
976
+ 975: n09332890
977
+ 976: n09399592
978
+ 977: n09421951
979
+ 978: n09428293
980
+ 979: n09468604
981
+ 980: n09472597
982
+ 981: n09835506
983
+ 982: n10148035
984
+ 983: n10565667
985
+ 984: n11879895
986
+ 985: n11939491
987
+ 986: n12057211
988
+ 987: n12144580
989
+ 988: n12267677
990
+ 989: n12620546
991
+ 990: n12768682
992
+ 991: n12985857
993
+ 992: n12998815
994
+ 993: n13037406
995
+ 994: n13040303
996
+ 995: n13044778
997
+ 996: n13052670
998
+ 997: n13054560
999
+ 998: n13133613
1000
+ 999: n15075141
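
The mapping above is a plain YAML dictionary from ImageNet class index to WordNet synset ID. A minimal loading sketch, assuming PyYAML is available and the repository root is the working directory (neither is specified by this commit):

# Sketch: load the index -> synset mapping shipped in this file.
import yaml

with open("gligen/ldm/data/index_synset.yaml") as f:
    idx_to_synset = yaml.safe_load(f)   # e.g. {0: 'n01440764', 1: 'n01443537', ...}

print(idx_to_synset[0])                  # -> n01440764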
gligen/ldm/data/lsun.py ADDED
@@ -0,0 +1,92 @@
import os
import numpy as np
import PIL
from PIL import Image
from torch.utils.data import Dataset
from torchvision import transforms


class LSUNBase(Dataset):
    def __init__(self,
                 txt_file,
                 data_root,
                 size=None,
                 interpolation="bicubic",
                 flip_p=0.5
                 ):
        self.data_paths = txt_file
        self.data_root = data_root
        with open(self.data_paths, "r") as f:
            self.image_paths = f.read().splitlines()
        self._length = len(self.image_paths)
        self.labels = {
            "relative_file_path_": [l for l in self.image_paths],
            "file_path_": [os.path.join(self.data_root, l)
                           for l in self.image_paths],
        }

        self.size = size
        self.interpolation = {"linear": PIL.Image.LINEAR,
                              "bilinear": PIL.Image.BILINEAR,
                              "bicubic": PIL.Image.BICUBIC,
                              "lanczos": PIL.Image.LANCZOS,
                              }[interpolation]
        self.flip = transforms.RandomHorizontalFlip(p=flip_p)

    def __len__(self):
        return self._length

    def __getitem__(self, i):
        example = dict((k, self.labels[k][i]) for k in self.labels)
        image = Image.open(example["file_path_"])
        if not image.mode == "RGB":
            image = image.convert("RGB")

        # default to score-sde preprocessing
        img = np.array(image).astype(np.uint8)
        crop = min(img.shape[0], img.shape[1])
        h, w, = img.shape[0], img.shape[1]
        img = img[(h - crop) // 2:(h + crop) // 2,
                  (w - crop) // 2:(w + crop) // 2]

        image = Image.fromarray(img)
        if self.size is not None:
            image = image.resize((self.size, self.size), resample=self.interpolation)

        image = self.flip(image)
        image = np.array(image).astype(np.uint8)
        example["image"] = (image / 127.5 - 1.0).astype(np.float32)
        return example


class LSUNChurchesTrain(LSUNBase):
    def __init__(self, **kwargs):
        super().__init__(txt_file="data/lsun/church_outdoor_train.txt", data_root="data/lsun/churches", **kwargs)


class LSUNChurchesValidation(LSUNBase):
    def __init__(self, flip_p=0., **kwargs):
        super().__init__(txt_file="data/lsun/church_outdoor_val.txt", data_root="data/lsun/churches",
                         flip_p=flip_p, **kwargs)


class LSUNBedroomsTrain(LSUNBase):
    def __init__(self, **kwargs):
        super().__init__(txt_file="data/lsun/bedrooms_train.txt", data_root="data/lsun/bedrooms", **kwargs)


class LSUNBedroomsValidation(LSUNBase):
    def __init__(self, flip_p=0.0, **kwargs):
        super().__init__(txt_file="data/lsun/bedrooms_val.txt", data_root="data/lsun/bedrooms",
                         flip_p=flip_p, **kwargs)


class LSUNCatsTrain(LSUNBase):
    def __init__(self, **kwargs):
        super().__init__(txt_file="data/lsun/cat_train.txt", data_root="data/lsun/cats", **kwargs)


class LSUNCatsValidation(LSUNBase):
    def __init__(self, flip_p=0., **kwargs):
        super().__init__(txt_file="data/lsun/cat_val.txt", data_root="data/lsun/cats",
                         flip_p=flip_p, **kwargs)
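
For orientation, a minimal usage sketch for the dataset classes above. The import path, the default txt/image locations hard-coded in LSUNChurchesTrain ("data/lsun/..."), and the DataLoader settings are illustrative assumptions, not something this commit configures:

# Sketch: iterate over LSUN churches with the class defined above.
# Assumes the LSUN file list and images exist at the default paths.
from torch.utils.data import DataLoader
from ldm.data.lsun import LSUNChurchesTrain   # assumed import path within this repo

dataset = LSUNChurchesTrain(size=256)         # center-crop + resize to 256x256, random h-flip
loader = DataLoader(dataset, batch_size=4, shuffle=True, num_workers=2)

batch = next(iter(loader))
print(batch["image"].shape)                   # torch.Size([4, 256, 256, 3]), values in [-1, 1]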
gligen/ldm/lr_scheduler.py ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
+import numpy as np
+
+
+class LambdaWarmUpCosineScheduler:
+    """
+    note: use with a base_lr of 1.0
+    """
+    def __init__(self, warm_up_steps, lr_min, lr_max, lr_start, max_decay_steps, verbosity_interval=0):
+        self.lr_warm_up_steps = warm_up_steps
+        self.lr_start = lr_start
+        self.lr_min = lr_min
+        self.lr_max = lr_max
+        self.lr_max_decay_steps = max_decay_steps
+        self.last_lr = 0.
+        self.verbosity_interval = verbosity_interval
+
+    def schedule(self, n, **kwargs):
+        if self.verbosity_interval > 0:
+            if n % self.verbosity_interval == 0:
+                print(f"current step: {n}, recent lr-multiplier: {self.last_lr}")
+        if n < self.lr_warm_up_steps:
+            # linear warm-up from lr_start to lr_max
+            lr = (self.lr_max - self.lr_start) / self.lr_warm_up_steps * n + self.lr_start
+            self.last_lr = lr
+            return lr
+        else:
+            # cosine decay from lr_max to lr_min over the remaining steps
+            t = (n - self.lr_warm_up_steps) / (self.lr_max_decay_steps - self.lr_warm_up_steps)
+            t = min(t, 1.0)
+            lr = self.lr_min + 0.5 * (self.lr_max - self.lr_min) * (1 + np.cos(t * np.pi))
+            self.last_lr = lr
+            return lr
+
+    def __call__(self, n, **kwargs):
+        return self.schedule(n, **kwargs)
+
+
+class LambdaWarmUpCosineScheduler2:
+    """
+    supports repeated iterations, configurable via lists
+    note: use with a base_lr of 1.0.
+    """
+    def __init__(self, warm_up_steps, f_min, f_max, f_start, cycle_lengths, verbosity_interval=0):
+        assert len(warm_up_steps) == len(f_min) == len(f_max) == len(f_start) == len(cycle_lengths)
+        self.lr_warm_up_steps = warm_up_steps
+        self.f_start = f_start
+        self.f_min = f_min
+        self.f_max = f_max
+        self.cycle_lengths = cycle_lengths
+        self.cum_cycles = np.cumsum([0] + list(self.cycle_lengths))
+        self.last_f = 0.
+        self.verbosity_interval = verbosity_interval
+
+    def find_in_interval(self, n):
+        interval = 0
+        for cl in self.cum_cycles[1:]:
+            if n <= cl:
+                return interval
+            interval += 1
+
+    def schedule(self, n, **kwargs):
+        cycle = self.find_in_interval(n)
+        n = n - self.cum_cycles[cycle]
+        if self.verbosity_interval > 0:
+            if n % self.verbosity_interval == 0:
+                print(f"current step: {n}, recent lr-multiplier: {self.last_f}, "
+                      f"current cycle {cycle}")
+        if n < self.lr_warm_up_steps[cycle]:
+            # linear warm-up from f_start to f_max within the current cycle
+            f = (self.f_max[cycle] - self.f_start[cycle]) / self.lr_warm_up_steps[cycle] * n + self.f_start[cycle]
+            self.last_f = f
+            return f
+        else:
+            # cosine decay from f_max to f_min over the rest of the cycle
+            t = (n - self.lr_warm_up_steps[cycle]) / (self.cycle_lengths[cycle] - self.lr_warm_up_steps[cycle])
+            t = min(t, 1.0)
+            f = self.f_min[cycle] + 0.5 * (self.f_max[cycle] - self.f_min[cycle]) * (1 + np.cos(t * np.pi))
+            self.last_f = f
+            return f
+
+    def __call__(self, n, **kwargs):
+        return self.schedule(n, **kwargs)
+
+
+class LambdaLinearScheduler(LambdaWarmUpCosineScheduler2):
+
+    def schedule(self, n, **kwargs):
+        cycle = self.find_in_interval(n)
+        n = n - self.cum_cycles[cycle]
+        if self.verbosity_interval > 0:
+            if n % self.verbosity_interval == 0:
+                print(f"current step: {n}, recent lr-multiplier: {self.last_f}, "
+                      f"current cycle {cycle}")
+
+        if n < self.lr_warm_up_steps[cycle]:
+            f = (self.f_max[cycle] - self.f_start[cycle]) / self.lr_warm_up_steps[cycle] * n + self.f_start[cycle]
+            self.last_f = f
+            return f
+        else:
+            # linear decay from f_max to f_min over the remainder of the cycle
+            f = self.f_min[cycle] + (self.f_max[cycle] - self.f_min[cycle]) * (self.cycle_lengths[cycle] - n) / (self.cycle_lengths[cycle])
+            self.last_f = f
+            return f
+
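
These classes return a learning-rate multiplier for step n rather than an absolute learning rate (hence the "use with a base_lr of 1.0" note): warm-up is linear from f_start to f_max, after which LambdaWarmUpCosineScheduler2 decays as f_min + 0.5 * (f_max - f_min) * (1 + cos(pi * t)) while LambdaLinearScheduler decays linearly over the remainder of the cycle. A minimal sketch of how such a multiplier is typically attached to an optimizer via torch's LambdaLR; all hyperparameters below are illustrative, not values taken from this repo's configs.

# Hedged usage sketch; model, base lr, and schedule lengths are made up for illustration.
import torch

model = torch.nn.Linear(8, 8)
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4)  # base lr that gets multiplied

# One cycle: 1,000 linear warm-up steps, then linear decay until step 10,000.
lambda_fn = LambdaLinearScheduler(
    warm_up_steps=[1000], f_start=[1e-6], f_max=[1.0], f_min=[0.01],
    cycle_lengths=[10000], verbosity_interval=0,
)
lr_scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda_fn)

for step in range(100):
    optimizer.step()      # effective lr == 1e-4 * lambda_fn(step)
    lr_scheduler.step()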
gligen/ldm/models/.DS_Store ADDED
Binary file (6.15 kB). View file
 
gligen/ldm/models/__pycache__/autoencoder.cpython-38.pyc ADDED
Binary file (1.58 kB). View file
 
gligen/ldm/models/autoencoder.py ADDED
@@ -0,0 +1,52 @@
+import torch
+import torch.nn as nn
+# import pytorch_lightning as pl
+import torch.nn.functional as F
+from contextlib import contextmanager
+
+# from taming.modules.vqvae.quantize import VectorQuantizer2 as VectorQuantizer
+
+from ldm.modules.diffusionmodules.model import Encoder, Decoder
+from ldm.modules.distributions.distributions import DiagonalGaussianDistribution
+
+from ldm.util import instantiate_from_config
+
+
+class AutoencoderKL(nn.Module):
+    def __init__(self,
+                 ddconfig,
+                 embed_dim,
+                 scale_factor=1
+                 ):
+        super().__init__()
+        self.encoder = Encoder(**ddconfig)
+        self.decoder = Decoder(**ddconfig)
+        assert ddconfig["double_z"]
+        self.quant_conv = torch.nn.Conv2d(2 * ddconfig["z_channels"], 2 * embed_dim, 1)
+        self.post_quant_conv = torch.nn.Conv2d(embed_dim, ddconfig["z_channels"], 1)
+        self.embed_dim = embed_dim
+        self.scale_factor = scale_factor
+
+    def encode(self, x):
+        # map an image to a sampled, scaled latent
+        h = self.encoder(x)
+        moments = self.quant_conv(h)
+        posterior = DiagonalGaussianDistribution(moments)
+        return posterior.sample() * self.scale_factor
+
+    def decode(self, z):
+        # undo the latent scaling, then reconstruct the image
+        z = 1. / self.scale_factor * z
+        z = self.post_quant_conv(z)
+        dec = self.decoder(z)
+        return dec
+
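
This stripped-down AutoencoderKL keeps only the inference paths: encode samples from the diagonal Gaussian posterior and multiplies by scale_factor, and decode divides the scaling back out before reconstruction. A minimal round-trip sketch; the ddconfig and scale_factor below are assumptions modelled on the usual KL-f8 first-stage settings, not necessarily what this repo's configs pass in, and with random weights the reconstruction is only a shape check.

# Hedged usage sketch; ddconfig, embed_dim, and scale_factor are illustrative assumptions.
import torch

ddconfig = dict(
    double_z=True, z_channels=4, resolution=256, in_channels=3, out_ch=3,
    ch=128, ch_mult=[1, 2, 4, 4], num_res_blocks=2, attn_resolutions=[], dropout=0.0,
)
vae = AutoencoderKL(ddconfig, embed_dim=4, scale_factor=0.18215).eval()

x = torch.randn(1, 3, 256, 256)        # stand-in for an image batch in [-1, 1]
with torch.no_grad():
    z = vae.encode(x)                  # scaled latent, here (1, 4, 32, 32)
    x_rec = vae.decode(z)              # (1, 3, 256, 256)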
gligen/ldm/models/diffusion/__init__.py ADDED
File without changes
gligen/ldm/models/diffusion/__pycache__/__init__.cpython-38.pyc ADDED
Binary file (159 Bytes). View file
 
gligen/ldm/models/diffusion/__pycache__/ddim.cpython-38.pyc ADDED
Binary file (4.57 kB). View file
 
gligen/ldm/models/diffusion/__pycache__/ddpm.cpython-38.pyc ADDED
Binary file (2.12 kB). View file
 
gligen/ldm/models/diffusion/__pycache__/gaussian_smoothing.cpython-38.pyc ADDED
Binary file (4.11 kB). View file
 
gligen/ldm/models/diffusion/__pycache__/ldm.cpython-38.pyc ADDED
Binary file (1.21 kB). View file
 
gligen/ldm/models/diffusion/__pycache__/loss.cpython-38.pyc ADDED
Binary file (4.23 kB). View file