"""Create TSV finetuning data for refcoco / refcoco+ / refcocog.

Each output row contains: unique id, image id, referring expression,
bounding box, polygon point string, base64-encoded image, base64-encoded
mask, and an interpolated polygon point string. Also builds a combined,
shuffled training file across all three datasets, excluding any image
that appears in a val/test split.
"""
from refer.refer import REFER
import numpy as np
from PIL import Image
import random
import os
from tqdm import tqdm
import pickle
from poly_utils import is_clockwise, revert_direction, check_length, reorder_points, \
    approximate_polygons, interpolate_polygons, image_to_base64, polygons_to_string

# maximum number of polygon points kept after approximation
max_length = 400

data_root = './refer/data'
datasets = ['refcoco', 'refcoco+', 'refcocog']

image_dir = './datasets/images/mscoco/train2014'

# image ids that appear in any val/test split; these are excluded from the
# combined training file to prevent train/test leakage
with open("data/val_test_files.p", "rb") as f:
    val_test_files = pickle.load(f)

combined_train_data = []

for dataset in datasets:
    if dataset in ('refcoco', 'refcoco+'):
        splits = ['train', 'val', 'testA', 'testB']
        splitBy = 'unc'
    elif dataset == 'refcocog':
        splits = ['train', 'val']
        splitBy = 'umd'

    save_dir = f'datasets/finetune/{dataset}'
    os.makedirs(save_dir, exist_ok=True)
    for split in splits:
        # per-split polygon-length statistics
        num_pts = []
        max_num_pts = 0

        file_name = os.path.join(save_dir, f"{dataset}_{split}.tsv")
        print("creating", file_name)

        refer = REFER(data_root, dataset, splitBy)
        ref_ids = refer.getRefIds(split=split)

        with open(file_name, 'w') as writer:
            for this_ref_id in tqdm(ref_ids):
                this_img_id = refer.getImgIds(this_ref_id)
                this_img = refer.Imgs[this_img_id[0]]
                fn = this_img['file_name']
                # COCO file names look like COCO_train2014_000000581857.jpg;
                # the numeric suffix is the image id
                img_id = fn.split(".")[0].split("_")[-1]

                # load the image and encode it as a base64 JPEG string
                img = Image.open(os.path.join(image_dir, fn)).convert("RGB")
                img_base64 = image_to_base64(img, format='jpeg')

                # load the segmentation mask and encode it as a base64 PNG
                ref = refer.loadRefs(this_ref_id)
                ref_mask = np.array(refer.getMask(ref[0])['mask'])
                annot = np.zeros(ref_mask.shape)
                annot[ref_mask == 1] = 1  # use 255 instead to visualize the mask
                annot_img = Image.fromarray(annot.astype(np.uint8), mode="P")
                annot_base64 = image_to_base64(annot_img, format='png')

                polygons = refer.getPolygon(ref[0])['polygon']

                polygons_processed = []
                for polygon in polygons:
                    # make the polygon clockwise
                    if not is_clockwise(polygon):
                        polygon = revert_direction(polygon)
                    # reorder the polygon so that the first vertex is the one
                    # closest to the image origin
                    polygon = reorder_points(polygon)
                    polygons_processed.append(polygon)

                # sort polygons by the distance of their first vertex from the
                # origin (each polygon is a flat list [x0, y0, x1, y1, ...])
                polygons = sorted(polygons_processed,
                                  key=lambda p: (p[0] ** 2 + p[1] ** 2, p[0], p[1]))
                polygons_interpolated = interpolate_polygons(polygons)

                # simplify each polygon, keeping at most max_length points
                polygons = approximate_polygons(polygons, 5, max_length)

                pts_string = polygons_to_string(polygons)
                pts_string_interpolated = polygons_to_string(polygons_interpolated)

                # load box; REFER returns [x, y, w, h], stored as x1,y1,x2,y2
                box = refer.getRefBox(this_ref_id)
                x, y, w, h = box
                box_string = f'{x},{y},{x + w},{y + h}'

                max_num_pts = max(max_num_pts, check_length(polygons))
                num_pts.append(check_length(polygons))

                # write one row per referring expression
                ref_sent = refer.Refs[this_ref_id]
                for i, (sent, sent_id) in enumerate(zip(ref_sent['sentences'], ref_sent['sent_ids'])):
                    uniq_id = f"{this_ref_id}_{i}"
                    instance = '\t'.join(
                        [uniq_id, str(this_img_id[0]), sent['sent'], box_string, pts_string,
                         img_base64, annot_base64, pts_string_interpolated]) + '\n'
                    writer.write(instance)

                    # keep train rows whose image is not in any val/test split
                    if img_id not in val_test_files and split == 'train':
                        combined_train_data.append(instance)

# write the combined, shuffled training file spanning all three datasets
random.shuffle(combined_train_data)
file_name = "datasets/finetune/refcoco+g_train_shuffled.tsv"
print("creating", file_name)
with open(file_name, 'w') as writer:
    writer.writelines(combined_train_data)