# PolyFormer/data/create_finetuning_data.py
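"""Create finetuning TSVs for referring image segmentation.

For each of RefCOCO, RefCOCO+, and RefCOCOg, every referring sentence becomes
one TSV row: unique id, image id, sentence, bounding box, polygon points,
base64-encoded image, base64-encoded mask, and interpolated polygon points.
The train splits are also pooled, filtered against the val/test image list,
shuffled, and written to a single combined TSV.
"""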
from refer.refer import REFER
import numpy as np
from PIL import Image
import random
import os
from tqdm import tqdm
import pickle
from poly_utils import is_clockwise, revert_direction, check_length, reorder_points, \
    approximate_polygons, interpolate_polygons, image_to_base64, polygons_to_string
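
# length budget passed to approximate_polygons below (presumably the maximum
# number of polygon points kept after approximation)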
max_length = 400
data_root = './refer/data'
datasets = ['refcoco', 'refcoco+', 'refcocog']
image_dir = './datasets/images/mscoco/train2014'
val_test_files = pickle.load(open("data/val_test_files.p", "rb"))
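# train-split rows from all three datasets, pooled for the combined shuffled TSV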
combined_train_data = []
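
# RefCOCO and RefCOCO+ use the UNC splits; RefCOCOg uses the UMD split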
for dataset in datasets:
    if dataset == 'refcoco':
        splits = ['train', 'val', 'testA', 'testB']
        splitBy = 'unc'
    elif dataset == 'refcoco+':
        splits = ['train', 'val', 'testA', 'testB']
        splitBy = 'unc'
    elif dataset == 'refcocog':
        splits = ['train', 'val']
        splitBy = 'umd'

    save_dir = f'datasets/finetune/{dataset}'
    os.makedirs(save_dir, exist_ok=True)
    for split in splits:
        num_pts = []
        max_num_pts = 0
        file_name = os.path.join(save_dir, f"{dataset}_{split}.tsv")
        print("creating ", file_name)
        uniq_ids = []
        image_ids = []
        sents = []
        coeffs_strings = []
        img_strings = []
        writer = open(file_name, 'w')
        refer = REFER(data_root, dataset, splitBy)
        ref_ids = refer.getRefIds(split=split)
        for this_ref_id in tqdm(ref_ids):
            this_img_id = refer.getImgIds(this_ref_id)
            this_img = refer.Imgs[this_img_id[0]]
            fn = this_img['file_name']
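            # pull the numeric COCO image id from a file name like 'COCO_train2014_000000000009.jpg'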
            img_id = fn.split(".")[0].split("_")[-1]
            # load image
            img = Image.open(os.path.join(image_dir, this_img['file_name'])).convert("RGB")
            # convert image to string
            img_base64 = image_to_base64(img, format='jpeg')
            # load mask
            ref = refer.loadRefs(this_ref_id)
            ref_mask = np.array(refer.getMask(ref[0])['mask'])
            annot = np.zeros(ref_mask.shape)
            annot[ref_mask == 1] = 1  # 255
            annot_img = Image.fromarray(annot.astype(np.uint8), mode="P")
            annot_base64 = image_to_base64(annot_img, format='png')
            polygons = refer.getPolygon(ref[0])['polygon']
            polygons_processed = []
            for polygon in polygons:
                # make the polygon clockwise
                if not is_clockwise(polygon):
                    polygon = revert_direction(polygon)
                # reorder the polygon so that the first vertex is the one closest to the image origin
                polygon = reorder_points(polygon)
                polygons_processed.append(polygon)
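            # order multi-polygon objects by their starting vertex's distance to the image origin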
            polygons = sorted(polygons_processed, key=lambda x: (x[0] ** 2 + x[1] ** 2, x[0], x[1]))
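            # keep two encodings: densely interpolated points, and a polygon
            # approximated to at most max_length points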
            polygons_interpolated = interpolate_polygons(polygons)
            polygons = approximate_polygons(polygons, 5, max_length)
            pts_string = polygons_to_string(polygons)
            pts_string_interpolated = polygons_to_string(polygons_interpolated)
            # load box
            box = refer.getRefBox(this_ref_id)  # x,y,w,h
            x, y, w, h = box
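            # convert the (x, y, w, h) box to 'x1,y1,x2,y2' corner format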
            box_string = f'{x},{y},{x + w},{y + h}'
            max_num_pts = max(max_num_pts, check_length(polygons))
            num_pts.append(check_length(polygons))
            # load text
            ref_sent = refer.Refs[this_ref_id]
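            # one TSV row per referring sentence: uniq_id, image id, sentence, box,
            # polygon string, base64 image, base64 mask, interpolated polygon string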
            for i, (sent, sent_id) in enumerate(zip(ref_sent['sentences'], ref_sent['sent_ids'])):
                uniq_id = f"{this_ref_id}_{i}"
                instance = '\t'.join(
                    [uniq_id, str(this_img_id[0]), sent['sent'], box_string, pts_string, img_base64, annot_base64,
                     pts_string_interpolated]) + '\n'
                writer.write(instance)
                if img_id not in val_test_files and split == 'train':  # keep only rows whose image is absent from every val/test split
                    combined_train_data.append(instance)
        writer.close()
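
# pool the train rows from all datasets into one shuffled combined TSV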
random.shuffle(combined_train_data)
file_name = os.path.join("datasets/finetune", "refcoco+g_train_shuffled.tsv")
print("creating ", file_name)
writer = open(file_name, 'w')
writer.writelines(combined_train_data)
writer.close()
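
# A minimal sketch of reading one generated row back (path as produced above):
# with open('datasets/finetune/refcoco/refcoco_train.tsv') as f:
#     (uniq_id, img_id, sent, box, pts,
#      img_b64, mask_b64, pts_interp) = f.readline().rstrip('\n').split('\t')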