# ------------------------------------------------------------------------
# Modified from OFA (https://github.com/OFA-Sys/OFA)
# Copyright 2022 The OFA-Sys Team.
# All rights reserved.
# This source code is licensed under the Apache 2.0 license
# found in the LICENSE file in the root directory.
# ------------------------------------------------------------------------
# Modifications Copyright, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0
import json
from itertools import chain
import os
import torch
import torch.distributed as dist
import numpy as np
from skimage import draw
from PIL import Image
from utils.vis_utils import overlay_predictions
from torchvision.utils import save_image
# Small constant added to both numerator and denominator of the precision/recall
# ratios in compute_jf, so those ratios stay defined when a mask sums to zero.
SMOOTH = 1e-6
def check_length(polygons):
    """Return the total number of elements across all polygons.

    Args:
        polygons: Iterable of sized sequences (anything supporting ``len``),
            one entry per polygon.

    Returns:
        The sum of ``len(polygon)`` over ``polygons``; ``0`` when it is empty.
    """
    return sum(len(polygon) for polygon in polygons)
# Evaluate one batch for the 'refcoco' task: run task.inference_step, split each generated
# sequence into a 4-value box and a list of polygons, score them against sample['label'] and
# sample['region_coords'], and return (results, iou_scores, f_scores, ap_scores, cum_I, cum_U).
# `generator` is accepted but not used in the visible code.
# Reads kwargs['vis'], kwargs['vis_dir'] and kwargs['result_dir'].
# NOTE(review): this copy of the file has lost all indentation and several statements look
# truncated (each one is flagged below); restore them from the upstream source before running.
def eval_refcoco(task, generator, models, sample, **kwargs):
# Return the element counts of the logical AND (intersection) and logical OR (union)
# of two masks.
def _computeIoU(pred_seg, gd_seg):
I = np.sum(np.logical_and(pred_seg, gd_seg))
U = np.sum(np.logical_or(pred_seg, gd_seg))
return I, U
# Per-row box hit indicator: 1.0 where the IoU between a row of `hyps` and the matching row
# of `refs` is >= thresh and the overlap has positive width and height, otherwise 0.0.
# Areas are computed as (col2 - col0) * (col3 - col1), so rows are presumably
# (x1, y1, x2, y2) boxes — TODO confirm.
# NOTE(review): the call that should wrap the two-element list below is missing in this copy;
# the later `interacts[:, 2]` indexing implies one 4-column tensor (e.g. a concatenation
# along dim 1) — confirm against upstream.
def _calculate_ap_score(hyps, refs, thresh=0.5):
interacts =
[torch.where(hyps[:, :2] < refs[:, :2], refs[:, :2], hyps[:, :2]),
torch.where(hyps[:, 2:] < refs[:, 2:], hyps[:, 2:], refs[:, 2:])],
area_predictions = (hyps[:, 2] - hyps[:, 0]) * (hyps[:, 3] - hyps[:, 1])
area_targets = (refs[:, 2] - refs[:, 0]) * (refs[:, 3] - refs[:, 1])
interacts_w = interacts[:, 2] - interacts[:, 0]
interacts_h = interacts[:, 3] - interacts[:, 1]
area_interacts = interacts_w * interacts_h
# The 1e-6 term keeps the division defined when the union area is zero.
ious = area_interacts / (area_predictions + area_targets - area_interacts + 1e-6)
return ((ious >= thresh) & (interacts_w > 0) & (interacts_h > 0)).float()
# Turn a flat coefficient sequence into an int32 array of 2-element points, taking
# element 2i+1 first and element 2i second for each consecutive pair.
def convert_pts(coeffs):
pts = []
for i in range(len(coeffs) // 2):
pts.append([coeffs[2 * i + 1], coeffs[2 * i]]) # y, x
return np.array(pts, np.int32)
# Rasterise every non-empty polygon code with skimage's draw.polygon2mask and return a
# uint8 mask that is 1 wherever the summed masks are positive. `masks` is seeded with an
# all-zero array so the sum is defined even when `codes` is empty.
# NOTE(review): as visible here, `mask = np.zeros(img_size)` directly follows the uint8
# conversion and nothing is ever appended to `masks`; an exception fallback and an append
# were presumably lost — confirm against upstream.
def get_mask_from_codes(codes, img_size):
masks = [np.zeros(img_size)]
for code in codes:
if len(code) > 0:
mask = draw.polygon2mask(img_size, convert_pts(code))
mask = np.array(mask, np.uint8)
mask = np.zeros(img_size)
mask = sum(masks)
mask = mask > 0
return mask.astype(np.uint8)
# Score a batch: box hit indicators via _calculate_ap_score, and per-sample mask IoU /
# F-measure via compute_jf; when `vis` is true, also build overlay images for `vis_dir`.
# Returns tensors built from the IoU, F_score, cum_I and cum_U lists plus ap_scores.
# `n_poly_pred` and `n_poly_gt` are not used in the visible code.
def _calculate_score(hyps, hyps_det, refs, sample, n_poly_pred, n_poly_gt, vis=True, vis_dir=None):
if vis:
os.makedirs(vis_dir, exist_ok=True)
# Return (IoU, F-measure, I, U) for a predicted mask against a ground-truth mask, where
# the F-measure is the harmonic mean of SMOOTH-stabilised precision and recall.
# NOTE(review): no `else` is visible between the U == 0 fallback and the division below;
# it was presumably lost — confirm against upstream.
def compute_jf(pred_mask, gt_mask):
I, U = _computeIoU(pred_mask, gt_mask)
if U == 0:
this_iou = 0.0
this_iou = I * 1.0 / U
prec = (I + SMOOTH) / (pred_mask.sum() + SMOOTH)
rec = (I + SMOOTH) / (gt_mask.sum() + SMOOTH)
this_f = 2 * prec * rec / (prec + rec)
return this_iou, this_f, I, U
IoU = []
F_score = []
cum_I = []
cum_U = []
bboxes = hyps_det
b = len(hyps)
bboxes = torch.tensor(np.stack(bboxes, 0))
# NOTE(review): the next line is truncated; presumably it moves `bboxes` to the device of
# sample['w_resize_ratios'] — confirm against upstream.
bboxes =['w_resize_ratios'].device)
ap_scores = _calculate_ap_score(bboxes.float(), sample['region_coords'].float())
# NOTE(review): no appends to IoU, F_score, cum_I or cum_U are visible inside this loop,
# although those lists are what the function returns; the lines were presumably lost.
for i in range(b):
hyps_i = hyps[i]
gt_mask = refs[i]
pred_mask = get_mask_from_codes(hyps_i, gt_mask.shape[0:2])
this_iou, this_f, this_I, this_U = compute_jf(pred_mask, gt_mask)
if vis:
# Normalise a caption string before it is embedded in the output file names.
# NOTE(review): the trailing `.replace(` and the `re.sub(` call below are truncated in
# this copy; their arguments cannot be recovered from here.
def pre_caption(caption):
import re
caption = caption.lower().lstrip(",.!?*#:;~").replace('-', ' ').replace('/', ' ').replace(
caption = re.sub(
' ',
caption = caption.rstrip('\n')
return caption
gt_box = sample['region_coords'][i].cpu().numpy()
pred_box = bboxes[i].cpu().numpy()
# Scale even-indexed coordinates by the width ratio and odd-indexed ones by the height ratio.
pred_box[::2] *= sample['w_resize_ratios'][i].cpu().numpy()
pred_box[1::2] *= sample['h_resize_ratios'][i].cpu().numpy()
gt_box[::2] *= sample['w_resize_ratios'][i].cpu().numpy()
gt_box[1::2] *= sample['h_resize_ratios'][i].cpu().numpy()
uniq_id = sample["id"][i]
text = sample["text"][i]
text = pre_caption(text)
img = sample["net_input"]['patch_images'][i]
# presumably maps pixel values from [-1, 1] to [0, 1] — TODO confirm the input normalisation
img = (img + 1) / 2
img_ndarray = img.permute(1, 2, 0).cpu().numpy() * 255
img_ndarray = img_ndarray.astype(np.uint8)
gt_overlayed_fn = f"{uniq_id}_{text}_gt_overlayed.png"
pred_overlayed_fn = f"{uniq_id}_{text}_pred_overlayed.png"
pred_overlayed = overlay_predictions(img_ndarray, pred_mask, hyps_i, pred_box)
gt_overlayed = overlay_predictions(img_ndarray, gt_mask, None, gt_box)
# NOTE(review): the two lines below are truncated; each looks like an Image.fromarray
# conversion fused with the tail of a call taking the overlay file name (presumably a
# save into vis_dir) — confirm against upstream.
pred_overlayed = Image.fromarray(pred_overlayed.astype(np.uint8)), pred_overlayed_fn))
gt_overlayed = Image.fromarray(gt_overlayed.astype(np.uint8)), gt_overlayed_fn))
img_fn = f"{uniq_id}_{text}.png"
save_image(img, os.path.join(vis_dir, img_fn))
return torch.tensor(IoU), torch.tensor(F_score), ap_scores, torch.tensor(cum_I), torch.tensor(cum_U)
# ---- main body of eval_refcoco: decode the generated sequences ----
gen_out = task.inference_step(models, sample)
hyps = []
hyps_det = []
n_poly_pred = []
b = len(gen_out)
poly_len = []
# NOTE(review): nothing is appended to hyps, hyps_det, n_poly_pred or poly_len in the
# visible loop, although all four are used afterwards; the appends were presumably lost.
for i in range(b):
gen_out_i = np.array(gen_out[i])
gen_out_i = gen_out_i[gen_out_i != -1] # excluding eos and padding indices
# The first four values are treated as the box; even indices are scaled by sample['w']
# and odd indices by sample['h'], in place.
gen_out_i_det = gen_out_i[:4]
gen_out_i_det[::2] *= sample['w'][i].cpu().numpy()
gen_out_i_det[1::2] *= sample['h'][i].cpu().numpy()
polygons_pred = gen_out_i[4:]
# A trailing separator is appended so the last polygon is also terminated.
polygons_pred = np.append(polygons_pred, [2])
size = len(polygons_pred)
idx_list = [idx for idx, val in
enumerate(polygons_pred) if val == 2] # 2 indicates separator token
# Scaling happens after the separator positions were recorded, so idx_list stays valid.
polygons_pred *= task.cfg.patch_image_size
# extract the sequence for each polygon
polygons = []
prev_idx = 0
# NOTE(review): no `pass`/`else` is visible between the empty-segment test and the append
# below; upstream presumably skips empty segments and appends otherwise — confirm.
for idx in idx_list:
cur_idx = idx
if prev_idx == cur_idx or prev_idx == size:
polygons.append(polygons_pred[prev_idx: cur_idx])
prev_idx = cur_idx + 1
gt = sample['label']
# NOTE(review): the list comprehension below has no visible closing bracket, the
# _calculate_score call passes no value for its `n_poly_gt` parameter, and the dict after
# os.makedirs(...) has lost the call that consumed it (it is followed by a '.pt' path, so
# presumably a save) — confirm all three against upstream.
results = [
{"uniq_id": sample_id}
for i, sample_id in enumerate(sample["id"].tolist())
iou_scores, f_scores, ap_scores, cum_I, cum_U = _calculate_score(hyps, hyps_det, gt, sample, n_poly_pred,
vis=kwargs['vis'], vis_dir=kwargs['vis_dir'])
result_dir = kwargs['result_dir']
os.makedirs(result_dir, exist_ok=True){"iou_scores": iou_scores, "ap_scores": ap_scores, "n_poly_pred": n_poly_pred,
"n_poly_gt": sample['n_poly'], "poly_len": poly_len, "uniq_id": sample["id"]},
os.path.join(result_dir, f'{sample["id"][0]}.pt'))
return results, iou_scores, f_scores, ap_scores, cum_I, cum_U
def eval_step(task, generator, models, sample, **kwargs):
    """Dispatch one evaluation step to the evaluator for the current task.

    Args:
        task: Task object; ``task.cfg._name`` selects the evaluator.
        generator: Forwarded unchanged to the evaluator.
        models: Forwarded unchanged to the evaluator.
        sample: Batch to evaluate, forwarded unchanged.
        **kwargs: Extra options forwarded unchanged to the evaluator.

    Returns:
        Whatever ``eval_refcoco`` returns when the task name is ``'refcoco'``.

    Raises:
        NotImplementedError: If ``task.cfg._name`` is anything other than
            ``'refcoco'``; the message names the unsupported task.
    """
    task_name = task.cfg._name
    if task_name == 'refcoco':
        return eval_refcoco(task, generator, models, sample, **kwargs)
    # Name the offending task so a misconfigured run is easy to diagnose.
    raise NotImplementedError(f"eval_step does not support task '{task_name}'")
# Aggregate evaluation statistics, gathering per-sample results across workers when
# cfg.distributed_training.distributed_world_size > 1, then build a text summary and dump
# the per-sample results as JSON under cfg.common_eval.results_path.
# score_cnt, score_sum, f_score_sum, ap_det_score_sum, cum_I_sum, cum_U_sum and every entry of
# prec_score_sum must expose `.item()` (presumably scalar tensors — TODO confirm).
# NOTE(review): this copy has lost all indentation and several statements look truncated
# (each one is flagged below); restore them from the upstream source before running.
def merge_results(task, cfg, logger, score_cnt, score_sum, f_score_sum=None, ap_det_score_sum=None, prec_score_sum=None,
cum_I_sum=None, cum_U_sum=None, results=None):
# NOTE(review): in the 'image_gen' branch the body of the distributed check is not visible,
# and the format string below has lost the call that consumed it (presumably a logger call)
# together with its closing parentheses — confirm against upstream.
if task.cfg._name == 'image_gen':
if cfg.distributed_training.distributed_world_size > 1:
if score_cnt.item() > 0:"score_sum: {}, score_cnt: {}, score: {}".format(
score_sum, score_cnt, round(score_sum.item() / score_cnt.item(), 4)
gather_results = None
if cfg.distributed_training.distributed_world_size > 1:
# One slot per rank; all_gather_object fills the list with every rank's `results`.
gather_results = [None for _ in range(dist.get_world_size())]
dist.all_gather_object(gather_results, results)
# NOTE(review): the loop below has no visible body; presumably a per-threshold reduction
# across ranks was lost — confirm against upstream.
for prec_score in prec_score_sum:
if score_cnt.item() > 0:
prec_list = [.5, .6, .7, .8, .9]
# Summary fields: mIoU = score_sum / count, oIoU = cum_I_sum / cum_U_sum,
# J&F = (f_score_sum + score_sum) / (2 * count).
# NOTE(review): the closing parenthesis of this .format( call is not visible.
txt = "sample_cnt: {}, mIoU score: {}, oIoU score: {}, ap det score: {}, f score: {}, J&F: {}\n".format(
score_cnt, round(score_sum.item() / score_cnt.item(), 4),
round(cum_I_sum.item() / cum_U_sum.item(), 4),
round(ap_det_score_sum.item() / score_cnt.item(), 4),
round(f_score_sum.item() / score_cnt.item(), 4),
round((f_score_sum.item() + score_sum.item()) / (2 * score_cnt.item()), 4)
# One "prec@<threshold>" entry per threshold in prec_list, paired with prec_score_sum.
prec_txt = " ".join(
[f"prec@{prec}: {round(prec_score.item() / score_cnt.item(), 4)}\n" for prec, prec_score in
zip(prec_list, prec_score_sum)])
txt += prec_txt
output_path = os.path.join(cfg.common_eval.results_path, "{}_result.txt".format(cfg.dataset.gen_subset))
os.makedirs(cfg.common_eval.results_path, exist_ok=True)
# NOTE(review): the body of this `with` block is not visible; presumably it writes `txt`.
with open(output_path, 'w') as f:
# Only the single process, or rank 0 in a distributed run, writes the predictions file.
if cfg.distributed_training.distributed_world_size == 1 or dist.get_rank() == 0:
os.makedirs(cfg.common_eval.results_path, exist_ok=True)
output_path = os.path.join(cfg.common_eval.results_path, "{}_predict.json".format(cfg.dataset.gen_subset))
# Flatten the per-rank result lists into one list; fall back to the local results when
# nothing was gathered.
gather_results = list(chain(*gather_results)) if gather_results is not None else results
with open(output_path, 'w') as fw:
json.dump(gather_results, fw)