Spaces:

LXT
/

OMG_Seg

Sleeping

App Files Community

HarborYuan committed on Jan 22

Commit

1f8df14

•

1 Parent(s): acdf070

adapt for omg

Browse files

Files changed (19) hide show

app/configs/m2_convl.py +1 -0
ext/cityscapes_scripts/createPanopticImgs.py +0 -194
ext/cityscapes_scripts/helpers/__init__.py +0 -1
ext/cityscapes_scripts/helpers/annotation.py +0 -441
ext/cityscapes_scripts/helpers/csHelpers.py +0 -129
ext/cityscapes_scripts/helpers/labels.py +0 -182
ext/cityscapes_scripts/helpers/labels_cityPersons.py +0 -61
ext/cityscapes_scripts/helpers/version.py +0 -9
ext/davis2017/__init__.py +0 -3
ext/davis2017/davis.py +0 -122
ext/davis2017/evaluation.py +0 -110
ext/davis2017/metrics.py +0 -197
ext/davis2017/results.py +0 -52
ext/davis2017/utils.py +0 -174
main.py +31 -15
seg/models/detectors/mask2former_vid.py +18 -16
seg/models/utils/__init__.py +0 -2
seg/models/utils/offline_video_metrics.py +0 -114
seg/models/utils/online_pq_utils.py +0 -73

app/configs/m2_convl.py CHANGED Viewed

@@ -36,6 +36,7 @@ model = dict(
         sphere_cls=True,
         ov_classifier_name=f'{ov_model_name}_{ov_datasets_name}',
         logit=None,
         in_channels=[192, 384, 768, 1536],  # pass to pixel_decoder inside
         strides=[4, 8, 16, 32],
         feat_channels=256,

         sphere_cls=True,
         ov_classifier_name=f'{ov_model_name}_{ov_datasets_name}',
         logit=None,
+        enable_box_query=True,
         in_channels=[192, 384, 768, 1536],  # pass to pixel_decoder inside
         strides=[4, 8, 16, 32],
         feat_channels=256,

ext/cityscapes_scripts/createPanopticImgs.py DELETED Viewed

@@ -1,194 +0,0 @@
-#!/usr/bin/python
-#
-# Converts the *instanceIds.png annotations of the Cityscapes dataset
-# to COCO-style panoptic segmentation format (http://cocodataset.org/#format-data).
-# The conversion works for the 'fine' set of the annotations.
-#
-# By default this tool uses the IDs specified in labels.py. You can use the flag
-# --use-train-id to get train ids for categories. 'ignoreInEval' categories are
-# removed during the conversion.
-#
-# In panoptic segmentation format image_id is used to match predictions and ground truth.
-# For cityscapes image_id has form <city>_123456_123456 and corresponds to the prefix
-# of cityscapes image files.
-#
-# python imports
-from __future__ import print_function, absolute_import, division, unicode_literals
-import os
-import glob
-import sys
-import argparse
-import json
-import numpy as np
-# Image processing
-from PIL import Image
-# cityscapes imports
-from ext.cityscapes_scripts.helpers.csHelpers import printError
-from ext.cityscapes_scripts.helpers.labels import id2label, labels
-import mmengine
-# The main method
-def convert2panoptic(cityscapesPath=None, outputFolder=None, useTrainId=False, setNames=["val", "train", "test"]):
-    # Where to look for Cityscapes
-    if cityscapesPath is None:
-        if 'CITYSCAPES_DATASET' in os.environ:
-            cityscapesPath = os.environ['CITYSCAPES_DATASET']
-        else:
-            cityscapesPath = 'data/cityscapes'
-        cityscapesPath = os.path.join(cityscapesPath, "gtFine")
-    if outputFolder is None:
-        outputFolder = cityscapesPath.replace('gtFine', "annotations")
-    mmengine.mkdir_or_exist(outputFolder)
-    categories = []
-    for label in labels:
-        if label.ignoreInEval:
-            continue
-        categories.append({'id': int(label.trainId) if useTrainId else int(label.id),
-                           'name': label.name,
-                           'color': label.color,
-                           'supercategory': label.category,
-                           'isthing': 1 if label.hasInstances else 0})
-    categories = sorted(categories, key=lambda x:x['id'])
-    for setName in setNames:
-        # how to search for all ground truth
-        searchFine   = os.path.join(cityscapesPath, setName, "*", "*_instanceIds.png")
-        # search files
-        filesFine = glob.glob(searchFine)
-        filesFine.sort()
-        files = filesFine
-        # quit if we did not find anything
-        if not files:
-            printError(
-                "Did not find any files for {} set using matching pattern {}. Please consult the README.".format(setName, searchFine)
-            )
-        # a bit verbose
-        print("Converting {} annotation files for {} set.".format(len(files), setName))
-        trainIfSuffix = "_trainId" if useTrainId else ""
-        outputBaseFile = "cityscapes_panoptic_{}{}".format(setName, trainIfSuffix)
-        outFile = os.path.join(outputFolder, "{}.json".format(outputBaseFile))
-        print("Json file with the annotations in panoptic format will be saved in {}".format(outFile))
-        panopticFolder = os.path.join(outputFolder, outputBaseFile)
-        if not os.path.isdir(panopticFolder):
-            print("Creating folder {} for panoptic segmentation PNGs".format(panopticFolder))
-            os.mkdir(panopticFolder)
-        print("Corresponding segmentations in .png format will be saved in {}".format(panopticFolder))
-        images = []
-        annotations = []
-        for progress, f in enumerate(files):
-            originalFormat = np.array(Image.open(f))
-            fileName = os.path.basename(f)
-            location = fileName.split('_')[0]
-            imageId = fileName.replace("_gtFine_instanceIds.png", "")
-            fileName = os.path.join(location, fileName)
-            inputFileName = fileName.replace("_gtFine_instanceIds.png", "_leftImg8bit.png")
-            outputFileName = fileName.replace("_gtFine_instanceIds.png", "_panoptic.png")
-            # image entry, id for image is its filename without extension
-            images.append({"id": imageId,
-                           "width": int(originalFormat.shape[1]),
-                           "height": int(originalFormat.shape[0]),
-                           "file_name": inputFileName})
-            pan_format = np.zeros(
-                (originalFormat.shape[0], originalFormat.shape[1], 3), dtype=np.uint8
-            )
-            segmentIds = np.unique(originalFormat)
-            segmInfo = []
-            for segmentId in segmentIds:
-                if segmentId < 1000:
-                    semanticId = segmentId
-                    isCrowd = 1
-                else:
-                    semanticId = segmentId // 1000
-                    isCrowd = 0
-                labelInfo = id2label[semanticId]
-                categoryId = labelInfo.trainId if useTrainId else labelInfo.id
-                if labelInfo.ignoreInEval:
-                    continue
-                if not labelInfo.hasInstances:
-                    isCrowd = 0
-                mask = originalFormat == segmentId
-                color = [segmentId % 256, segmentId // 256, segmentId // 256 // 256]
-                pan_format[mask] = color
-                area = np.sum(mask) # segment area computation
-                # bbox computation for a segment
-                hor = np.sum(mask, axis=0)
-                hor_idx = np.nonzero(hor)[0]
-                x = hor_idx[0]
-                width = hor_idx[-1] - x + 1
-                vert = np.sum(mask, axis=1)
-                vert_idx = np.nonzero(vert)[0]
-                y = vert_idx[0]
-                height = vert_idx[-1] - y + 1
-                bbox = [int(x), int(y), int(width), int(height)]
-                segmInfo.append({"id": int(segmentId),
-                                 "category_id": int(categoryId),
-                                 "area": int(area),
-                                 "bbox": bbox,
-                                 "iscrowd": isCrowd})
-            annotations.append({'image_id': imageId,
-                                'file_name': outputFileName,
-                                "segments_info": segmInfo})
-            mmengine.mkdir_or_exist(os.path.dirname(os.path.join(panopticFolder, outputFileName)))
-            Image.fromarray(pan_format).save(os.path.join(panopticFolder, outputFileName))
-            print("\rProgress: {:>3.2f} %".format((progress + 1) * 100 / len(files)), end=' ')
-            sys.stdout.flush()
-        print("\nSaving the json file {}".format(outFile))
-        d = {'images': images,
-             'annotations': annotations,
-             'categories': categories}
-        with open(outFile, 'w') as f:
-            json.dump(d, f, sort_keys=True, indent=4)
-def main():
-    parser = argparse.ArgumentParser()
-    parser.add_argument("--dataset-folder",
-                        dest="cityscapesPath",
-                        help="path to the Cityscapes dataset 'gtFine' folder",
-                        default=None,
-                        type=str)
-    parser.add_argument("--output-folder",
-                        dest="outputFolder",
-                        help="path to the output folder.",
-                        default=None,
-                        type=str)
-    parser.add_argument("--use-train-id", default=True,action="store_true", dest="useTrainId")
-    parser.add_argument("--set-names",
-                        dest="setNames",
-                        help="set names to which apply the function to",
-                        nargs='+',
-                        default=["val", "train"],
-                        type=str)
-    args = parser.parse_args()
-    convert2panoptic(args.cityscapesPath, args.outputFolder, args.useTrainId, args.setNames)
-# call the main
-if __name__ == "__main__":
-    main()

ext/cityscapes_scripts/helpers/__init__.py DELETED Viewed

@@ -1 +0,0 @@
-# empty

ext/cityscapes_scripts/helpers/annotation.py DELETED Viewed

@@ -1,441 +0,0 @@
-#!/usr/bin/python
-#
-# Classes to store, read, and write annotations
-#
-from __future__ import print_function, absolute_import, division
-import os
-import json
-import numpy as np
-from collections import namedtuple
-# get current date and time
-import datetime
-import locale
-from abc import ABCMeta, abstractmethod
-from .box3dImageTransform import Camera
-# A point in a polygon
-Point = namedtuple('Point', ['x', 'y'])
-class CsObjectType():
-    """Type of an object"""
-    POLY = 1  # polygon
-    BBOX2D = 2  # bounding box
-    BBOX3D = 3  # 3d bounding box
-    IGNORE2D = 4  # 2d ignore region
-class CsObject:
-    """Abstract base class for annotation objects"""
-    __metaclass__ = ABCMeta
-    def __init__(self, objType):
-        self.objectType = objType
-        # the label
-        self.label = ""
-        # If deleted or not
-        self.deleted = 0
-        # If verified or not
-        self.verified = 0
-        # The date string
-        self.date = ""
-        # The username
-        self.user = ""
-        # Draw the object
-        # Not read from or written to JSON
-        # Set to False if deleted object
-        # Might be set to False by the application for other reasons
-        self.draw = True
-    @abstractmethod
-    def __str__(self): pass
-    @abstractmethod
-    def fromJsonText(self, jsonText, objId=-1): pass
-    @abstractmethod
-    def toJsonText(self): pass
-    def updateDate(self):
-        try:
-            locale.setlocale(locale.LC_ALL, 'en_US.utf8')
-        except locale.Error:
-            locale.setlocale(locale.LC_ALL, 'en_US')
-        except locale.Error:
-            locale.setlocale(locale.LC_ALL, 'us_us.utf8')
-        except locale.Error:
-            locale.setlocale(locale.LC_ALL, 'us_us')
-        except Exception:
-            pass
-        self.date = datetime.datetime.now().strftime("%d-%b-%Y %H:%M:%S")
-    # Mark the object as deleted
-    def delete(self):
-        self.deleted = 1
-        self.draw = False
-class CsPoly(CsObject):
-    """Class that contains the information of a single annotated object as polygon"""
-    # Constructor
-    def __init__(self):
-        CsObject.__init__(self, CsObjectType.POLY)
-        # the polygon as list of points
-        self.polygon = []
-        # the object ID
-        self.id = -1
-    def __str__(self):
-        polyText = ""
-        if self.polygon:
-            if len(self.polygon) <= 4:
-                for p in self.polygon:
-                    polyText += '({},{}) '.format(p.x, p.y)
-            else:
-                polyText += '({},{}) ({},{}) ... ({},{}) ({},{})'.format(
-                    self.polygon[0].x, self.polygon[0].y,
-                    self.polygon[1].x, self.polygon[1].y,
-                    self.polygon[-2].x, self.polygon[-2].y,
-                    self.polygon[-1].x, self.polygon[-1].y)
-        else:
-            polyText = "none"
-        text = "Object: {} - {}".format(self.label, polyText)
-        return text
-    def fromJsonText(self, jsonText, objId=-1):
-        self.id = objId
-        self.label = str(jsonText['label'])
-        self.polygon = [Point(p[0], p[1]) for p in jsonText['polygon']]
-        if 'deleted' in jsonText.keys():
-            self.deleted = jsonText['deleted']
-        else:
-            self.deleted = 0
-        if 'verified' in jsonText.keys():
-            self.verified = jsonText['verified']
-        else:
-            self.verified = 1
-        if 'user' in jsonText.keys():
-            self.user = jsonText['user']
-        else:
-            self.user = ''
-        if 'date' in jsonText.keys():
-            self.date = jsonText['date']
-        else:
-            self.date = ''
-        if self.deleted == 1:
-            self.draw = False
-        else:
-            self.draw = True
-    def toJsonText(self):
-        objDict = {}
-        objDict['label'] = self.label
-        objDict['id'] = self.id
-        objDict['deleted'] = self.deleted
-        objDict['verified'] = self.verified
-        objDict['user'] = self.user
-        objDict['date'] = self.date
-        objDict['polygon'] = []
-        for pt in self.polygon:
-            objDict['polygon'].append([pt.x, pt.y])
-        return objDict
-class CsBbox2d(CsObject):
-    """Class that contains the information of a single annotated object as bounding box"""
-    # Constructor
-    def __init__(self):
-        CsObject.__init__(self, CsObjectType.BBOX2D)
-        # the polygon as list of points
-        self.bbox_amodal_xywh = []
-        self.bbox_modal_xywh = []
-        # the ID of the corresponding object
-        self.instanceId = -1
-        # the label of the corresponding object
-        self.label = ""
-    def __str__(self):
-        bboxAmodalText = ""
-        bboxAmodalText += '[(x1: {}, y1: {}), (w: {}, h: {})]'.format(
-            self.bbox_amodal_xywh[0], self.bbox_amodal_xywh[1],  self.bbox_amodal_xywh[2],  self.bbox_amodal_xywh[3])
-        bboxModalText = ""
-        bboxModalText += '[(x1: {}, y1: {}), (w: {}, h: {})]'.format(
-            self.bbox_modal_xywh[0], self.bbox_modal_xywh[1], self.bbox_modal_xywh[2], self.bbox_modal_xywh[3])
-        text = "Object: {}\n - Amodal {}\n - Modal {}".format(
-            self.label, bboxAmodalText, bboxModalText)
-        return text
-    def setAmodalBox(self, bbox_amodal):
-        # sets the amodal box if required
-        self.bbox_amodal_xywh = [
-            bbox_amodal[0],
-            bbox_amodal[1],
-            bbox_amodal[2] - bbox_amodal[0],
-            bbox_amodal[3] - bbox_amodal[1]
-        ]
-    # access 2d boxes in [xmin, ymin, xmax, ymax] format
-    @property
-    def bbox_amodal(self):
-        """Returns the 2d box as [xmin, ymin, xmax, ymax]"""
-        return [
-            self.bbox_amodal_xywh[0],
-            self.bbox_amodal_xywh[1],
-            self.bbox_amodal_xywh[0] + self.bbox_amodal_xywh[2],
-            self.bbox_amodal_xywh[1] + self.bbox_amodal_xywh[3]
-        ]
-    @property
-    def bbox_modal(self):
-        """Returns the 2d box as [xmin, ymin, xmax, ymax]"""
-        return [
-            self.bbox_modal_xywh[0],
-            self.bbox_modal_xywh[1],
-            self.bbox_modal_xywh[0] + self.bbox_modal_xywh[2],
-            self.bbox_modal_xywh[1] + self.bbox_modal_xywh[3]
-        ]
-    def fromJsonText(self, jsonText, objId=-1):
-        # try to load from cityperson format
-        if 'bbox' in jsonText.keys() and 'bboxVis' in jsonText.keys():
-            self.bbox_amodal_xywh = jsonText['bbox']
-            self.bbox_modal_xywh = jsonText['bboxVis']
-        # both modal and amodal boxes are provided
-        elif "modal" in jsonText.keys() and "amodal" in jsonText.keys():
-            self.bbox_amodal_xywh = jsonText['amodal']
-            self.bbox_modal_xywh = jsonText['modal']
-        # only amodal boxes are provided
-        else:
-            self.bbox_modal_xywh = jsonText['amodal']
-            self.bbox_amodal_xywh = jsonText['amodal']
-        # load label and instanceId if available
-        if 'label' in jsonText.keys() and 'instanceId' in jsonText.keys():
-            self.label = str(jsonText['label'])
-            self.instanceId = jsonText['instanceId']
-    def toJsonText(self):
-        objDict = {}
-        objDict['label'] = self.label
-        objDict['instanceId'] = self.instanceId
-        objDict['modal'] = self.bbox_modal_xywh
-        objDict['amodal'] = self.bbox_amodal_xywh
-        return objDict
-class CsBbox3d(CsObject):
-    """Class that contains the information of a single annotated object as 3D bounding box"""
-    # Constructor
-    def __init__(self):
-        CsObject.__init__(self, CsObjectType.BBOX3D)
-        self.bbox_2d = None
-        self.center = []
-        self.dims = []
-        self.rotation = []
-        self.instanceId = -1
-        self.label = ""
-        self.score = -1.
-    def __str__(self):
-        bbox2dText = str(self.bbox_2d)
-        bbox3dText = ""
-        bbox3dText += '\n - Center (x/y/z) [m]: {}/{}/{}'.format(
-            self.center[0], self.center[1],  self.center[2])
-        bbox3dText += '\n - Dimensions (l/w/h) [m]: {}/{}/{}'.format(
-            self.dims[0], self.dims[1],  self.dims[2])
-        bbox3dText += '\n - Rotation: {}/{}/{}/{}'.format(
-            self.rotation[0], self.rotation[1], self.rotation[2], self.rotation[3])
-        text = "Object: {}\n2D {}\n - 3D {}".format(
-            self.label, bbox2dText, bbox3dText)
-        return text
-    def fromJsonText(self, jsonText, objId=-1):
-        # load 2D box
-        self.bbox_2d = CsBbox2d()
-        self.bbox_2d.fromJsonText(jsonText['2d'])
-        self.center = jsonText['3d']['center']
-        self.dims = jsonText['3d']['dimensions']
-        self.rotation = jsonText['3d']['rotation']
-        self.label = jsonText['label']
-        self.score = jsonText['score']
-        if 'instanceId' in jsonText.keys():
-            self.instanceId = jsonText['instanceId']
-    def toJsonText(self):
-        objDict = {}
-        objDict['label'] = self.label
-        objDict['instanceId'] = self.instanceId
-        objDict['2d']['amodal'] = self.bbox_2d.bbox_amodal_xywh
-        objDict['2d']['modal'] = self.bbox_2d.bbox_modal_xywh
-        objDict['3d']['center'] = self.center
-        objDict['3d']['dimensions'] = self.dims
-        objDict['3d']['rotation'] = self.rotation
-        return objDict
-    @property
-    def depth(self):
-        # returns the BEV depth
-        return np.sqrt(self.center[0]**2 + self.center[1]**2).astype(int)
-class CsIgnore2d(CsObject):
-    """Class that contains the information of a single annotated 2d ignore region"""
-    # Constructor
-    def __init__(self):
-        CsObject.__init__(self, CsObjectType.IGNORE2D)
-        self.bbox_xywh = []
-        self.label = ""
-        self.instanceId = -1
-    def __str__(self):
-        bbox2dText = ""
-        bbox2dText += 'Ignore Region:  (x1: {}, y1: {}), (w: {}, h: {})'.format(
-            self.bbox_xywh[0], self.bbox_xywh[1], self.bbox_xywh[2], self.bbox_xywh[3])
-        return bbox2dText
-    def fromJsonText(self, jsonText, objId=-1):
-        self.bbox_xywh = jsonText['2d']
-        if 'label' in jsonText.keys():
-            self.label = jsonText['label']
-        if 'instanceId' in jsonText.keys():
-            self.instanceId = jsonText['instanceId']
-    def toJsonText(self):
-        objDict = {}
-        objDict['label'] = self.label
-        objDict['instanceId'] = self.instanceId
-        objDict['2d'] = self.bbox_xywh
-        return objDict
-    @property
-    def bbox(self):
-        """Returns the 2d box as [xmin, ymin, xmax, ymax]"""
-        return [
-            self.bbox_xywh[0],
-            self.bbox_xywh[1],
-            self.bbox_xywh[0] + self.bbox_xywh[2],
-            self.bbox_xywh[1] + self.bbox_xywh[3]
-        ]
-    # Extend api to be compatible to bbox2d
-    @property
-    def bbox_amodal_xywh(self):
-        return self.bbox_xywh
-    @property
-    def bbox_modal_xywh(self):
-        return self.bbox_xywh
-class Annotation:
-    """The annotation of a whole image (doesn't support mixed annotations, i.e. combining CsPoly and CsBbox2d)"""
-    # Constructor
-    def __init__(self, objType=CsObjectType.POLY):
-        # the width of that image and thus of the label image
-        self.imgWidth = 0
-        # the height of that image and thus of the label image
-        self.imgHeight = 0
-        # the list of objects
-        self.objects = []
-        # the camera calibration
-        self.camera = None
-        assert objType in CsObjectType.__dict__.values()
-        self.objectType = objType
-    def toJson(self):
-        return json.dumps(self, default=lambda o: o.__dict__, sort_keys=True, indent=4)
-    def fromJsonText(self, jsonText):
-        jsonDict = json.loads(jsonText)
-        self.imgWidth = int(jsonDict['imgWidth'])
-        self.imgHeight = int(jsonDict['imgHeight'])
-        self.objects = []
-        # load objects
-        if self.objectType != CsObjectType.IGNORE2D:
-            for objId, objIn in enumerate(jsonDict['objects']):
-                if self.objectType == CsObjectType.POLY:
-                    obj = CsPoly()
-                elif self.objectType == CsObjectType.BBOX2D:
-                    obj = CsBbox2d()
-                elif self.objectType == CsObjectType.BBOX3D:
-                    obj = CsBbox3d()
-                obj.fromJsonText(objIn, objId)
-                self.objects.append(obj)
-        # load ignores
-        if 'ignore' in jsonDict.keys():
-            for ignoreId, ignoreIn in enumerate(jsonDict['ignore']):
-                obj = CsIgnore2d()
-                obj.fromJsonText(ignoreIn, ignoreId)
-                self.objects.append(obj)
-        # load camera calibration
-        if 'sensor' in jsonDict.keys():
-            self.camera = Camera(fx=jsonDict['sensor']['fx'],
-                                 fy=jsonDict['sensor']['fy'],
-                                 u0=jsonDict['sensor']['u0'],
-                                 v0=jsonDict['sensor']['v0'],
-                                 sensor_T_ISO_8855=jsonDict['sensor']['sensor_T_ISO_8855'])
-    def toJsonText(self):
-        jsonDict = {}
-        jsonDict['imgWidth'] = self.imgWidth
-        jsonDict['imgHeight'] = self.imgHeight
-        jsonDict['objects'] = []
-        for obj in self.objects:
-            objDict = obj.toJsonText()
-            jsonDict['objects'].append(objDict)
-        return jsonDict
-    # Read a json formatted polygon file and return the annotation
-    def fromJsonFile(self, jsonFile):
-        if not os.path.isfile(jsonFile):
-            print('Given json file not found: {}'.format(jsonFile))
-            return
-        with open(jsonFile, 'r') as f:
-            jsonText = f.read()
-            self.fromJsonText(jsonText)
-    def toJsonFile(self, jsonFile):
-        with open(jsonFile, 'w') as f:
-            f.write(self.toJson())
-# a dummy example
-if __name__ == "__main__":
-    obj = CsPoly()
-    obj.label = 'car'
-    obj.polygon.append(Point(0, 0))
-    obj.polygon.append(Point(1, 0))
-    obj.polygon.append(Point(1, 1))
-    obj.polygon.append(Point(0, 1))
-    print(type(obj).__name__)
-    print(obj)

ext/cityscapes_scripts/helpers/csHelpers.py DELETED Viewed

@@ -1,129 +0,0 @@
-#!/usr/bin/python
-#
-# Various helper methods and includes for Cityscapes
-#
-# Python imports
-from __future__ import print_function, absolute_import, division
-import os
-import sys
-import getopt
-import glob
-import math
-import json
-from collections import namedtuple
-import logging
-import traceback
-# Image processing
-from PIL import Image
-from PIL import ImageDraw
-# Numpy for datastructures
-import numpy as np
-# Cityscapes modules
-# from .annotation import Annotation
-from .labels import labels, name2label, id2label, trainId2label, category2labels
-def printError(message):
-    """Print an error message and quit"""
-    print('ERROR: ' + str(message))
-    sys.exit(-1)
-class colors:
-    """Class for colors"""
-    RED = '\033[31;1m'
-    GREEN = '\033[32;1m'
-    YELLOW = '\033[33;1m'
-    BLUE = '\033[34;1m'
-    MAGENTA = '\033[35;1m'
-    CYAN = '\033[36;1m'
-    BOLD = '\033[1m'
-    UNDERLINE = '\033[4m'
-    ENDC = '\033[0m'
-def getColorEntry(val, args):
-    """Colored value output if colorized flag is activated."""
-    if not args.colorized:
-        return ""
-    if not isinstance(val, float) or math.isnan(val):
-        return colors.ENDC
-    if (val < .20):
-        return colors.RED
-    elif (val < .40):
-        return colors.YELLOW
-    elif (val < .60):
-        return colors.BLUE
-    elif (val < .80):
-        return colors.CYAN
-    else:
-        return colors.GREEN
-# Cityscapes files have a typical filename structure
-# <city>_<sequenceNb>_<frameNb>_<type>[_<type2>].<ext>
-# This class contains the individual elements as members
-# For the sequence and frame number, the strings are returned, including leading zeros
-CsFile = namedtuple('csFile', ['city', 'sequenceNb', 'frameNb', 'type', 'type2', 'ext'])
-def getCsFileInfo(fileName):
-    """Returns a CsFile object filled from the info in the given filename"""
-    baseName = os.path.basename(fileName)
-    parts = baseName.split('_')
-    parts = parts[:-1] + parts[-1].split('.')
-    if not parts:
-        printError('Cannot parse given filename ({}). Does not seem to be a valid Cityscapes file.'.format(fileName))
-    if len(parts) == 5:
-        csFile = CsFile(*parts[:-1], type2="", ext=parts[-1])
-    elif len(parts) == 6:
-        csFile = CsFile(*parts)
-    else:
-        printError('Found {} part(s) in given filename ({}). Expected 5 or 6.'.format(len(parts), fileName))
-    return csFile
-def getCoreImageFileName(filename):
-    """Returns the part of Cityscapes filenames that is common to all data types
-    e.g. for city_123456_123456_gtFine_polygons.json returns city_123456_123456
-    """
-    csFile = getCsFileInfo(filename)
-    return "{}_{}_{}".format(csFile.city, csFile.sequenceNb, csFile.frameNb)
-def getDirectory(fileName):
-    """Returns the directory name for the given filename
-    e.g.
-    fileName = "/foo/bar/foobar.txt"
-    return value is "bar"
-    Not much error checking though
-    """
-    dirName = os.path.dirname(fileName)
-    return os.path.basename(dirName)
-def ensurePath(path):
-    """Make sure that the given path exists"""
-    if not path:
-        return
-    if not os.path.isdir(path):
-        os.makedirs(path)
-def writeDict2JSON(dictName, fileName):
-    """Write a dictionary as json file"""
-    with open(fileName, 'w') as f:
-        f.write(json.dumps(dictName, default=lambda o: o.__dict__, sort_keys=True, indent=4))
-# dummy main
-if __name__ == "__main__":
-    printError("Only for include, not executable on its own.")

ext/cityscapes_scripts/helpers/labels.py DELETED Viewed

@@ -1,182 +0,0 @@
-#!/usr/bin/python
-#
-# Cityscapes labels
-#
-from __future__ import print_function, absolute_import, division
-from collections import namedtuple
-#--------------------------------------------------------------------------------
-# Definitions
-#--------------------------------------------------------------------------------
-# a label and all meta information
-Label = namedtuple( 'Label' , [
-    'name'        , # The identifier of this label, e.g. 'car', 'person', ... .
-                    # We use them to uniquely name a class
-    'id'          , # An integer ID that is associated with this label.
-                    # The IDs are used to represent the label in ground truth images
-                    # An ID of -1 means that this label does not have an ID and thus
-                    # is ignored when creating ground truth images (e.g. license plate).
-                    # Do not modify these IDs, since exactly these IDs are expected by the
-                    # evaluation server.
-    'trainId'     , # Feel free to modify these IDs as suitable for your method. Then create
-                    # ground truth images with train IDs, using the tools provided in the
-                    # 'preparation' folder. However, make sure to validate or submit results
-                    # to our evaluation server using the regular IDs above!
-                    # For trainIds, multiple labels might have the same ID. Then, these labels
-                    # are mapped to the same class in the ground truth images. For the inverse
-                    # mapping, we use the label that is defined first in the list below.
-                    # For example, mapping all void-type classes to the same ID in training,
-                    # might make sense for some approaches.
-                    # Max value is 255!
-    'category'    , # The name of the category that this label belongs to
-    'categoryId'  , # The ID of this category. Used to create ground truth images
-                    # on category level.
-    'hasInstances', # Whether this label distinguishes between single instances or not
-    'ignoreInEval', # Whether pixels having this class as ground truth label are ignored
-                    # during evaluations or not
-    'color'       , # The color of this label
-    ] )
-#--------------------------------------------------------------------------------
-# A list of all labels
-#--------------------------------------------------------------------------------
-# Please adapt the train IDs as appropriate for your approach.
-# Note that you might want to ignore labels with ID 255 during training.
-# Further note that the current train IDs are only a suggestion. You can use whatever you like.
-# Make sure to provide your results using the original IDs and not the training IDs.
-# Note that many IDs are ignored in evaluation and thus you never need to predict these!
-labels = [
-    #       name                     id    trainId   category            catId     hasInstances   ignoreInEval   color
-    Label(  'unlabeled'            ,  0 ,      255 , 'void'            , 0       , False        , True         , (  0,  0,  0) ),
-    Label(  'ego vehicle'          ,  1 ,      255 , 'void'            , 0       , False        , True         , (  0,  0,  0) ),
-    Label(  'rectification border' ,  2 ,      255 , 'void'            , 0       , False        , True         , (  0,  0,  0) ),
-    Label(  'out of roi'           ,  3 ,      255 , 'void'            , 0       , False        , True         , (  0,  0,  0) ),
-    Label(  'static'               ,  4 ,      255 , 'void'            , 0       , False        , True         , (  0,  0,  0) ),
-    Label(  'dynamic'              ,  5 ,      255 , 'void'            , 0       , False        , True         , (111, 74,  0) ),
-    Label(  'ground'               ,  6 ,      255 , 'void'            , 0       , False        , True         , ( 81,  0, 81) ),
-    Label(  'road'                 ,  7 ,        0 + 8, 'flat'            , 1       , False        , False        , (128, 64,128) ),
-    Label(  'sidewalk'             ,  8 ,        1 + 8, 'flat'            , 1       , False        , False        , (244, 35,232) ),
-    Label(  'parking'              ,  9 ,      255 , 'flat'            , 1       , False        , True         , (250,170,160) ),
-    Label(  'rail track'           , 10 ,      255 , 'flat'            , 1       , False        , True         , (230,150,140) ),
-    Label(  'building'             , 11 ,        2 + 8, 'construction'    , 2       , False        , False        , ( 70, 70, 70) ),
-    Label(  'wall'                 , 12 ,        3 + 8, 'construction'    , 2       , False        , False        , (102,102,156) ),
-    Label(  'fence'                , 13 ,        4 + 8, 'construction'    , 2       , False        , False        , (190,153,153) ),
-    Label(  'guard rail'           , 14 ,      255 , 'construction'    , 2       , False        , True         , (180,165,180) ),
-    Label(  'bridge'               , 15 ,      255 , 'construction'    , 2       , False        , True         , (150,100,100) ),
-    Label(  'tunnel'               , 16 ,      255 , 'construction'    , 2       , False        , True         , (150,120, 90) ),
-    Label(  'pole'                 , 17 ,        5 + 8, 'object'          , 3       , False        , False        , (153,153,153) ),
-    Label(  'polegroup'            , 18 ,      255 , 'object'          , 3       , False        , True         , (153,153,153) ),
-    Label(  'traffic light'        , 19 ,        6 + 8, 'object'          , 3       , False        , False        , (250,170, 30) ),
-    Label(  'traffic sign'         , 20 ,        7 + 8, 'object'          , 3       , False        , False        , (220,220,  0) ),
-    Label(  'vegetation'           , 21 ,        8 + 8, 'nature'          , 4       , False        , False        , (107,142, 35) ),
-    Label(  'terrain'              , 22 ,        9 + 8, 'nature'          , 4       , False        , False        , (152,251,152) ),
-    Label(  'sky'                  , 23 ,       10 + 8, 'sky'             , 5       , False        , False        , ( 70,130,180) ),
-    Label(  'person'               , 24 ,       11 - 11 , 'human'           , 6       , True         , False        , (220, 20, 60) ),
-    Label(  'rider'                , 25 ,       12 - 11 , 'human'           , 6       , True         , False        , (255,  0,  0) ),
-    Label(  'car'                  , 26 ,       13 - 11, 'vehicle'         , 7       , True         , False        , (  0,  0,142) ),
-    Label(  'truck'                , 27 ,       14 - 11, 'vehicle'         , 7       , True         , False        , (  0,  0, 70) ),
-    Label(  'bus'                  , 28 ,       15 - 11, 'vehicle'         , 7       , True         , False        , (  0, 60,100) ),
-    Label(  'caravan'              , 29 ,      255 , 'vehicle'         , 7       , True         , True         , (  0,  0, 90) ),
-    Label(  'trailer'              , 30 ,      255 , 'vehicle'         , 7       , True         , True         , (  0,  0,110) ),
-    Label(  'train'                , 31 ,       16 - 11, 'vehicle'         , 7       , True         , False        , (  0, 80,100) ),
-    Label(  'motorcycle'           , 32 ,       17 - 11, 'vehicle'         , 7       , True         , False        , (  0,  0,230) ),
-    Label(  'bicycle'              , 33 ,       18 - 11, 'vehicle'         , 7       , True         , False        , (119, 11, 32) ),
-    Label(  'license plate'        , -1 ,       -1 , 'vehicle'         , 7       , False        , True         , (  0,  0,142) ),
-]
-#--------------------------------------------------------------------------------
-# Create dictionaries for a fast lookup
-#--------------------------------------------------------------------------------
-# Please refer to the main method below for example usages!
-# name to label object
-name2label      = { label.name    : label for label in labels           }
-# id to label object
-id2label        = { label.id      : label for label in labels           }
-# trainId to label object
-trainId2label   = { label.trainId : label for label in reversed(labels) }
-# category to list of label objects
-category2labels = {}
-for label in labels:
-    category = label.category
-    if category in category2labels:
-        category2labels[category].append(label)
-    else:
-        category2labels[category] = [label]
-#--------------------------------------------------------------------------------
-# Assure single instance name
-#--------------------------------------------------------------------------------
-# returns the label name that describes a single instance (if possible)
-# e.g.     input     |   output
-#        ----------------------
-#          car       |   car
-#          cargroup  |   car
-#          foo       |   None
-#          foogroup  |   None
-#          skygroup  |   None
-def assureSingleInstanceName( name ):
-    # if the name is known, it is not a group
-    if name in name2label:
-        return name
-    # test if the name actually denotes a group
-    if not name.endswith("group"):
-        return None
-    # remove group
-    name = name[:-len("group")]
-    # test if the new name exists
-    if not name in name2label:
-        return None
-    # test if the new name denotes a label that actually has instances
-    if not name2label[name].hasInstances:
-        return None
-    # all good then
-    return name
-#--------------------------------------------------------------------------------
-# Main for testing
-#--------------------------------------------------------------------------------
-# just a dummy main
-if __name__ == "__main__":
-    # Print all the labels
-    print("List of cityscapes labels:")
-    print("")
-    print("    {:>21} | {:>3} | {:>7} | {:>14} | {:>10} | {:>12} | {:>12}".format( 'name', 'id', 'trainId', 'category', 'categoryId', 'hasInstances', 'ignoreInEval' ))
-    print("    " + ('-' * 98))
-    for label in labels:
-        print("    {:>21} | {:>3} | {:>7} | {:>14} | {:>10} | {:>12} | {:>12}".format( label.name, label.id, label.trainId, label.category, label.categoryId, label.hasInstances, label.ignoreInEval ))
-    print("")
-    print("Example usages:")
-    # Map from name to label
-    name = 'car'
-    id   = name2label[name].id
-    print("ID of label '{name}': {id}".format( name=name, id=id ))
-    # Map from ID to label
-    category = id2label[id].category
-    print("Category of label with ID '{id}': {category}".format( id=id, category=category ))
-    # Map from trainID to label
-    trainId = 0
-    name = trainId2label[trainId].name
-    print("Name of label with trainID '{id}': {name}".format( id=trainId, name=name ))

ext/cityscapes_scripts/helpers/labels_cityPersons.py DELETED Viewed

@@ -1,61 +0,0 @@
-#!/usr/bin/python
-#
-# CityPersons (cp) labels
-#
-from __future__ import print_function, absolute_import, division
-from collections import namedtuple
-#--------------------------------------------------------------------------------
-# Definitions
-#--------------------------------------------------------------------------------
-# a label and all meta information
-LabelCp = namedtuple( 'LabelCp' , [
-    'name'        , # The identifier of this label, e.g. 'pedestrian', 'rider', ... .
-                    # We use them to uniquely name a class
-    'id'          , # An integer ID that is associated with this label.
-                    # The IDs are used to represent the label in ground truth
-    'hasInstances', # Whether this label distinguishes between single instances or not
-    'ignoreInEval', # Whether pixels having this class as ground truth label are ignored
-                    # during evaluations or not
-    'color'       , # The color of this label
-    ] )
-#--------------------------------------------------------------------------------
-# A list of all labels
-#--------------------------------------------------------------------------------
-# The 'ignore' label covers representations of humans, e.g. people on posters, reflections etc.
-# Each annotation includes both the full bounding box (bbox) as well as a bounding box covering the visible area (bboxVis).
-# The latter is obtained automatically from the segmentation masks.
-labelsCp = [
-    #         name                     id   hasInstances   ignoreInEval   color
-    LabelCp(  'ignore'               ,  0 , False        , True         , (250,170, 30) ),
-    LabelCp(  'pedestrian'           ,  1 , True         , False        , (220, 20, 60) ),
-    LabelCp(  'rider'                ,  2 , True         , False        , (  0,  0,142) ),
-    LabelCp(  'sitting person'       ,  3 , True         , False        , (107,142, 35) ),
-    LabelCp(  'person (other)'       ,  4 , True         , False        , (190,153,153) ),
-    LabelCp(  'person group'         ,  5 , False        , True         , (255,  0,  0) ),
-]
-#--------------------------------------------------------------------------------
-# Create dictionaries for a fast lookup
-#--------------------------------------------------------------------------------
-# Please refer to the main method below for example usages!
-# name to label object
-name2labelCp      = { label.name    : label for label in labelsCp }
-# id to label object
-id2labelCp        = { label.id      : label for label in labelsCp }

ext/cityscapes_scripts/helpers/version.py DELETED Viewed

@@ -1,9 +0,0 @@
-#!/usr/bin/env python
-import os
-with open(os.path.join(os.path.dirname(__file__), '..', 'VERSION')) as f:
-    version = f.read().strip()
-if __name__ == "__main__":
-    print(version)

ext/davis2017/__init__.py DELETED Viewed

@@ -1,3 +0,0 @@
-from __future__ import absolute_import
-__version__ = '0.1.0'

ext/davis2017/davis.py DELETED Viewed

@@ -1,122 +0,0 @@
-import os
-from glob import glob
-from collections import defaultdict
-import numpy as np
-from PIL import Image
-class DAVIS(object):
-    SUBSET_OPTIONS = ['train', 'val', 'test-dev', 'test-challenge']
-    TASKS = ['semi-supervised', 'unsupervised']
-    DATASET_WEB = 'https://davischallenge.org/davis2017/code.html'
-    VOID_LABEL = 255
-    def __init__(self, root, task='unsupervised', subset='val', sequences='all', resolution='480p', codalab=False):
-        """
-        Class to read the DAVIS dataset
-        :param root: Path to the DAVIS folder that contains JPEGImages, Annotations, etc. folders.
-        :param task: Task to load the annotations, choose between semi-supervised or unsupervised.
-        :param subset: Set to load the annotations
-        :param sequences: Sequences to consider, 'all' to use all the sequences in a set.
-        :param resolution: Specify the resolution to use the dataset, choose between '480' and 'Full-Resolution'
-        """
-        if subset not in self.SUBSET_OPTIONS:
-            raise ValueError(f'Subset should be in {self.SUBSET_OPTIONS}')
-        if task not in self.TASKS:
-            raise ValueError(f'The only tasks that are supported are {self.TASKS}')
-        self.task = task
-        self.subset = subset
-        self.root = root
-        self.img_path = os.path.join(self.root, 'JPEGImages', resolution)
-        annotations_folder = 'Annotations' if task == 'semi-supervised' else 'Annotations_unsupervised'
-        self.mask_path = os.path.join(self.root, annotations_folder, resolution)
-        year = '2019' if task == 'unsupervised' and (subset == 'test-dev' or subset == 'test-challenge') else '2017'
-        self.imagesets_path = os.path.join(self.root, 'ImageSets', year)
-        self._check_directories()
-        if sequences == 'all':
-            with open(os.path.join(self.imagesets_path, f'{self.subset}.txt'), 'r') as f:
-                tmp = f.readlines()
-            sequences_names = [x.strip() for x in tmp]
-        else:
-            sequences_names = sequences if isinstance(sequences, list) else [sequences]
-        self.sequences = defaultdict(dict)
-        for seq in sequences_names:
-            images = np.sort(glob(os.path.join(self.img_path, seq, '*.jpg'))).tolist()
-            if len(images) == 0 and not codalab:
-                raise FileNotFoundError(f'Images for sequence {seq} not found.')
-            self.sequences[seq]['images'] = images
-            masks = np.sort(glob(os.path.join(self.mask_path, seq, '*.png'))).tolist()
-            masks.extend([-1] * (len(images) - len(masks)))
-            self.sequences[seq]['masks'] = masks
-    def _check_directories(self):
-        if not os.path.exists(self.root):
-            raise FileNotFoundError(f'DAVIS not found in the specified directory, download it from {self.DATASET_WEB}')
-        if not os.path.exists(os.path.join(self.imagesets_path, f'{self.subset}.txt')):
-            raise FileNotFoundError(f'Subset sequences list for {self.subset} not found, download the missing subset '
-                                    f'for the {self.task} task from {self.DATASET_WEB}')
-        if self.subset in ['train', 'val'] and not os.path.exists(self.mask_path):
-            raise FileNotFoundError(f'Annotations folder for the {self.task} task not found, download it from {self.DATASET_WEB}')
-    def get_frames(self, sequence):
-        for img, msk in zip(self.sequences[sequence]['images'], self.sequences[sequence]['masks']):
-            image = np.array(Image.open(img))
-            mask = None if msk is None else np.array(Image.open(msk))
-            yield image, mask
-    def _get_all_elements(self, sequence, obj_type):
-        obj = np.array(Image.open(self.sequences[sequence][obj_type][0]))
-        all_objs = np.zeros((len(self.sequences[sequence][obj_type]), *obj.shape))
-        obj_id = []
-        for i, obj in enumerate(self.sequences[sequence][obj_type]):
-            all_objs[i, ...] = np.array(Image.open(obj))
-            obj_id.append(''.join(obj.split('/')[-1].split('.')[:-1]))
-        return all_objs, obj_id
-    def get_all_images(self, sequence):
-        return self._get_all_elements(sequence, 'images')
-    def get_all_masks(self, sequence, separate_objects_masks=False):
-        masks, masks_id = self._get_all_elements(sequence, 'masks')
-        masks_void = np.zeros_like(masks)
-        # Separate void and object masks
-        for i in range(masks.shape[0]):
-            masks_void[i, ...] = masks[i, ...] == 255
-            masks[i, masks[i, ...] == 255] = 0
-        if separate_objects_masks:
-            num_objects = int(np.max(masks[0, ...]))
-            tmp = np.ones((num_objects, *masks.shape))
-            tmp = tmp * np.arange(1, num_objects + 1)[:, None, None, None]
-            masks = (tmp == masks[None, ...])
-            masks = masks > 0
-        return masks, masks_void, masks_id
-    def get_sequences(self):
-        for seq in self.sequences:
-            yield seq
-if __name__ == '__main__':
-    from matplotlib import pyplot as plt
-    only_first_frame = True
-    subsets = ['train', 'val']
-    for s in subsets:
-        dataset = DAVIS(root='/home/csergi/scratch2/Databases/DAVIS2017_private', subset=s)
-        for seq in dataset.get_sequences():
-            g = dataset.get_frames(seq)
-            img, mask = next(g)
-            plt.subplot(2, 1, 1)
-            plt.title(seq)
-            plt.imshow(img)
-            plt.subplot(2, 1, 2)
-            plt.imshow(mask)
-            plt.show(block=True)

ext/davis2017/evaluation.py DELETED Viewed

@@ -1,110 +0,0 @@
-import sys
-from tqdm import tqdm
-import warnings
-warnings.filterwarnings("ignore", category=RuntimeWarning)
-import numpy as np
-from ext.davis2017.davis import DAVIS
-from ext.davis2017.metrics import db_eval_boundary, db_eval_iou
-from ext.davis2017 import utils
-from ext.davis2017.results import Results
-from scipy.optimize import linear_sum_assignment
-class DAVISEvaluation(object):
-    def __init__(self, davis_root, task, gt_set, sequences='all', codalab=False):
-        """
-        Class to evaluate DAVIS sequences from a certain set and for a certain task
-        :param davis_root: Path to the DAVIS folder that contains JPEGImages, Annotations, etc. folders.
-        :param task: Task to compute the evaluation, chose between semi-supervised or unsupervised.
-        :param gt_set: Set to compute the evaluation
-        :param sequences: Sequences to consider for the evaluation, 'all' to use all the sequences in a set.
-        """
-        self.davis_root = davis_root
-        self.task = task
-        self.dataset = DAVIS(root=davis_root, task=task, subset=gt_set, sequences=sequences, codalab=codalab)
-    @staticmethod
-    def _evaluate_semisupervised(all_gt_masks, all_res_masks, all_void_masks, metric):
-        if all_res_masks.shape[0] > all_gt_masks.shape[0]:
-            sys.stdout.write("\nIn your PNG files there is an index higher than the number of objects in the sequence!")
-            sys.exit()
-        elif all_res_masks.shape[0] < all_gt_masks.shape[0]:
-            zero_padding = np.zeros((all_gt_masks.shape[0] - all_res_masks.shape[0], *all_res_masks.shape[1:]))
-            all_res_masks = np.concatenate([all_res_masks, zero_padding], axis=0)
-        j_metrics_res, f_metrics_res = np.zeros(all_gt_masks.shape[:2]), np.zeros(all_gt_masks.shape[:2])
-        for ii in range(all_gt_masks.shape[0]):
-            if 'J' in metric:
-                j_metrics_res[ii, :] = db_eval_iou(all_gt_masks[ii, ...], all_res_masks[ii, ...], all_void_masks)
-            if 'F' in metric:
-                f_metrics_res[ii, :] = db_eval_boundary(all_gt_masks[ii, ...], all_res_masks[ii, ...], all_void_masks)
-        return j_metrics_res, f_metrics_res
-    @staticmethod
-    def _evaluate_unsupervised(all_gt_masks, all_res_masks, all_void_masks, metric, max_n_proposals=20):
-        if all_res_masks.shape[0] > max_n_proposals:
-            sys.stdout.write(f"\nIn your PNG files there is an index higher than the maximum number ({max_n_proposals}) of proposals allowed!")
-            sys.exit()
-        elif all_res_masks.shape[0] < all_gt_masks.shape[0]:
-            zero_padding = np.zeros((all_gt_masks.shape[0] - all_res_masks.shape[0], *all_res_masks.shape[1:]))
-            all_res_masks = np.concatenate([all_res_masks, zero_padding], axis=0)
-        j_metrics_res = np.zeros((all_res_masks.shape[0], all_gt_masks.shape[0], all_gt_masks.shape[1]))
-        f_metrics_res = np.zeros((all_res_masks.shape[0], all_gt_masks.shape[0], all_gt_masks.shape[1]))
-        for ii in range(all_gt_masks.shape[0]):
-            for jj in range(all_res_masks.shape[0]):
-                if 'J' in metric:
-                    j_metrics_res[jj, ii, :] = db_eval_iou(all_gt_masks[ii, ...], all_res_masks[jj, ...], all_void_masks)
-                if 'F' in metric:
-                    f_metrics_res[jj, ii, :] = db_eval_boundary(all_gt_masks[ii, ...], all_res_masks[jj, ...], all_void_masks)
-        if 'J' in metric and 'F' in metric:
-            all_metrics = (np.mean(j_metrics_res, axis=2) + np.mean(f_metrics_res, axis=2)) / 2
-        else:
-            all_metrics = np.mean(j_metrics_res, axis=2) if 'J' in metric else np.mean(f_metrics_res, axis=2)
-        row_ind, col_ind = linear_sum_assignment(-all_metrics)
-        return j_metrics_res[row_ind, col_ind, :], f_metrics_res[row_ind, col_ind, :]
-    def evaluate(self, res_path, metric=('J', 'F'), debug=False):
-        metric = metric if isinstance(metric, tuple) or isinstance(metric, list) else [metric]
-        if 'T' in metric:
-            raise ValueError('Temporal metric not supported!')
-        if 'J' not in metric and 'F' not in metric:
-            raise ValueError('Metric possible values are J for IoU or F for Boundary')
-        # Containers
-        metrics_res = {}
-        if 'J' in metric:
-            metrics_res['J'] = {"M": [], "R": [], "D": [], "M_per_object": {}}
-        if 'F' in metric:
-            metrics_res['F'] = {"M": [], "R": [], "D": [], "M_per_object": {}}
-        # Sweep all sequences
-        results = Results(root_dir=res_path)
-        for seq in tqdm(list(self.dataset.get_sequences())):
-            all_gt_masks, all_void_masks, all_masks_id = self.dataset.get_all_masks(seq, True)
-            if self.task == 'semi-supervised':
-                all_gt_masks, all_masks_id = all_gt_masks[:, 1:-1, :, :], all_masks_id[1:-1]
-            all_res_masks = results.read_masks(seq, all_masks_id)
-            if self.task == 'unsupervised':
-                j_metrics_res, f_metrics_res = self._evaluate_unsupervised(all_gt_masks, all_res_masks, all_void_masks, metric)
-            elif self.task == 'semi-supervised':
-                j_metrics_res, f_metrics_res = self._evaluate_semisupervised(all_gt_masks, all_res_masks, None, metric)
-            for ii in range(all_gt_masks.shape[0]):
-                seq_name = f'{seq}_{ii+1}'
-                if 'J' in metric:
-                    [JM, JR, JD] = utils.db_statistics(j_metrics_res[ii])
-                    metrics_res['J']["M"].append(JM)
-                    metrics_res['J']["R"].append(JR)
-                    metrics_res['J']["D"].append(JD)
-                    metrics_res['J']["M_per_object"][seq_name] = JM
-                if 'F' in metric:
-                    [FM, FR, FD] = utils.db_statistics(f_metrics_res[ii])
-                    metrics_res['F']["M"].append(FM)
-                    metrics_res['F']["R"].append(FR)
-                    metrics_res['F']["D"].append(FD)
-                    metrics_res['F']["M_per_object"][seq_name] = FM
-            # Show progress
-            if debug:
-                sys.stdout.write(seq + '\n')
-                sys.stdout.flush()
-        return metrics_res

ext/davis2017/metrics.py DELETED Viewed

@@ -1,197 +0,0 @@
-import math
-import numpy as np
-import cv2
-def db_eval_iou(annotation, segmentation, void_pixels=None):
-    """ Compute region similarity as the Jaccard Index.
-    Arguments:
-        annotation   (ndarray): binary annotation   map.
-        segmentation (ndarray): binary segmentation map.
-        void_pixels  (ndarray): optional mask with void pixels
-    Return:
-        jaccard (float): region similarity
-    """
-    assert annotation.shape == segmentation.shape, \
-        f'Annotation({annotation.shape}) and segmentation:{segmentation.shape} dimensions do not match.'
-    annotation = annotation.astype(bool)
-    segmentation = segmentation.astype(bool)
-    if void_pixels is not None:
-        assert annotation.shape == void_pixels.shape, \
-            f'Annotation({annotation.shape}) and void pixels:{void_pixels.shape} dimensions do not match.'
-        void_pixels = void_pixels.astype(bool)
-    else:
-        void_pixels = np.zeros_like(segmentation)
-    # Intersection between all sets
-    inters = np.sum((segmentation & annotation) & np.logical_not(void_pixels), axis=(-2, -1))
-    union = np.sum((segmentation | annotation) & np.logical_not(void_pixels), axis=(-2, -1))
-    j = inters / union
-    if j.ndim == 0:
-        j = 1 if np.isclose(union, 0) else j
-    else:
-        j[np.isclose(union, 0)] = 1
-    return j
-def db_eval_boundary(annotation, segmentation, void_pixels=None, bound_th=0.008):
-    assert annotation.shape == segmentation.shape
-    if void_pixels is not None:
-        assert annotation.shape == void_pixels.shape
-    if annotation.ndim == 3:
-        n_frames = annotation.shape[0]
-        f_res = np.zeros(n_frames)
-        for frame_id in range(n_frames):
-            void_pixels_frame = None if void_pixels is None else void_pixels[frame_id, :, :, ]
-            f_res[frame_id] = f_measure(segmentation[frame_id, :, :, ], annotation[frame_id, :, :], void_pixels_frame, bound_th=bound_th)
-    elif annotation.ndim == 2:
-        f_res = f_measure(segmentation, annotation, void_pixels, bound_th=bound_th)
-    else:
-        raise ValueError(f'db_eval_boundary does not support tensors with {annotation.ndim} dimensions')
-    return f_res
-def f_measure(foreground_mask, gt_mask, void_pixels=None, bound_th=0.008):
-    """
-    Compute mean,recall and decay from per-frame evaluation.
-    Calculates precision/recall for boundaries between foreground_mask and
-    gt_mask using morphological operators to speed it up.
-    Arguments:
-        foreground_mask (ndarray): binary segmentation image.
-        gt_mask         (ndarray): binary annotated image.
-        void_pixels     (ndarray): optional mask with void pixels
-    Returns:
-        F (float): boundaries F-measure
-    """
-    assert np.atleast_3d(foreground_mask).shape[2] == 1
-    if void_pixels is not None:
-        void_pixels = void_pixels.astype(bool)
-    else:
-        void_pixels = np.zeros_like(foreground_mask).astype(bool)
-    bound_pix = bound_th if bound_th >= 1 else \
-        np.ceil(bound_th * np.linalg.norm(foreground_mask.shape))
-    # Get the pixel boundaries of both masks
-    fg_boundary = _seg2bmap(foreground_mask * np.logical_not(void_pixels))
-    gt_boundary = _seg2bmap(gt_mask * np.logical_not(void_pixels))
-    from skimage.morphology import disk
-    # fg_dil = binary_dilation(fg_boundary, disk(bound_pix))
-    fg_dil = cv2.dilate(fg_boundary.astype(np.uint8), disk(bound_pix).astype(np.uint8))
-    # gt_dil = binary_dilation(gt_boundary, disk(bound_pix))
-    gt_dil = cv2.dilate(gt_boundary.astype(np.uint8), disk(bound_pix).astype(np.uint8))
-    # Get the intersection
-    gt_match = gt_boundary * fg_dil
-    fg_match = fg_boundary * gt_dil
-    # Area of the intersection
-    n_fg = np.sum(fg_boundary)
-    n_gt = np.sum(gt_boundary)
-    # % Compute precision and recall
-    if n_fg == 0 and n_gt > 0:
-        precision = 1
-        recall = 0
-    elif n_fg > 0 and n_gt == 0:
-        precision = 0
-        recall = 1
-    elif n_fg == 0 and n_gt == 0:
-        precision = 1
-        recall = 1
-    else:
-        precision = np.sum(fg_match) / float(n_fg)
-        recall = np.sum(gt_match) / float(n_gt)
-    # Compute F measure
-    if precision + recall == 0:
-        F = 0
-    else:
-        F = 2 * precision * recall / (precision + recall)
-    return F
-def _seg2bmap(seg, width=None, height=None):
-    """
-    From a segmentation, compute a binary boundary map with 1 pixel wide
-    boundaries.  The boundary pixels are offset by 1/2 pixel towards the
-    origin from the actual segment boundary.
-    Arguments:
-        seg     : Segments labeled from 1..k.
-        width	  :	Width of desired bmap  <= seg.shape[1]
-        height  :	Height of desired bmap <= seg.shape[0]
-    Returns:
-        bmap (ndarray):	Binary boundary map.
-     David Martin <[email protected]>
-     January 2003
-    """
-    seg = seg.astype(bool)
-    seg[seg > 0] = 1
-    assert np.atleast_3d(seg).shape[2] == 1
-    width = seg.shape[1] if width is None else width
-    height = seg.shape[0] if height is None else height
-    h, w = seg.shape[:2]
-    ar1 = float(width) / float(height)
-    ar2 = float(w) / float(h)
-    assert not (
-        width > w | height > h | abs(ar1 - ar2) > 0.01
-    ), "Can" "t convert %dx%d seg to %dx%d bmap." % (w, h, width, height)
-    e = np.zeros_like(seg)
-    s = np.zeros_like(seg)
-    se = np.zeros_like(seg)
-    e[:, :-1] = seg[:, 1:]
-    s[:-1, :] = seg[1:, :]
-    se[:-1, :-1] = seg[1:, 1:]
-    b = seg ^ e | seg ^ s | seg ^ se
-    b[-1, :] = seg[-1, :] ^ e[-1, :]
-    b[:, -1] = seg[:, -1] ^ s[:, -1]
-    b[-1, -1] = 0
-    if w == width and h == height:
-        bmap = b
-    else:
-        bmap = np.zeros((height, width))
-        for x in range(w):
-            for y in range(h):
-                if b[y, x]:
-                    j = 1 + math.floor((y - 1) + height / h)
-                    i = 1 + math.floor((x - 1) + width / h)
-                    bmap[j, i] = 1
-    return bmap
-if __name__ == '__main__':
-    from davis2017.davis import DAVIS
-    from davis2017.results import Results
-    dataset = DAVIS(root='input_dir/ref', subset='val', sequences='aerobatics')
-    results = Results(root_dir='examples/osvos')
-    # Test timing F measure
-    for seq in dataset.get_sequences():
-        all_gt_masks, _, all_masks_id = dataset.get_all_masks(seq, True)
-        all_gt_masks, all_masks_id = all_gt_masks[:, 1:-1, :, :], all_masks_id[1:-1]
-        all_res_masks = results.read_masks(seq, all_masks_id)
-        f_metrics_res = np.zeros(all_gt_masks.shape[:2])
-        for ii in range(all_gt_masks.shape[0]):
-            f_metrics_res[ii, :] = db_eval_boundary(all_gt_masks[ii, ...], all_res_masks[ii, ...])
-    # Run using to profile code: python -m cProfile -o f_measure.prof metrics.py
-    #                            snakeviz f_measure.prof

ext/davis2017/results.py DELETED Viewed

@@ -1,52 +0,0 @@
-import os
-import numpy as np
-from PIL import Image, ImagePalette
-import sys
-davis_palette = b'\x00\x00\x00\x80\x00\x00\x00\x80\x00\x80\x80\x00\x00\x00\x80\x80\x00\x80\x00\x80\x80\x80\x80\x80@\x00\x00\xc0\x00\x00@\x80\x00\xc0\x80\x00@\x00\x80\xc0\x00\x80@\x80\x80\xc0\x80\x80\x00@\x00\x80@\x00\x00\xc0\x00\x80\xc0\x00\x00@\x80\x80@\x80\x00\xc0\x80\x80\xc0\x80@@\x00\xc0@\x00@\xc0\x00\xc0\xc0\x00@@\x80\xc0@\x80@\xc0\x80\xc0\xc0\x80\x00\x00@\x80\x00@\x00\x80@\x80\x80@\x00\x00\xc0\x80\x00\xc0\x00\x80\xc0\x80\x80\xc0@\x00@\xc0\x00@@\x80@\xc0\x80@@\x00\xc0\xc0\x00\xc0@\x80\xc0\xc0\x80\xc0\x00@@\x80@@\x00\xc0@\x80\xc0@\x00@\xc0\x80@\xc0\x00\xc0\xc0\x80\xc0\xc0@@@\xc0@@@\xc0@\xc0\xc0@@@\xc0\xc0@\xc0@\xc0\xc0\xc0\xc0\xc0 \x00\x00\xa0\x00\x00 \x80\x00\xa0\x80\x00 \x00\x80\xa0\x00\x80 \x80\x80\xa0\x80\x80`\x00\x00\xe0\x00\x00`\x80\x00\xe0\x80\x00`\x00\x80\xe0\x00\x80`\x80\x80\xe0\x80\x80 @\x00\xa0@\x00 \xc0\x00\xa0\xc0\x00 @\x80\xa0@\x80 \xc0\x80\xa0\xc0\x80`@\x00\xe0@\x00`\xc0\x00\xe0\xc0\x00`@\x80\xe0@\x80`\xc0\x80\xe0\xc0\x80 \x00@\xa0\x00@ \x80@\xa0\x80@ \x00\xc0\xa0\x00\xc0 \x80\xc0\xa0\x80\xc0`\x00@\xe0\x00@`\x80@\xe0\x80@`\x00\xc0\xe0\x00\xc0`\x80\xc0\xe0\x80\xc0 @@\xa0@@ \xc0@\xa0\xc0@ @\xc0\xa0@\xc0 \xc0\xc0\xa0\xc0\xc0`@@\xe0@@`\xc0@\xe0\xc0@`@\xc0\xe0@\xc0`\xc0\xc0\xe0\xc0\xc0\x00 \x00\x80 \x00\x00\xa0\x00\x80\xa0\x00\x00 \x80\x80 \x80\x00\xa0\x80\x80\xa0\x80@ \x00\xc0 \x00@\xa0\x00\xc0\xa0\x00@ \x80\xc0 \x80@\xa0\x80\xc0\xa0\x80\x00`\x00\x80`\x00\x00\xe0\x00\x80\xe0\x00\x00`\x80\x80`\x80\x00\xe0\x80\x80\xe0\x80@`\x00\xc0`\x00@\xe0\x00\xc0\xe0\x00@`\x80\xc0`\x80@\xe0\x80\xc0\xe0\x80\x00 @\x80 @\x00\xa0@\x80\xa0@\x00 \xc0\x80 \xc0\x00\xa0\xc0\x80\xa0\xc0@ @\xc0 @@\xa0@\xc0\xa0@@ \xc0\xc0 \xc0@\xa0\xc0\xc0\xa0\xc0\x00`@\x80`@\x00\xe0@\x80\xe0@\x00`\xc0\x80`\xc0\x00\xe0\xc0\x80\xe0\xc0@`@\xc0`@@\xe0@\xc0\xe0@@`\xc0\xc0`\xc0@\xe0\xc0\xc0\xe0\xc0  \x00\xa0 \x00 \xa0\x00\xa0\xa0\x00  \x80\xa0 \x80 \xa0\x80\xa0\xa0\x80` \x00\xe0 \x00`\xa0\x00\xe0\xa0\x00` \x80\xe0 \x80`\xa0\x80\xe0\xa0\x80 `\x00\xa0`\x00 \xe0\x00\xa0\xe0\x00 `\x80\xa0`\x80 
\xe0\x80\xa0\xe0\x80``\x00\xe0`\x00`\xe0\x00\xe0\xe0\x00``\x80\xe0`\x80`\xe0\x80\xe0\xe0\x80  @\xa0 @ \xa0@\xa0\xa0@  \xc0\xa0 \xc0 \xa0\xc0\xa0\xa0\xc0` @\xe0 @`\xa0@\xe0\xa0@` \xc0\xe0 \xc0`\xa0\xc0\xe0\xa0\xc0 `@\xa0`@ \xe0@\xa0\xe0@ `\xc0\xa0`\xc0 \xe0\xc0\xa0\xe0\xc0``@\xe0`@`\xe0@\xe0\xe0@``\xc0\xe0`\xc0`\xe0\xc0\xe0\xe0\xc0'
-mose_palette = b'\x00\x00\x00\xe4\x1a\x1c7~\xb8M\xafJ\x98N\xa3\xff\x7f\x00\xff\xff3\xa6V(\xf7\x81\xbf\x99\x99\x99f\xc2\xa5\xfc\x8db\x8d\xa0\xcb\xe7\x8a\xc3\xa6\xd8T\xff\xd9/\xe5\xc4\x94\xb3\xb3\xb3\x8d\xd3\xc7\xff\xff\xb3\xbe\xba\xda\xfb\x80r\x80\xb1\xd3\xfd\xb4b\xb3\xdei\xfc\xcd\xe5\xd9\xd9\xd9\xbc\x80\xbd\xcc\xeb\xc5\xff\xedo'
-class Results(object):  # Reads predicted masks stored as <root_dir>/<sequence>/<frame_id>.png.
-    def __init__(self, root_dir):
-        self.root_dir = root_dir  # base directory joined with sequence and frame id in _read_mask
-    def _read_mask(self, sequence, frame_id):  # Load one frame as a NumPy array; on IOError prints a hint and exits the process.
-        try:
-            mask_path = os.path.join(self.root_dir, sequence, f'{frame_id}.png')
-            # BUGFIX
-            # Workaround: images that are not palette-mode ('P') PNGs are mapped
-            # back from RGB colours to object ids below.
-            img = Image.open(mask_path)
-            if img.mode != 'P':
-                img_color = np.array(img)
-                h, w, three = img_color.shape  # unpacking raises ValueError unless the array is 3-dimensional
-                assert three == 3
-                img_new = np.ones((h, w), dtype=np.uint8) * 255  # 255 marks pixels not matched to any palette colour
-                color_map_np = np.frombuffer(davis_palette, dtype=np.uint8).reshape(-1, 3).copy()
-                for i in range(10):  # only the first 10 palette entries (ids 0-9) are recognised
-                    cur_color = color_map_np[i]
-                    mask = np.all(img_color == cur_color, axis=-1)
-                    img_new[mask] = i
-                assert not np.all(img_new == 255).any()  # fails only if *every* pixel is unmatched; NOTE(review): a per-pixel check may have been intended
-                img = img_new
-            # BUGFIX
-            return np.array(img)
-        except IOError as err:
-            sys.stdout.write(sequence + " frame %s not found!\n" % frame_id)
-            sys.stdout.write("The frames have to be indexed PNG files placed inside the corespondent sequence "
-                             "folder.\nThe indexes have to match with the initial frame.\n")
-            sys.stderr.write("IOError: " + err.strerror + "\n")
-            sys.exit()  # terminates the interpreter instead of propagating the error to the caller
-    def read_masks(self, sequence, masks_id):  # Load the frames in `masks_id`; returns a boolean array shaped (num_objects, len(masks_id), *frame_shape).
-        mask_0 = self._read_mask(sequence, masks_id[0])  # read up front only to learn the frame shape
-        masks = np.zeros((len(masks_id), *mask_0.shape))
-        for ii, m in enumerate(masks_id):
-            masks[ii, ...] = self._read_mask(sequence, m)  # the first frame is read a second time here
-        num_objects = int(np.max(masks))  # highest id present; leftover 255 pixels from _read_mask would inflate this
-        tmp = np.ones((num_objects, *masks.shape))
-        tmp = tmp * np.arange(1, num_objects + 1)[:, None, None, None]  # plane k is filled with the id k + 1
-        masks = (tmp == masks[None, ...]) > 0  # one boolean plane per object id
-        return masks

ext/davis2017/utils.py DELETED Viewed

@@ -1,174 +0,0 @@
-import os
-import errno
-import numpy as np
-from PIL import Image
-import warnings
-from ext.davis2017.davis import DAVIS
-def _pascal_color_map(N=256, normalized=False):  # Returns an (N, 3) array holding one RGB row per label index.
-    """
-    Python implementation of the color map function for the PASCAL VOC data set.
-    Official Matlab version can be found in the PASCAL VOC devkit
-    http://host.robots.ox.ac.uk/pascal/VOC/voc2012/index.html#devkit
-    """
-    def bitget(byteval, idx):
-        return (byteval & (1 << idx)) != 0  # True when bit `idx` of `byteval` is set
-    dtype = 'float32' if normalized else 'uint8'
-    cmap = np.zeros((N, 3), dtype=dtype)
-    for i in range(N):
-        r = g = b = 0
-        c = i
-        for j in range(8):
-            r = r | (bitget(c, 0) << 7 - j)  # `7 - j` binds tighter than `<<`, so the bit lands at position 7 - j
-            g = g | (bitget(c, 1) << 7 - j)
-            b = b | (bitget(c, 2) << 7 - j)
-            c = c >> 3  # consume three bits of the index per round
-        cmap[i] = np.array([r, g, b])
-    cmap = cmap / 255 if normalized else cmap  # scale to [0, 1] only for the float variant
-    return cmap
-def overlay_semantic_mask(im, ann, alpha=0.5, colors=None, contour_thickness=None):  # Blend the label colours of `ann` onto a copy of `im` and return it.
-    im, ann = np.asarray(im, dtype=np.uint8), np.asarray(ann, dtype=np.int)  # NOTE(review): the `np.int` alias was removed in NumPy 1.24
-    if im.shape[:-1] != ann.shape:
-        raise ValueError('First two dimensions of `im` and `ann` must match')
-    if im.shape[-1] != 3:
-        raise ValueError('im must have three channels at the 3 dimension')
-    colors = colors or _pascal_color_map()  # NOTE(review): `or` raises ValueError if `colors` is a multi-element ndarray
-    colors = np.asarray(colors, dtype=np.uint8)
-    mask = colors[ann]  # look up one colour per pixel from its label id
-    fg = im * alpha + (1 - alpha) * mask  # `alpha` weights the image, (1 - alpha) the label colour
-    img = im.copy()
-    img[ann > 0] = fg[ann > 0]  # pixels labelled 0 keep their original colour
-    if contour_thickness:  # pragma: no cover
-        import cv2  # imported lazily, only when contours are requested
-        for obj_id in np.unique(ann[ann > 0]):
-            contours = cv2.findContours((ann == obj_id).astype(
-                np.uint8), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2:]  # keep the last two return values; presumably for OpenCV versions that return three — confirm
-            cv2.drawContours(img, contours[0], -1, colors[obj_id].tolist(),
-                             contour_thickness)
-    return img
-def generate_obj_proposals(davis_root, subset, num_proposals, save_path):  # Write rectangular grid-tile proposal masks to save_path/<seq>/<mask_id>.png.
-    dataset = DAVIS(davis_root, subset=subset, codalab=True)
-    for seq in dataset.get_sequences():
-        save_dir = os.path.join(save_path, seq)
-        if os.path.exists(save_dir):  # output folder already present: skip this sequence
-            continue
-        all_gt_masks, all_masks_id = dataset.get_all_masks(seq, True)  # NOTE(review): unpacks two values; confirm what DAVIS.get_all_masks returns
-        img_size = all_gt_masks.shape[2:]  # assumes masks are (objects, frames, H, W) — TODO confirm
-        num_rows = int(np.ceil(np.sqrt(num_proposals)))  # side of the smallest square grid with >= num_proposals cells
-        proposals = np.zeros((num_proposals, len(all_masks_id), *img_size))
-        height_slices = np.floor(np.arange(0, img_size[0] + 1, img_size[0]/num_rows)).astype(np.uint).tolist()  # row boundaries of the grid
-        width_slices = np.floor(np.arange(0, img_size[1] + 1, img_size[1]/num_rows)).astype(np.uint).tolist()  # column boundaries of the grid
-        ii = 0
-        prev_h, prev_w = 0, 0
-        for h in height_slices[1:]:
-            for w in width_slices[1:]:
-                proposals[ii, :, prev_h:h, prev_w:w] = 1  # same rectangle in every frame of proposal ii
-                prev_w = w
-                ii += 1
-                if ii == num_proposals:
-                    break
-            prev_h, prev_w = h, 0  # next grid row starts again at the left edge
-            if ii == num_proposals:  # leave the outer loop too once enough tiles exist
-                break
-        os.makedirs(save_dir, exist_ok=True)
-        for i, mask_id in enumerate(all_masks_id):
-            mask = np.sum(proposals[:, i, ...] * np.arange(1, proposals.shape[0] + 1)[:, None, None], axis=0)  # collapse to one id map: proposal k gets id k + 1
-            save_mask(mask, os.path.join(save_dir, f'{mask_id}.png'))
-def generate_random_permutation_gt_obj_proposals(davis_root, subset, save_path):  # Save the ground-truth masks with object ids randomly shuffled per sequence.
-    dataset = DAVIS(davis_root, subset=subset, codalab=True)
-    for seq in dataset.get_sequences():
-        gt_masks, all_masks_id = dataset.get_all_masks(seq, True)  # NOTE(review): unpacks two values; confirm what DAVIS.get_all_masks returns
-        obj_swap = np.random.permutation(np.arange(gt_masks.shape[0]))  # unseeded, so the order differs between runs
-        gt_masks = gt_masks[obj_swap, ...]  # reorder along axis 0 (presumably the object axis — confirm)
-        save_dir = os.path.join(save_path, seq)
-        os.makedirs(save_dir, exist_ok=True)
-        for i, mask_id in enumerate(all_masks_id):
-            mask = np.sum(gt_masks[:, i, ...] * np.arange(1, gt_masks.shape[0] + 1)[:, None, None], axis=0)  # collapse to one id map: plane k gets id k + 1
-            save_mask(mask, os.path.join(save_dir, f'{mask_id}.png'))
-def color_map(N=256, normalized=False):  # Returns an (N, 3) colour table; same computation as _pascal_color_map in this file.
-    def bitget(byteval, idx):
-        return ((byteval & (1 << idx)) != 0)  # True when bit `idx` of `byteval` is set
-    dtype = 'float32' if normalized else 'uint8'
-    cmap = np.zeros((N, 3), dtype=dtype)
-    for i in range(N):
-        r = g = b = 0
-        c = i
-        for j in range(8):
-            r = r | (bitget(c, 0) << 7-j)  # `7-j` binds tighter than `<<`, so the bit lands at position 7 - j
-            g = g | (bitget(c, 1) << 7-j)
-            b = b | (bitget(c, 2) << 7-j)
-            c = c >> 3  # consume three bits of the index per round
-        cmap[i] = np.array([r, g, b])
-    cmap = cmap/255 if normalized else cmap  # scale to [0, 1] only for the float variant
-    return cmap
-def save_mask(mask, img_path):  # Save an id map to `img_path` with the colour table from color_map() attached.
-    if np.max(mask) > 255:  # ids are stored as uint8 below, so larger values cannot be represented
-        raise ValueError('Maximum id pixel value is 255')
-    mask_img = Image.fromarray(mask.astype(np.uint8))
-    mask_img.putpalette(color_map().flatten().tolist())  # flat [r0, g0, b0, r1, ...] list for all 256 entries
-    mask_img.save(img_path)
-def db_statistics(per_frame_values):
-    """ Compute mean,recall and decay from per-frame evaluation.
-    Arguments:
-        per_frame_values (ndarray): per-frame evaluation
-    Returns:
-        M,O,D (float,float,float):
-            return evaluation statistics: mean,recall,decay.
-    """
-    # strip off nan values
-    with warnings.catch_warnings():
-        warnings.simplefilter("ignore", category=RuntimeWarning)  # silences the RuntimeWarning category (e.g. nanmean on all-NaN input)
-        M = np.nanmean(per_frame_values)
-        O = np.nanmean(per_frame_values > 0.5)  # the comparison is False for NaN, so NaN frames count as misses
-    N_bins = 4
-    ids = np.round(np.linspace(1, len(per_frame_values), N_bins + 1) + 1e-10) - 1  # N_bins + 1 zero-based bin boundaries
-    ids = ids.astype(np.uint8)  # NOTE(review): uint8 wraps for indices above 255, i.e. more than 256 frames
-    D_bins = [per_frame_values[ids[i]:ids[i + 1] + 1] for i in range(0, 4)]  # NOTE(review): 4 is hard-coded instead of N_bins
-    with warnings.catch_warnings():
-        warnings.simplefilter("ignore", category=RuntimeWarning)
-        D = np.nanmean(D_bins[0]) - np.nanmean(D_bins[3])  # decay = mean of the first bin minus mean of the last bin
-    return M, O, D
-def list_files(dir, extension=".png"):  # Names (extension stripped) of the entries in `dir` ending with `extension`; `dir` shadows the builtin.
-    return [os.path.splitext(file_)[0] for file_ in os.listdir(dir) if file_.endswith(extension)]  # order is whatever os.listdir yields
-def force_symlink(file1, file2):  # Create symlink file2 -> file1, replacing an existing file2.
-    try:
-        os.symlink(file1, file2)
-    except OSError as e:
-        if e.errno == errno.EEXIST:  # target name already taken: remove it before retrying
-            os.remove(file2)
-        os.symlink(file1, file2)  # as indented here the retry also runs for non-EEXIST errors, which then raise again

main.py CHANGED Viewed

@@ -107,7 +107,7 @@ def get_points_with_draw(image, img_state, evt: gr.SelectData):
     return image
-def segment_point(image, img_state):
     output_img = img_state.img
     h, w = output_img.shape[:2]
@@ -125,37 +125,47 @@ def segment_point(image, img_state):
         gt_instances = InstanceData(
             point_coords=selected_point,
         )
-        pb_labels = torch.ones(len(gt_instances), dtype=torch.long, device=device)
-        gt_instances.pb_labels = pb_labels
-        batch_data_samples[0].gt_instances_collected = gt_instances
         batch_data_samples[0].set_metainfo(dict(batch_input_shape=(im_h, im_w)))
         batch_data_samples[0].set_metainfo(dict(img_shape=(h, w)))
         is_prompt = True
     else:
         batch_data_samples = [DetDataSample()]
         batch_data_samples[0].set_metainfo(dict(batch_input_shape=(im_h, im_w)))
         batch_data_samples[0].set_metainfo(dict(img_shape=(h, w)))
         is_prompt = False
     with torch.no_grad():
-        masks, cls_pred = model.predict_with_point(img_tensor, batch_data_samples)
-    assert len(masks) == 1
-    masks = masks[0]
     if is_prompt:
         masks = masks[0, :h, :w]
-        masks = masks > 0.   # no sigmoid
         rgb_shape = tuple(list(masks.shape) + [3])
         color = np.zeros(rgb_shape, dtype=np.uint8)
         color[masks] = np.array([97, 217, 54])
         output_img = (output_img * 0.7 + color * 0.3).astype(np.uint8)
         output_img = Image.fromarray(output_img)
     else:
-        output_img = visualizer._draw_panoptic_seg(
-            output_img,
-            masks['pan_results'].to('cpu').numpy(),
-            classes=CocoPanopticDataset.METAINFO['classes'],
-            palette=CocoPanopticDataset.METAINFO['palette']
-        )
     return image, output_img
@@ -177,6 +187,12 @@ def register_point_mode():
         with gr.Row():
             with gr.Column():
                 with gr.Row():
                     with gr.Column():
                         segment_btn = gr.Button("Segment", variant="primary")
@@ -209,7 +225,7 @@ def register_point_mode():
         segment_btn.click(
             segment_point,
-            [img_p, img_state],
             [img_p, segm_p]
         )

     return image
+def segment_point(image, img_state, mode):
     output_img = img_state.img
     h, w = output_img.shape[:2]
         gt_instances = InstanceData(
             point_coords=selected_point,
         )
+        pb_labels = torch.zeros(len(gt_instances), dtype=torch.long, device=device)
+        gt_instances.bp = pb_labels
+        batch_data_samples[0].gt_instances = gt_instances
+        batch_data_samples[0].data_tag = 'sam'
         batch_data_samples[0].set_metainfo(dict(batch_input_shape=(im_h, im_w)))
         batch_data_samples[0].set_metainfo(dict(img_shape=(h, w)))
         is_prompt = True
     else:
         batch_data_samples = [DetDataSample()]
+        batch_data_samples[0].data_tag = 'coco'
         batch_data_samples[0].set_metainfo(dict(batch_input_shape=(im_h, im_w)))
         batch_data_samples[0].set_metainfo(dict(img_shape=(h, w)))
         is_prompt = False
     with torch.no_grad():
+        results = model.predict(img_tensor, batch_data_samples, rescale=False)
+    masks = results[0]
     if is_prompt:
         masks = masks[0, :h, :w]
+        masks = masks > 0.  # no sigmoid
         rgb_shape = tuple(list(masks.shape) + [3])
         color = np.zeros(rgb_shape, dtype=np.uint8)
         color[masks] = np.array([97, 217, 54])
         output_img = (output_img * 0.7 + color * 0.3).astype(np.uint8)
         output_img = Image.fromarray(output_img)
     else:
+        if mode == 'Panoptic Segmentation':
+            output_img = visualizer._draw_panoptic_seg(
+                output_img,
+                masks['pan_results'].to('cpu').numpy(),
+                classes=CocoPanopticDataset.METAINFO['classes'],
+                palette=CocoPanopticDataset.METAINFO['palette']
+            )
+        elif mode == 'Instance Segmentation':
+            masks['ins_results'] = masks['ins_results'][masks['ins_results'].scores > .2]
+            output_img = visualizer._draw_instances(
+                output_img,
+                masks['ins_results'].to('cpu').numpy(),
+                classes=CocoPanopticDataset.METAINFO['classes'],
+                palette=CocoPanopticDataset.METAINFO['palette']
+            )
     return image, output_img
         with gr.Row():
             with gr.Column():
+                mode = gr.Radio(
+                    ["Panoptic Segmentation", "Instance Segmentation"],
+                    label="Mode",
+                    value="Panoptic Segmentation",
+                    info="Please select the segmentation mode. (Ignored if provided with prompt.)"
+                )
                 with gr.Row():
                     with gr.Column():
                         segment_btn = gr.Button("Segment", variant="primary")
         segment_btn.click(
             segment_point,
+            [img_p, img_state, mode],
             [img_p, segm_p]
         )

seg/models/detectors/mask2former_vid.py CHANGED Viewed

@@ -140,21 +140,23 @@ class Mask2formerVideo(SingleStageDetector):
             assert len(self.OVERLAPPING) == self.num_classes
             mask_cls_results = self.open_voc_inference(feats, mask_cls_results, mask_pred_results)
-        if self.inference_sam:
-            for idx, data_sample in enumerate(batch_data_samples):
-                results = InstanceData()
-                mask = mask_pred_results[idx]
-                img_height, img_width = data_sample.metainfo['img_shape'][:2]
-                mask = mask[:, :img_height, :img_width]
-                ori_height, ori_width = data_sample.metainfo['ori_shape'][:2]
-                mask = F.interpolate(
-                    mask[:, None],
-                    size=(ori_height, ori_width),
-                    mode='bilinear',
-                    align_corners=False)[:, 0]
-                results.masks = mask.sigmoid() > 0.5
-                data_sample.pred_instances = results
-            return batch_data_samples
         if num_frames > 0:
             for frame_id in range(num_frames):
@@ -178,7 +180,7 @@ class Mask2formerVideo(SingleStageDetector):
             )
             results = self.add_pred_to_datasample(batch_data_samples, results_list)
-        return results
     def add_pred_to_datasample(self, data_samples: SampleList,
                                results_list: List[dict]) -> SampleList:

             assert len(self.OVERLAPPING) == self.num_classes
             mask_cls_results = self.open_voc_inference(feats, mask_cls_results, mask_pred_results)
+        if batch_data_samples[0].data_tag == 'sam':
+            return mask_pred_results.cpu().numpy()
+        # # if self.inference_sam:
+        #     for idx, data_sample in enumerate(batch_data_samples):
+        #         results = InstanceData()
+        #         mask = mask_pred_results[idx]
+        #         img_height, img_width = data_sample.metainfo['img_shape'][:2]
+        #         mask = mask[:, :img_height, :img_width]
+        #         ori_height, ori_width = data_sample.metainfo['ori_shape'][:2]
+        #         mask = F.interpolate(
+        #             mask[:, None],
+        #             size=(ori_height, ori_width),
+        #             mode='bilinear',
+        #             align_corners=False)[:, 0]
+        #         results.masks = mask.sigmoid() > 0.5
+        #         data_sample.pred_instances = results
+        #     return batch_data_samples
         if num_frames > 0:
             for frame_id in range(num_frames):
             )
             results = self.add_pred_to_datasample(batch_data_samples, results_list)
+        return results_list
     def add_pred_to_datasample(self, data_samples: SampleList,
                                results_list: List[dict]) -> SampleList:

seg/models/utils/__init__.py CHANGED Viewed

@@ -2,6 +2,4 @@ from .video_gt_preprocess import preprocess_video_panoptic_gt
 from .mask_pool import mask_pool
 from .pan_seg_transform import INSTANCE_OFFSET_HB, mmpan2hbpan, mmgt2hbpan
 from .class_overlapping import calculate_class_overlapping
-from .online_pq_utils import cal_pq, IoUObj, NO_OBJ_ID
 from .no_obj import NO_OBJ
-from .offline_video_metrics import vpq_eval, stq

 from .mask_pool import mask_pool
 from .pan_seg_transform import INSTANCE_OFFSET_HB, mmpan2hbpan, mmgt2hbpan
 from .class_overlapping import calculate_class_overlapping
 from .no_obj import NO_OBJ

seg/models/utils/offline_video_metrics.py DELETED Viewed

@@ -1,114 +0,0 @@
-import numpy as np
-from seg.models.utils import NO_OBJ, INSTANCE_OFFSET_HB
-def vpq_eval(element, num_classes=-1, max_ins=INSTANCE_OFFSET_HB, ign_id=NO_OBJ):  # Per-class IoU sum, TP, FN and FP counts for one (pred_ids, gt_ids) pair of id maps.
-    assert num_classes != -1  # the default is a sentinel: callers must pass the real class count
-    import six
-    pred_ids, gt_ids = element
-    offset = 1e7  # 1e7 > 200 * max_ins
-    assert offset > num_classes * max_ins
-    num_cat = num_classes + 1  # one extra slot beyond the real classes
-    iou_per_class = np.zeros(num_cat, dtype=np.float64)
-    tp_per_class = np.zeros(num_cat, dtype=np.float64)
-    fn_per_class = np.zeros(num_cat, dtype=np.float64)
-    fp_per_class = np.zeros(num_cat, dtype=np.float64)
-    def _ids_to_counts(id_array):  # map each distinct id to its number of occurrences
-        ids, counts = np.unique(id_array, return_counts=True)
-        return dict(six.moves.zip(ids, counts))
-    pred_areas = _ids_to_counts(pred_ids)
-    gt_areas = _ids_to_counts(gt_ids)
-    void_id = ign_id * max_ins  # ignore category with instance index 0
-    ign_ids = {
-        gt_id for gt_id in six.iterkeys(gt_areas)
-        if (gt_id // max_ins) == ign_id  # ids are decoded as category = id // max_ins
-    }
-    int_ids = gt_ids.astype(np.uint64) * offset + pred_ids.astype(np.uint64)  # one key per (gt, pred) pair; `offset` is a float, so the result is floating point
-    int_areas = _ids_to_counts(int_ids)
-    def prediction_void_overlap(pred_id):  # count of `pred_id` entries paired with the void id
-        void_int_id = void_id * offset + pred_id
-        return int_areas.get(void_int_id, 0)
-    def prediction_ignored_overlap(pred_id):  # count of `pred_id` entries paired with any ignore-category id
-        total_ignored_overlap = 0
-        for _ign_id in ign_ids:
-            int_id = _ign_id * offset + pred_id
-            total_ignored_overlap += int_areas.get(int_id, 0)
-        return total_ignored_overlap
-    gt_matched = set()
-    pred_matched = set()
-    for int_id, int_area in six.iteritems(int_areas):
-        gt_id = int(int_id // offset)  # undo the pairing done for int_ids
-        gt_cat = int(gt_id // max_ins)
-        pred_id = int(int_id % offset)
-        pred_cat = int(pred_id // max_ins)
-        if gt_cat != pred_cat:  # only same-category pairs can match
-            continue
-        union = (
-                gt_areas[gt_id] + pred_areas[pred_id] - int_area -
-                prediction_void_overlap(pred_id)  # prediction area overlapping void is left out of the union
-        )
-        iou = int_area / union
-        if iou > 0.5:  # counted as a true positive
-            tp_per_class[gt_cat] += 1
-            iou_per_class[gt_cat] += iou
-            gt_matched.add(gt_id)
-            pred_matched.add(pred_id)
-    for gt_id in six.iterkeys(gt_areas):  # unmatched ground-truth ids become false negatives
-        if gt_id in gt_matched:
-            continue
-        cat_id = gt_id // max_ins
-        if cat_id == ign_id:  # ignore-category ids are never counted as misses
-            continue
-        fn_per_class[cat_id] += 1
-    for pred_id in six.iterkeys(pred_areas):  # unmatched predicted ids become false positives
-        if pred_id in pred_matched:
-            continue
-        if (prediction_ignored_overlap(pred_id) / pred_areas[pred_id]) > 0.5:  # mostly over ignored area: not penalised
-            continue
-        cat = pred_id // max_ins
-        fp_per_class[cat] += 1
-    return iou_per_class, tp_per_class, fn_per_class, fp_per_class
-def stq(element, num_classes=19, max_ins=10000, ign_id=NO_OBJ, num_things=8, label_divisor=1e4, ins_divisor=1e7):  # Build the id arrays for one (y_pred, y_true) pair: semantic pairs, thing preds, thing labels, intersections.
-    y_pred, y_true = element
-    y_true = y_true.astype(np.int64)
-    y_pred = y_pred.astype(np.int64)
-    # semantic eval
-    semantic_label = y_true // max_ins  # ids are decoded as category = id // max_ins
-    semantic_prediction = y_pred // max_ins
-    semantic_label = np.where(semantic_label != ign_id,
-                              semantic_label, num_classes)  # ignore category is remapped to index num_classes
-    semantic_prediction = np.where(semantic_prediction != ign_id,
-                                   semantic_prediction, num_classes)
-    semantic_ids = np.reshape(semantic_label, [-1]) * label_divisor + np.reshape(semantic_prediction, [-1])  # one value per (label, prediction) pair; floating point because label_divisor is a float
-    # instance eval
-    instance_label = y_true % max_ins
-    label_mask = np.less(semantic_label, num_things)  # categories below num_things are treated as things
-    prediction_mask = np.less(semantic_label, num_things)  # NOTE(review): uses semantic_label, not semantic_prediction — confirm this is intended
-    is_crowd = np.logical_and(instance_label == 0, label_mask)  # thing pixels with instance index 0
-    label_mask = np.logical_and(label_mask, np.logical_not(is_crowd))
-    prediction_mask = np.logical_and(prediction_mask, np.logical_not(is_crowd))
-    seq_preds = y_pred[prediction_mask]
-    seg_labels = y_true[label_mask]
-    non_crowd_intersection = np.logical_and(label_mask, prediction_mask)
-    intersection_ids = (y_true[non_crowd_intersection] * ins_divisor + y_pred[non_crowd_intersection])  # one value per (gt id, pred id) pair
-    return semantic_ids, seq_preds, seg_labels, intersection_ids

seg/models/utils/online_pq_utils.py DELETED Viewed

@@ -1,73 +0,0 @@
-from seg.models.utils.no_obj import NO_OBJ
-from seg.models.utils.pan_seg_transform import INSTANCE_OFFSET_HB
-from panopticapi.evaluation import PQStat
-NO_OBJ_ID = NO_OBJ * INSTANCE_OFFSET_HB
-class IoUObj:  # Accumulator for an intersection count and a union count.
-    def __init__(self, intersection: int = 0, union: int = 0):
-        self.intersection = intersection
-        self.union = union
-    def __iadd__(self, other):  # `a += b` adds both counters of `b` in place
-        self.intersection += other.intersection
-        self.union += other.union
-        return self
-    def __isub__(self, other):  # `a -= b` subtracts both counters of `b` in place
-        self.intersection -= other.intersection
-        self.union -= other.union
-        return self
-    def is_legal(self):  # True while neither counter has gone negative
-        return self.intersection >= 0 and self.union >= 0
-    @property
-    def iou(self):
-        return self.intersection / self.union  # raises ZeroDivisionError for the default integer union of 0
-def cal_pq(global_intersection_info, classes):  # Fill a PQStat with TP/IoU/FN/FP counts from a mapping keyed by (gt_id, pred_id).
-    num_classes = len(classes)
-    gt_matched = set()
-    pred_matched = set()
-    gt_all = set()
-    pred_all = set()
-    pq_stat = PQStat()
-    for gt_id, pred_id in global_intersection_info:  # iterating the mapping yields (gt_id, pred_id) keys
-        gt_cat = gt_id // INSTANCE_OFFSET_HB  # ids are decoded as category = id // INSTANCE_OFFSET_HB
-        pred_cat = pred_id // INSTANCE_OFFSET_HB
-        assert pred_cat < num_classes
-        if global_intersection_info[gt_id, pred_id].union == 0:  # skip empty pairs; also avoids dividing by zero in .iou
-            continue
-        if gt_cat == NO_OBJ:  # pairs against the no-object category are not collected
-            continue
-        gt_all.add(gt_id)
-        pred_all.add(pred_id)
-        if gt_cat != pred_cat:  # only same-category pairs can match
-            continue
-        iou = global_intersection_info[gt_id, pred_id].iou
-        if iou > 0.5:  # counted as a true positive
-            pq_stat[gt_cat].tp += 1
-            pq_stat[gt_cat].iou += iou
-            gt_matched.add(gt_id)
-            pred_matched.add(pred_id)
-    for gt_id in gt_all:  # unmatched ground-truth ids become false negatives
-        gt_cat = gt_id // INSTANCE_OFFSET_HB
-        if gt_id in gt_matched:
-            continue
-        pq_stat[gt_cat].fn += 1
-    for pred_id in pred_all:  # unmatched predicted ids become false positives
-        pred_cat = pred_id // INSTANCE_OFFSET_HB
-        if pred_id in pred_matched:
-            continue
-        if global_intersection_info[NO_OBJ_ID, pred_id].iou > 0.5:  # mostly over no-object area: not penalised; NOTE(review): unguarded lookup and division — confirm the mapping supplies a non-zero-union default
-            continue
-        pq_stat[pred_cat].fp += 1
-    return pq_stat