# ------------------------------------------------------------------------
# Copyright (c) 2023-present, BAAI. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ------------------------------------------------------------------------

# pyre-unsafe
"""Image utilities."""

import numpy as np
import PIL.Image
import torch


def im_resize(img, size=None, scale=None, mode="linear"):
    """Resize image by the scale or size."""
    if size is None:
        if not isinstance(scale, (tuple, list)):
            scale = (scale, scale)
        h, w = img.shape[:2]
        size = int(h * scale[0] + 0.5), int(w * scale[1] + 0.5)
    else:
        if not isinstance(size, (tuple, list)):
            size = (size, size)
    resize_modes = {"linear": PIL.Image.BILINEAR}
    # Lazy import: torchvision is only needed for the tensor-to-PIL conversion.
    from torchvision.transforms import ToPILImage

    # Convert the tensor to a PIL image before resizing.
    img = ToPILImage()(img.to(torch.float32).cpu())
    # PIL's resize expects (width, height), while ``size`` is (height, width).
    return np.array(img.resize(size[::-1], resize_modes[mode]))
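
# Illustrative usage (a sketch, not from the original docs; assumes ``img`` is
# a 2-D float tensor, which ToPILImage converts to a grayscale PIL image):
#   mask = torch.rand(480, 640)
#   out = im_resize(mask, size=(240, 320))  # np.ndarray of shape (240, 320)
#   out = im_resize(mask, scale=0.5)        # np.ndarray of shape (240, 320)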


def im_rescale(img, scales, max_size=0):
    """Rescale image to match the detecting scales."""
    im_shape = img.shape
    img_list, img_scales = [], []
    size_min = np.min(im_shape[:2])
    size_max = np.max(im_shape[:2])
    for target_size in scales:
        # Scale the shorter side to ``target_size``, then clamp the scale so
        # the longer side does not exceed ``max_size`` (or ``target_size``).
        im_scale = float(target_size) / float(size_min)
        target_size_max = max_size if max_size > 0 else target_size
        if np.round(im_scale * size_max) > target_size_max:
            im_scale = float(target_size_max) / float(size_max)
        img_list.append(im_resize(img, scale=im_scale))
        img_scales.append((im_scale, im_scale))
    return img_list, img_scales
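
# Illustrative usage (a sketch; shapes follow from the clamping logic above,
# assuming a 480x640 input): the short side is scaled towards 1024, then the
# scale is clamped so the long side stays within ``max_size``:
#   mask = torch.rand(480, 640)
#   imgs, scales = im_rescale(mask, scales=[1024], max_size=1024)
#   # imgs[0].shape == (768, 1024), scales[0] == (1.6, 1.6)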


def im_vstack(arrays, fill_value=None, dtype=None, size=None, align=None):
    """Stack image arrays in sequence vertically."""
    if fill_value is None:
        return np.vstack(arrays)
    # Compute the max stack shape.
    max_shape = np.max(np.stack([arr.shape for arr in arrays]), 0)
    if size is not None and min(size) > 0:
        max_shape[: len(size)] = size
    if align is not None and min(align) > 0:
        align_size = np.ceil(max_shape[: len(align)] / align)
        max_shape[: len(align)] = align_size.astype("int64") * align
    # Fill output with the given value.
    output_dtype = dtype or arrays[0].dtype
    output_shape = [len(arrays)] + list(max_shape)
    output = np.empty(output_shape, output_dtype)
    output[:] = fill_value
    # Copy arrays.
    for i, arr in enumerate(arrays):
        copy_slices = (slice(0, d) for d in arr.shape)
        output[(i,) + tuple(copy_slices)] = arr
    return output
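
# Illustrative usage (a sketch): pad two HWC uint8 images to a common aligned
# shape and stack them into one (N, H, W, C) batch filled with ``fill_value``:
#   a = np.zeros((480, 640, 3), dtype=np.uint8)
#   b = np.zeros((512, 600, 3), dtype=np.uint8)
#   batch = im_vstack([a, b], fill_value=127, align=(32, 32))
#   # batch.shape == (2, 512, 640, 3)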