Spaces:

BAAI
/

tokenize-anything

Running on A10G

App Files Files Community

PhyscalX committed on Jan 7

Commit

825a49c

•

1 Parent(s): ae507fe

Sync with main repo

Browse files

Files changed (27) hide show

app.py +6 -6
models/tap_vit_l_548184.pkl +3 -0
tokenize_anything/__init__.py +1 -1
tokenize_anything/engine/__init__.py +29 -0
tokenize_anything/engine/build.py +25 -0
tokenize_anything/engine/lr_scheduler.py +76 -0
tokenize_anything/{test_engine.py → engine/test_engine.py} +1 -1
tokenize_anything/engine/utils.py +153 -0
tokenize_anything/layers/__init__.py +26 -0
tokenize_anything/layers/drop.py +39 -0
tokenize_anything/layers/loss.py +82 -0
tokenize_anything/layers/utils.py +64 -0
tokenize_anything/modeling/concept_projector.py +2 -2
tokenize_anything/modeling/image_decoder.py +4 -7
tokenize_anything/modeling/image_encoder.py +6 -3
tokenize_anything/modeling/image_tokenizer.py +8 -9
tokenize_anything/modeling/text_decoder.py +5 -2
tokenize_anything/models/__init__.py +18 -0
tokenize_anything/{build_model.py → models/easy_build.py} +3 -3
tokenize_anything/prompters/__init__.py +18 -0
tokenize_anything/prompters/visual_prompter.py +106 -0
tokenize_anything/utils/logging.py +129 -0
tokenize_anything/utils/profiler/__init__.py +20 -0
tokenize_anything/utils/profiler/stats.py +42 -0
tokenize_anything/utils/{timer.py → profiler/timer.py} +12 -1
tokenize_anything/utils/registry.py +54 -0
tokenize_anything/utils/tensorboard.py +68 -0

app.py CHANGED Viewed

@@ -23,18 +23,18 @@ import time
 import numpy as np
 import torch
-from tokenize_anything import test_engine
 from tokenize_anything.utils.image import im_rescale
 from tokenize_anything.utils.image import im_vstack
 def parse_args():
     """Parse arguments."""
-    parser = argparse.ArgumentParser(description="Launch gradio app.")
     parser.add_argument("--model-type", type=str, default="tap_vit_l")
-    parser.add_argument("--checkpoint", type=str, default="models/tap_vit_l_03f8ec.pkl")
     parser.add_argument("--concept", type=str, default="concepts/merged_2560.pkl")
-    parser.add_argument("--device", nargs="+", type=int, default=[0], help="Index of devices.")
     return parser.parse_args()
@@ -94,7 +94,7 @@ class Predictor(object):
         # Generate captions.
         sem_tokens = outputs["sem_tokens"][mask_index].unsqueeze_(1)
         captions = self.model.generate_text(sem_tokens).reshape(batch_shape)
-        # Postprecess results.
         results = []
         for i in range(batch_shape[0]):
             pred_h, pred_w = im_info[i, :2].astype("int")
@@ -227,7 +227,7 @@ if __name__ == "__main__":
     args = parse_args()
     queues = [mp.Queue(1024) for _ in range(len(args.device) + 1)]
     commands = [
-        test_engine.InferenceCommand(
             queues[i],
             queues[-1],
             kwargs={

 import numpy as np
 import torch
+from tokenize_anything import engine
 from tokenize_anything.utils.image import im_rescale
 from tokenize_anything.utils.image import im_vstack
 def parse_args():
     """Parse arguments."""
+    parser = argparse.ArgumentParser(description="Launch gradio application")
     parser.add_argument("--model-type", type=str, default="tap_vit_l")
+    parser.add_argument("--checkpoint", type=str, default="models/tap_vit_l_548184.pkl")
     parser.add_argument("--concept", type=str, default="concepts/merged_2560.pkl")
+    parser.add_argument("--device", nargs="+", type=int, default=[0], help="Index of devices")
     return parser.parse_args()
         # Generate captions.
         sem_tokens = outputs["sem_tokens"][mask_index].unsqueeze_(1)
         captions = self.model.generate_text(sem_tokens).reshape(batch_shape)
+        # Postprocess results.
         results = []
         for i in range(batch_shape[0]):
             pred_h, pred_w = im_info[i, :2].astype("int")
     args = parse_args()
     queues = [mp.Queue(1024) for _ in range(len(args.device) + 1)]
     commands = [
+        engine.InferenceCommand(
             queues[i],
             queues[-1],
             kwargs={

models/tap_vit_l_548184.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e1d3a11c572af8cb6bce8016d3a6c6948bba4959ea43811f0e984b9eafeee413
+size 811637521

tokenize_anything/__init__.py CHANGED Viewed

@@ -15,5 +15,5 @@
 # ------------------------------------------------------------------------
 """Tokenize Anything via Prompting."""
-from tokenize_anything.build_model import model_registry
 from tokenize_anything.version import __version__

 # ------------------------------------------------------------------------
 """Tokenize Anything via Prompting."""
+from tokenize_anything.models import model_registry
 from tokenize_anything.version import __version__

tokenize_anything/engine/__init__.py ADDED Viewed

	@@ -0,0 +1,29 @@

+# ------------------------------------------------------------------------
+# Copyright (c) 2023-present, BAAI. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ------------------------------------------------------------------------
+"""Engine components."""
+from tokenize_anything.engine.build import build_tensorboard
+from tokenize_anything.engine.test_engine import InferenceCommand
+from tokenize_anything.engine.utils import apply_ddp_group
+from tokenize_anything.engine.utils import count_params
+from tokenize_anything.engine.utils import create_ddp_group
+from tokenize_anything.engine.utils import freeze_module
+from tokenize_anything.engine.utils import get_ddp_group
+from tokenize_anything.engine.utils import get_ddp_rank
+from tokenize_anything.engine.utils import get_device
+from tokenize_anything.engine.utils import get_param_groups
+from tokenize_anything.engine.utils import load_weights
+from tokenize_anything.engine.utils import manual_seed

tokenize_anything/engine/build.py ADDED Viewed

	@@ -0,0 +1,25 @@

+# ------------------------------------------------------------------------
+# Copyright (c) 2023-present, BAAI. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ------------------------------------------------------------------------
+"""Build for engine."""
+def build_tensorboard(log_dir):
+    """Build the tensorboard."""
+    from tokenize_anything.utils.tensorboard import TensorBoard
+    if TensorBoard.is_available():
+        return TensorBoard(log_dir)
+    return None

tokenize_anything/engine/lr_scheduler.py ADDED Viewed

	@@ -0,0 +1,76 @@

+# ------------------------------------------------------------------------
+# Copyright (c) 2023-present, BAAI. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ------------------------------------------------------------------------
+"""Learning rate schedulers."""
+import math
+class ConstantLR(object):
+    """Constant LR scheduler."""
+    def __init__(self, **kwargs):
+        self._lr_max = kwargs.pop("lr_max")
+        self._lr_min = kwargs.pop("lr_min", 0)
+        self._warmup_steps = kwargs.pop("warmup_steps", 0)
+        self._warmup_factor = kwargs.pop("warmup_factor", 0)
+        if kwargs:
+            raise ValueError("Unexpected arguments: " + ",".join(v for v in kwargs))
+        self._step_count = 0
+        self._last_decay = 1.0
+    def step(self):
+        self._step_count += 1
+    def get_lr(self):
+        if self._step_count < self._warmup_steps:
+            alpha = (self._step_count + 1.0) / self._warmup_steps
+            return self._lr_max * (alpha + (1.0 - alpha) * self._warmup_factor)
+        return self._lr_min + (self._lr_max - self._lr_min) * self.get_decay()
+    def get_decay(self):
+        return self._last_decay
+class CosineLR(ConstantLR):
+    """LR scheduler with cosine decay."""
+    def __init__(self, lr_max, max_steps, lr_min=0, decay_step=1, **kwargs):
+        super(CosineLR, self).__init__(lr_max=lr_max, lr_min=lr_min, **kwargs)
+        self._decay_step = decay_step
+        self._max_steps = max_steps
+    def get_decay(self):
+        t = self._step_count - self._warmup_steps
+        t_max = self._max_steps - self._warmup_steps
+        if t > 0 and t % self._decay_step == 0:
+            self._last_decay = 0.5 * (1.0 + math.cos(math.pi * t / t_max))
+        return self._last_decay
+class LinearLR(ConstantLR):
+    """LR scheduler with linear decay."""
+    def __init__(self, lr_max, max_steps, lr_min=0, decay_step=1, **kwargs):
+        super(LinearLR, self).__init__(lr_max=lr_max, lr_min=lr_min, **kwargs)
+        self._decay_step = decay_step
+        self._max_steps = max_steps
+    def get_decay(self):
+        t = self._step_count - self._warmup_steps
+        t_max = self._max_steps - self._warmup_steps
+        if t > 0 and t % self._decay_step == 0:
+            self._last_decay = 1.0 - float(t) / t_max
+        return self._last_decay

tokenize_anything/{test_engine.py → engine/test_engine.py} RENAMED Viewed

@@ -17,7 +17,7 @@
 import time
-from tokenize_anything.build_model import model_registry
 class InferenceCommand(object):

 import time
+from tokenize_anything.models.easy_build import model_registry
 class InferenceCommand(object):

tokenize_anything/engine/utils.py ADDED Viewed

	@@ -0,0 +1,153 @@

+# ------------------------------------------------------------------------
+# Copyright (c) 2023-present, BAAI. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ------------------------------------------------------------------------
+"""Engine utilities."""
+import collections
+import functools
+import pickle
+import torch
+import numpy as np
+from tokenize_anything.utils import logging
+GLOBAL_DDP_GROUP = None
+def count_params(module, trainable=True, unit="M"):
+    """Return the number of parameters."""
+    counts = [v.size().numel() for v in module.parameters() if v.requires_grad or (not trainable)]
+    return sum(counts) / {"M": 1e6, "B": 1e9}[unit]
+def freeze_module(module):
+    """Freeze parameters of given module."""
+    module.eval()
+    for param in module.parameters():
+        param.requires_grad = False
+def get_device(index):
+    """Create the available device object."""
+    if torch.cuda.is_available():
+        return torch.device("cuda", index)
+    for device_type in ("mps",):
+        try:
+            if getattr(torch.backends, device_type).is_available():
+                return torch.device(device_type, index)
+        except AttributeError:
+            pass
+    return torch.device("cpu")
+def get_param_groups(module, layer_lr_decay=1.0):
+    """Separate parameters into groups."""
+    memo, groups, inner = {}, collections.OrderedDict(), module
+    if isinstance(module, torch.nn.parallel.DistributedDataParallel):
+        inner = module.module
+    lr_scale_getter = None
+    if layer_lr_decay < 1.0 and hasattr(inner.image_encoder, "get_lr_scale"):
+        lr_scale_getter = functools.partial(inner.image_encoder.get_lr_scale, decay=layer_lr_decay)
+    for name, param in module.named_parameters():
+        if not param.requires_grad:
+            continue
+        attrs = collections.OrderedDict()
+        if lr_scale_getter:
+            attrs["lr_scale"] = lr_scale_getter(name)
+        memo[name] = param.shape
+        no_weight_decay = not (name.endswith("weight") and param.dim() > 1)
+        no_weight_decay = getattr(param, "no_weight_decay", no_weight_decay)
+        if no_weight_decay:
+            attrs["weight_decay"] = 0
+        group_name = "/".join(["%s:%s" % (v[0], v[1]) for v in list(attrs.items())])
+        if group_name not in groups:
+            groups[group_name] = {"params": []}
+            groups[group_name].update(attrs)
+        groups[group_name]["params"].append(param)
+    return list(groups.values())
+def load_weights(module, weights_file, prefix_removed="", strict=True):
+    """Load a weights file."""
+    if not weights_file:
+        return
+    if weights_file.endswith(".pkl"):
+        with open(weights_file, "rb") as f:
+            state_dict = pickle.load(f)
+            for k, v in state_dict.items():
+                state_dict[k] = torch.from_numpy(v) if isinstance(v, np.ndarray) else v
+    else:
+        state_dict = torch.load(weights_file)
+    if prefix_removed:
+        new_state_dict = type(state_dict)()
+        for k in list(state_dict.keys()):
+            new_state_dict[k.replace(prefix_removed, "")] = state_dict.pop(k)
+        state_dict = new_state_dict
+    module.load_state_dict(state_dict, strict=strict)
+def manual_seed(seed, device_and_seed=None):
+    """Set the cpu and device random seed."""
+    torch.manual_seed(seed)
+    if device_and_seed is not None:
+        device_index, device_seed = device_and_seed
+        device_type = get_device(device_index).type
+        np.random.seed(device_seed)
+        if device_type in ("cuda", "mps"):
+            getattr(torch, device_type).manual_seed(device_seed)
+def synchronize_device(device):
+    """Synchronize the computation of device."""
+    if device.type in ("cuda", "mps"):
+        getattr(torch, device.type).synchronize(device)
+def create_ddp_group(cfg, ranks=None, devices=None, num_nodes=1):
+    """Create group for data parallelism."""
+    if not torch.distributed.is_initialized():
+        torch.distributed.init_process_group(backend="nccl")
+    world_rank = torch.distributed.get_rank()
+    ranks = ranks if ranks else [i for i in range(cfg.NUM_GPUS)]
+    logging.set_root(world_rank == ranks[0])
+    devices_per_node = len(ranks) // num_nodes
+    devices = devices if devices else [i % devices_per_node for i in range(len(ranks))]
+    cfg.GPU_ID = devices[world_rank]
+    torch.cuda.set_device(cfg.GPU_ID)
+    global GLOBAL_DDP_GROUP
+    GLOBAL_DDP_GROUP = torch.distributed.new_group(ranks)
+    return GLOBAL_DDP_GROUP
+def get_ddp_group():
+    """Return the process group for data parallelism."""
+    return GLOBAL_DDP_GROUP
+def get_ddp_rank():
+    """Return the rank in the data parallelism group."""
+    ddp_group = get_ddp_group()
+    if ddp_group is None:
+        return 0
+    return torch.distributed.get_rank(ddp_group)
+def apply_ddp_group(module):
+    """Apply data parallelism group for given module."""
+    ddp_group = get_ddp_group()
+    if ddp_group is None:
+        return module
+    return torch.nn.parallel.DistributedDataParallel(module, process_group=ddp_group)

tokenize_anything/layers/__init__.py ADDED Viewed

	@@ -0,0 +1,26 @@

+# ------------------------------------------------------------------------
+# Copyright (c) 2023-present, BAAI. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ------------------------------------------------------------------------
+"""Layers."""
+from tokenize_anything.layers.drop import DropPath
+from tokenize_anything.layers.loss import BinaryDiceLoss
+from tokenize_anything.layers.loss import BinaryFocalLoss
+from tokenize_anything.layers.loss import CrossEntropyLoss
+from tokenize_anything.layers.utils import init_cross_conv
+from tokenize_anything.layers.utils import resize_pos_embed
+from tokenize_anything.layers.utils import set_dropout
+from tokenize_anything.layers.utils import set_drop_path
+from tokenize_anything.layers.utils import set_sync_batch_norm

tokenize_anything/layers/drop.py ADDED Viewed

	@@ -0,0 +1,39 @@

+# ------------------------------------------------------------------------
+# Copyright (c) 2023-present, BAAI. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ------------------------------------------------------------------------
+"""Drop regularization layers."""
+from torch import nn
+class DropPath(nn.Module):
+    """Set examples to zero randomly."""
+    def __init__(self, p=0.1, inplace=False):
+        super(DropPath, self).__init__()
+        self.p = p
+        self.inplace = inplace
+    def forward(self, input):
+        if not self.training or self.p <= 0:
+            return input
+        keep_p = 1 - self.p
+        shape = (input.shape[0],) + (1,) * (input.dim() - 1)
+        scale = input.new_empty(shape).bernoulli_(keep_p).div_(keep_p)
+        return input.mul_(scale) if self.inplace else input.mul(scale)
+    def extra_repr(self):
+        inplace_str = ", inplace" if self.inplace else ""
+        return "p={}{}".format(self.p, inplace_str)

tokenize_anything/layers/loss.py ADDED Viewed

	@@ -0,0 +1,82 @@

+# ------------------------------------------------------------------------
+# Copyright (c) 2023-present, BAAI. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ------------------------------------------------------------------------
+"""Loss layers."""
+from torch import nn
+def reduce_loss(loss, reduction="mean"):
+    """Reduce the loss."""
+    if reduction == "mean" or reduction == "sum":
+        return getattr(loss, reduction)()
+    if reduction == "batch_mean":
+        return loss.sum().mul_(1.0 / loss.size(0))
+    return loss
+class BinaryFocalLoss(nn.Module):
+    """Binary focal loss."""
+    def __init__(self, alpha=0.25, reduction="none"):
+        super(BinaryFocalLoss, self).__init__()
+        self.alpha = alpha
+        self.reduction = reduction
+    def forward(self, input, target):
+        alpha, p = self.alpha, input.sigmoid()
+        neg_alpha, neg_target = 1.0 - alpha, 1.0 - target
+        alpha_weight = target.mul(alpha).add_(neg_target.mul(neg_alpha))
+        focal_weight = (1.0 - p).mul_(target).add_(p.mul(neg_target)).square()
+        loss = nn.functional.binary_cross_entropy_with_logits(input, target, reduction="none")
+        return reduce_loss(loss * focal_weight.mul_(alpha_weight), self.reduction)
+class BinaryDiceLoss(nn.Module):
+    """Binary dice loss."""
+    def __init__(self, eps=1.0, reduction="none"):
+        super(BinaryDiceLoss, self).__init__()
+        self.eps = eps
+        self.reduction = reduction
+    def forward(self, input, target):
+        input = input.sigmoid()
+        num = input.mul(target).sum(-1).mul_(2).add_(self.eps)
+        den = input.add(target).sum(-1).add_(self.eps)
+        return reduce_loss(1.0 - num / den, self.reduction)
+class CrossEntropyLoss(nn.Module):
+    """Cross entropy loss with label smoothing."""
+    def __init__(self, epsilon=0, reduction="none"):
+        super(CrossEntropyLoss, self).__init__()
+        self.epsilon = epsilon
+        self.reduction = reduction
+    def forward_dense(self, input, target):
+        dim, target = input.shape[-1], target.squeeze_()
+        x = nn.functional.log_softmax(input, dim=-1)
+        y = nn.functional.one_hot(target, dim).float()
+        x = x.permute([0, x.dim() - 1] + list(range(x.dim()))[1:-1]) if x.dim() > 2 else x
+        y = y.permute([0, y.dim() - 1] + list(range(y.dim()))[1:-1]) if y.dim() > 2 else y
+        loss = nn.functional.cross_entropy(x, y, reduction="none", label_smoothing=self.epsilon)
+        return reduce_loss(loss, self.reduction)
+    def forward(self, input, target):
+        if self.epsilon > 0:
+            return self.forward_dense(input, target)
+        return nn.functional.cross_entropy(input, target, reduction=self.reduction)

tokenize_anything/layers/utils.py ADDED Viewed

	@@ -0,0 +1,64 @@

+# ------------------------------------------------------------------------
+# Copyright (c) 2023-present, BAAI. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ------------------------------------------------------------------------
+"""Layer utilities."""
+import cv2
+import numpy as np
+import torch
+def init_cross_conv(blocks):
+    """Initialize convolutional cross attention."""
+    for m in blocks.modules():
+        if isinstance(m, torch.nn.Conv2d):
+            torch.nn.init.kaiming_normal_(m.weight, mode="fan_out", nonlinearity="relu")
+    for blk in blocks:
+        torch.nn.init.constant_(blk.norm3.weight, 0)
+def set_dropout(module, dropout):
+    """Initialize dropout."""
+    for m in [m for m in module.modules() if isinstance(m, torch.nn.Dropout)]:
+        m.p = dropout
+def set_drop_path(blocks, drop_path):
+    """Initialize drop path."""
+    if not isinstance(blocks, torch.nn.ModuleList):
+        blocks = getattr(blocks, "blocks", getattr(blocks, "layers", None))
+    for i, blk in enumerate(blocks):
+        for m in [m for m in blk.modules() if type(m).__name__ == "DropPath"]:
+            m.p = i * drop_path / (len(blocks) - 1)
+def set_sync_batch_norm(module, ddp_group):
+    """Set data parallelism group for sync batch norm."""
+    for m in module.modules():
+        if isinstance(m, torch.nn.SyncBatchNorm):
+            m.process_group = ddp_group
+def resize_pos_embed(weight, out_len):
+    """Resize position embedding weights."""
+    out_h = out_w = int(out_len**0.5)
+    h = w = int(weight.shape[0] ** 0.5)
+    weight = weight.reshape((h, w, weight.shape[1]))
+    out_weight = [
+        cv2.resize(x, (out_w, out_h), interpolation=cv2.INTER_CUBIC)
+        for x in np.split(weight.astype("float32", copy=False), 4, axis=-1)
+    ]
+    out_weight = np.concatenate(out_weight, axis=-1)
+    return out_weight.reshape((-1, weight.shape[-1])).astype(weight.dtype, copy=False)

tokenize_anything/modeling/concept_projector.py CHANGED Viewed

@@ -51,11 +51,11 @@ class ConceptProjector(nn.Module):
             proj = proj.to(device=embeds.device)
         return embeds, proj
-    def encode_src(self, src_embeds):
         """Encode source visual embedding via concept projection."""
         src_embeds, self.src_weights = self.maybe_convert(src_embeds, self.src_weights)
         logits = nn.functional.normalize(src_embeds, dim=-1) @ self.src_weights
-        return nn.functional.log_softmax(logits, dim=-1)
     def encode_tgt(self, tgt_embeds):
         """Encode target visual embedding via concept projection."""

             proj = proj.to(device=embeds.device)
         return embeds, proj
+    def encode_src(self, src_embeds, logpi=True):
         """Encode source visual embedding via concept projection."""
         src_embeds, self.src_weights = self.maybe_convert(src_embeds, self.src_weights)
         logits = nn.functional.normalize(src_embeds, dim=-1) @ self.src_weights
+        return nn.functional.log_softmax(logits, dim=-1) if logpi else logits
     def encode_tgt(self, tgt_embeds):
         """Encode target visual embedding via concept projection."""

tokenize_anything/modeling/image_decoder.py CHANGED Viewed

@@ -76,7 +76,6 @@ class Block(nn.Module):
         num_heads=8,
         attn_ratio=0.5,
         mlp_dim=2048,
-        dropout=0.1,
         activation_type="ReLU",
         skip_first_query_pos=False,
     ):
@@ -89,7 +88,7 @@ class Block(nn.Module):
         self.norm3 = nn.LayerNorm(dim)
         self.cross_attn_image_to_token = Attention(dim, num_heads, attn_ratio)
         self.norm4 = nn.LayerNorm(dim)
-        self.dropout = nn.Dropout(dropout, inplace=True)
         self.skip_first_query_pos = skip_first_query_pos
     def forward(self, query, key, query_pos, key_pos):
@@ -115,7 +114,6 @@ class Transformer(nn.Module):
         num_heads=8,
         attn_ratio=0.5,
         mlp_dim=2048,
-        dropout=0.1,
         activation_type="ReLU",
         depth=2,
     ):
@@ -126,7 +124,6 @@ class Transformer(nn.Module):
                 num_heads,
                 attn_ratio=attn_ratio,
                 mlp_dim=mlp_dim,
-                dropout=dropout,
                 activation_type=activation_type,
                 skip_first_query_pos=i == 0,
             )
@@ -134,7 +131,7 @@ class Transformer(nn.Module):
         )
         self.final_attn_token_to_image = Attention(embed_dim, num_heads, attn_ratio)
         self.norm = nn.LayerNorm(embed_dim)
-        self.dropout = nn.Dropout(dropout, inplace=True)
     def forward(self, query, key, query_pos, key_pos):
         for blk in self.blocks:
@@ -202,7 +199,7 @@ class ImageDecoder(nn.Module):
         query, key = self.transformer(query, key, query, inputs["img_pos"])
         # Upscale key.
         key = key.transpose(1, 2).view((-1, self.embed_dim) + img_embed_size)
-        output_masks = self.output_conv(key).flatten(2)
         # Unpack query.
         tokens = query[:, :num_tokens].unbind(dim=1)
         iou_tokens = tokens[num_tokens - self.num_mask_tokens - 1]
@@ -210,7 +207,7 @@ class ImageDecoder(nn.Module):
         sem_tokens = tokens[: self.num_mask_tokens]
         # Predict.
         mask_pred = [f(x) for f, x in zip(self.mask_pred, mask_tokens)]
-        mask_pred = torch.stack(mask_pred, dim=1) @ output_masks
         mask_pred_size = list(4 * embed_size for embed_size in img_embed_size)
         mask_pred = mask_pred.view([-1, self.num_mask_tokens] + mask_pred_size)
         outputs = {"iou_pred": self.iou_pred(iou_tokens), "mask_pred": mask_pred}

         num_heads=8,
         attn_ratio=0.5,
         mlp_dim=2048,
         activation_type="ReLU",
         skip_first_query_pos=False,
     ):
         self.norm3 = nn.LayerNorm(dim)
         self.cross_attn_image_to_token = Attention(dim, num_heads, attn_ratio)
         self.norm4 = nn.LayerNorm(dim)
+        self.dropout = nn.Dropout(0.1, inplace=True)
         self.skip_first_query_pos = skip_first_query_pos
     def forward(self, query, key, query_pos, key_pos):
         num_heads=8,
         attn_ratio=0.5,
         mlp_dim=2048,
         activation_type="ReLU",
         depth=2,
     ):
                 num_heads,
                 attn_ratio=attn_ratio,
                 mlp_dim=mlp_dim,
                 activation_type=activation_type,
                 skip_first_query_pos=i == 0,
             )
         )
         self.final_attn_token_to_image = Attention(embed_dim, num_heads, attn_ratio)
         self.norm = nn.LayerNorm(embed_dim)
+        self.dropout = nn.Dropout(0.1, inplace=True)
     def forward(self, query, key, query_pos, key_pos):
         for blk in self.blocks:
         query, key = self.transformer(query, key, query, inputs["img_pos"])
         # Upscale key.
         key = key.transpose(1, 2).view((-1, self.embed_dim) + img_embed_size)
+        mask_embeds = self.output_conv(key).flatten(2)
         # Unpack query.
         tokens = query[:, :num_tokens].unbind(dim=1)
         iou_tokens = tokens[num_tokens - self.num_mask_tokens - 1]
         sem_tokens = tokens[: self.num_mask_tokens]
         # Predict.
         mask_pred = [f(x) for f, x in zip(self.mask_pred, mask_tokens)]
+        mask_pred = torch.stack(mask_pred, dim=1) @ mask_embeds
         mask_pred_size = list(4 * embed_size for embed_size in img_embed_size)
         mask_pred = mask_pred.view([-1, self.num_mask_tokens] + mask_pred_size)
         outputs = {"iou_pred": self.iou_pred(iou_tokens), "mask_pred": mask_pred}

tokenize_anything/modeling/image_encoder.py CHANGED Viewed

@@ -17,6 +17,8 @@
 import torch
 from torch import nn
 def space_to_depth(input, block_size):
     """Rearrange blocks of spatial data into depth."""
@@ -84,10 +86,11 @@ class Block(nn.Module):
         self.attn = Attention(dim, num_heads, qkv_bias=qkv_bias)
         self.norm2 = nn.LayerNorm(dim)
         self.mlp = MLP(dim, mlp_ratio=mlp_ratio)
     def forward(self, x):
-        x = self.attn(self.norm1(x)).add_(x)
-        return self.mlp(self.norm2(x)).add_(x)
 class Bottleneck(nn.Module):
@@ -245,7 +248,7 @@ class ImageEncoderViT(nn.Module):
             if i in self.cross_indices or i == len(self.blocks) - 1:
                 x = self.norm(x) if i == len(self.blocks) - 1 else x
                 x = depth_to_space(x.reshape(wmsa_shape), self.window_size)
-                x = x.permute(0, 3, 1, 2)
             if i in self.cross_indices:
                 x = self.cross_conv[self.cross_indices.index(i)](x)
             if i in self.cross_indices and i < len(self.blocks) - 1:

 import torch
 from torch import nn
+from tokenize_anything import layers
 def space_to_depth(input, block_size):
     """Rearrange blocks of spatial data into depth."""
         self.attn = Attention(dim, num_heads, qkv_bias=qkv_bias)
         self.norm2 = nn.LayerNorm(dim)
         self.mlp = MLP(dim, mlp_ratio=mlp_ratio)
+        self.drop_path = layers.DropPath(0.1, inplace=True)
     def forward(self, x):
+        x = self.drop_path(self.attn(self.norm1(x))).add_(x)
+        return self.drop_path(self.mlp(self.norm2(x))).add_(x)
 class Bottleneck(nn.Module):
             if i in self.cross_indices or i == len(self.blocks) - 1:
                 x = self.norm(x) if i == len(self.blocks) - 1 else x
                 x = depth_to_space(x.reshape(wmsa_shape), self.window_size)
+                x = x.permute(0, 3, 1, 2).contiguous()
             if i in self.cross_indices:
                 x = self.cross_conv[self.cross_indices.index(i)](x)
             if i in self.cross_indices and i < len(self.blocks) - 1:

tokenize_anything/modeling/image_tokenizer.py CHANGED Viewed

@@ -45,13 +45,15 @@ class ImageTokenizer(nn.Module):
         self.register_buffer("pixel_mean", torch.Tensor(pixel_mean))
         self.register_buffer("pixel_rsig", torch.Tensor(pixel_std).reciprocal_())
-    def get_inputs(self, inputs):
         """Return the model inputs.
         Parameters
         ----------
         inputs : dict
             The initial inputs.
         Returns
         -------
@@ -59,13 +61,10 @@ class ImageTokenizer(nn.Module):
             The model inputs.
         """
-        if not isinstance(inputs["img"], torch.Tensor):
-            inputs["img"] = torch.from_numpy(inputs["img"])
-        if inputs["img"].device != self.pixel_mean.device:
-            inputs["img"] = inputs["img"].to(device=self.pixel_mean.device)
-        inputs["img"] = inputs["img"].to(dtype=self.pixel_mean.dtype)
-        inputs["img"] = inputs["img"].sub(self.pixel_mean).mul_(self.pixel_rsig)
-        inputs["img"] = inputs["img"].permute(0, 3, 1, 2)
         return inputs
     def get_features(self, inputs):
@@ -177,7 +176,7 @@ class ImageTokenizer(nn.Module):
             An array of generated texts.
         """
-        max_gen_len = max_gen_len or self.text_decoder.max_seq_len
         prompts = self.text_decoder.get_prompts(visual_tokens)
         out_shape = (prompts.size(0), self.text_decoder.max_text_len)
         tokens = np.full(out_shape, self.text_tokenizer.pad_id, "int64")

         self.register_buffer("pixel_mean", torch.Tensor(pixel_mean))
         self.register_buffer("pixel_rsig", torch.Tensor(pixel_std).reciprocal_())
+    def get_inputs(self, inputs, dtype=None):
         """Return the model inputs.
         Parameters
         ----------
         inputs : dict
             The initial inputs.
+        dtype : torch.dtype, optional
+            The optional input dtype.
         Returns
         -------
             The model inputs.
         """
+        img_dtype, img_device = self.pixel_mean.dtype, self.pixel_mean.device
+        inputs["img"] = torch.as_tensor(inputs["img"], dtype=img_dtype, device=img_device)
+        inputs["img"] = inputs["img"].sub(self.pixel_mean).mul_(self.pixel_rsig).permute(0, 3, 1, 2)
+        inputs["img"] = inputs["img"].to(dtype=dtype) if dtype else inputs["img"]
         return inputs
     def get_features(self, inputs):
             An array of generated texts.
         """
+        max_gen_len = max_gen_len or self.text_decoder.max_text_len
         prompts = self.text_decoder.get_prompts(visual_tokens)
         out_shape = (prompts.size(0), self.text_decoder.max_text_len)
         tokens = np.full(out_shape, self.text_tokenizer.pad_id, "int64")

tokenize_anything/modeling/text_decoder.py CHANGED Viewed

@@ -79,6 +79,7 @@ class TransformerCache(nn.Module):
         cache_v = self.cache_dict.get(f"{id(mixer)}_v", None)
         flash_args = {"softmax_scale": mixer.scale, "causal": True}
         if cache_k is None or cache_v is None:
             return flash_attn_func(q, k, v, **flash_args)
         flash_args["cache_seqlens"] = self.cache_dict["seq_lens"][: q.shape[0]]
         return flash_attn_with_kvcache(q, cache_k, cache_v, k, v, **flash_args)
@@ -94,6 +95,7 @@ class Attention(nn.Module):
         self.head_dim = dim // num_heads
         self.num_heads = num_heads
         self.scale = self.head_dim**-0.5
         self.cache = nn.Module()
     def forward(self, x):
@@ -126,10 +128,11 @@ class Block(nn.Module):
         self.mlp = MLP(dim, mlp_dim, bias=bias)
         self.norm1 = nn.LayerNorm(dim)
         self.norm2 = nn.LayerNorm(dim)
     def forward(self, x):
-        x = self.attn(self.norm1(x)).add_(x)
-        return self.mlp(self.norm2(x)).add_(x)
 class Transformer(nn.Module):

         cache_v = self.cache_dict.get(f"{id(mixer)}_v", None)
         flash_args = {"softmax_scale": mixer.scale, "causal": True}
         if cache_k is None or cache_v is None:
+            flash_args["dropout_p"] = mixer.dropout.p if mixer.training else 0
             return flash_attn_func(q, k, v, **flash_args)
         flash_args["cache_seqlens"] = self.cache_dict["seq_lens"][: q.shape[0]]
         return flash_attn_with_kvcache(q, cache_k, cache_v, k, v, **flash_args)
         self.head_dim = dim // num_heads
         self.num_heads = num_heads
         self.scale = self.head_dim**-0.5
+        self.dropout = nn.Dropout(0.1, inplace=False)
         self.cache = nn.Module()
     def forward(self, x):
         self.mlp = MLP(dim, mlp_dim, bias=bias)
         self.norm1 = nn.LayerNorm(dim)
         self.norm2 = nn.LayerNorm(dim)
+        self.dropout = nn.Dropout(0.1, inplace=True)
     def forward(self, x):
+        x = self.dropout(self.attn(self.norm1(x))).add_(x)
+        return self.dropout(self.mlp(self.norm2(x))).add_(x)
 class Transformer(nn.Module):

tokenize_anything/models/__init__.py ADDED Viewed

	@@ -0,0 +1,18 @@

+# ------------------------------------------------------------------------
+# Copyright (c) 2023-present, BAAI. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ------------------------------------------------------------------------
+"""Models."""
+from tokenize_anything.models.easy_build import model_registry

tokenize_anything/{build_model.py → models/easy_build.py} RENAMED Viewed

@@ -13,7 +13,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ------------------------------------------------------------------------
-"""Build model."""
 from functools import partial
 import pickle
@@ -40,7 +40,7 @@ def get_device(device_index):
 def load_weights(module, weights_file, strict=True):
     """Load a weights file."""
     if not weights_file:
-        return module._IncompatibleKeys([], [])
     if weights_file.endswith(".pkl"):
         with open(weights_file, "rb") as f:
             state_dict = pickle.load(f)
@@ -48,7 +48,7 @@ def load_weights(module, weights_file, strict=True):
                 state_dict[k] = torch.from_numpy(v) if isinstance(v, np.ndarray) else v
     else:
         state_dict = torch.load(weights_file)
-    return module.load_state_dict(state_dict, strict=strict)
 def vit_encoder(depth, embed_dim, num_heads, out_dim, image_size):

 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ------------------------------------------------------------------------
+"""Easy model builder."""
 from functools import partial
 import pickle
 def load_weights(module, weights_file, strict=True):
     """Load a weights file."""
     if not weights_file:
+        return
     if weights_file.endswith(".pkl"):
         with open(weights_file, "rb") as f:
             state_dict = pickle.load(f)
                 state_dict[k] = torch.from_numpy(v) if isinstance(v, np.ndarray) else v
     else:
         state_dict = torch.load(weights_file)
+    module.load_state_dict(state_dict, strict=strict)
 def vit_encoder(depth, embed_dim, num_heads, out_dim, image_size):

tokenize_anything/prompters/__init__.py ADDED Viewed

	@@ -0,0 +1,18 @@

+# ------------------------------------------------------------------------
+# Copyright (c) 2023-present, BAAI. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ------------------------------------------------------------------------
+"""Prompters."""
+from tokenize_anything.prompters.visual_prompter import VisualPrompter

tokenize_anything/prompters/visual_prompter.py ADDED Viewed

	@@ -0,0 +1,106 @@

+# ------------------------------------------------------------------------
+# Copyright (c) 2023-present, BAAI. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ------------------------------------------------------------------------
+"""Generate visual prompts."""
+import collections
+import numpy as np
+import numpy.random as npr
class VisualPrompter(object):
    """Sample point and box prompts from ground-truth boxes and masks.

    Prompts are accumulated per instance in ``self.coords`` / ``self.labels``.
    Labels written by this class: ``2`` / ``3`` for the two box corners,
    ``1`` for a point drawn from the ground-truth mask, the ground-truth mask
    value for a point drawn from an error region, and ``padding_index`` for
    unused slots.
    """

    def __init__(self, image_size=1024, max_points=9, num_experts=4, padding_index=4):
        super(VisualPrompter, self).__init__()
        # A scalar size is expanded to a two-element (height, width) list.
        if not isinstance(image_size, (tuple, list)):
            image_size = [image_size] * 2
        self.image_size = image_size
        self.max_points = max_points
        self.padding_index = padding_index
        # Only the first expert receives a non-zero weight.
        self.point_weight = [1000] + [0] * (num_experts - 1)
        self.num_stages = 2
        self.stage_count = 0
        self.box_prob = 0.5
        # Number of prompt slots already filled, keyed by instance index.
        self.coord_count = collections.defaultdict(int)
        self.coords = self.labels = self.boxes_turn = None

    @property
    def is_last_stage(self):
        """Return whether ``stage_count`` points at the final stage."""
        return self.stage_count == self.num_stages - 1

    def add_point(self, index, gt_masks, error_masks=None, num=1):
        """Append ``num`` point prompts for instance ``index``.

        Points are drawn from ``error_masks[index]`` when it is given and
        non-empty; otherwise from ``gt_masks[index]``. If both are empty the
        slots keep the padding coordinate and padding label.
        """

        def draw(mask):
            # Returns (xs, ys) of sampled foreground pixels, or -0.5 sentinels.
            rows, cols = np.nonzero(mask)
            count = rows.shape[0]
            if count == 0:
                return [-0.5] * num, [-0.5] * num
            # Sample with replacement only when there are too few pixels.
            picked = npr.choice(count, size=(num,), replace=num > count)
            return cols[picked], rows[picked]

        labels = [self.padding_index] * num
        if error_masks is not None:  # FP or FN point.
            xs, ys = draw(error_masks[index])
            if ys[0] >= 0:
                labels = gt_masks[index, ys, xs]
        if labels[0] == self.padding_index:  # GT point.
            xs, ys = draw(gt_masks[index])
            if ys[0] >= 0:
                labels = [1] * num
        # Map pixel centers from mask resolution to image resolution.
        scale_x = self.image_size[1] / gt_masks.shape[2]
        scale_y = self.image_size[0] / gt_masks.shape[1]
        xs = (np.array(xs, "float32") + 0.5) * scale_x - 0.5
        ys = (np.array(ys, "float32") + 0.5) * scale_y - 0.5
        start = self.coord_count[index]
        window = slice(start, start + num)
        self.coords[index, window] = np.stack([xs, ys], axis=-1)
        self.labels[index, window] = labels
        self.coord_count[index] += num

    def add_box(self, index, gt_boxes):
        """Append a jittered box prompt (two corner points) for ``index``.

        Each side is perturbed by clipped Gaussian noise scaled by the box
        extent; the box can only grow, never shrink.
        """
        left, top, right, bottom = gt_boxes[index, :4]
        jitter_x = np.clip(npr.normal(0.0, 0.1 * (right - left), (2,)), -20, 20)
        jitter_y = np.clip(npr.normal(0.0, 0.1 * (bottom - top), (2,)), -20, 20)
        left = left + np.minimum(jitter_x[0], 0)
        top = top + np.minimum(jitter_y[0], 0)
        right = right + np.maximum(jitter_x[1], 0)
        bottom = bottom + np.maximum(jitter_y[1], 0)
        first = self.coord_count[index]
        self.coords[index, first] = (left, top)
        self.coords[index, first + 1] = (right, bottom)
        self.labels[index, first] = 2
        self.labels[index, first + 1] = 3
        self.coord_count[index] += 2

    def reset(self, num):
        """Clear all accumulated prompts and re-draw the box/point choice."""
        self.stage_count = 0
        self.coord_count.clear()
        slots = self.max_points + 1
        self.coords = np.full((num, slots, 2), -0.5, "float32")
        self.labels = np.full((num, slots), self.padding_index, "int64")
        self.boxes_turn = npr.rand(num) < self.box_prob

    def get_prompts(self, gt_boxes, gt_masks=None, masks=None):
        """Return the prompts for the current stage.

        Parameters
        ----------
        gt_boxes : numpy.ndarray
            Ground-truth boxes; the first four columns are read as x1, y1, x2, y2.
        gt_masks : numpy.ndarray, optional
            Ground-truth masks. When omitted, only box prompts are generated.
        masks : numpy.ndarray, optional
            Predicted masks, reshaped to ``gt_masks.shape`` to locate errors.

        Returns
        -------
        dict
            ``"points"`` as a ``(coords, labels)`` pair and ``"point_score"``.

        """
        num = gt_boxes.shape[0]
        first_stage = self.stage_count == 0
        if first_stage:
            self.reset(num)
        error_masks = None
        if masks is not None:
            masks = masks.reshape(gt_masks.shape)
            # Pixels where prediction and ground truth disagree.
            error_masks = masks ^ gt_masks
        if not first_stage:
            num_points = npr.randint(1, self.max_points + 1 - self.stage_count)
        elif self.box_prob == 0:
            num_points = npr.randint(2, self.max_points + 1)
        else:
            num_points = 1
        for index in range(num):
            is_box = self.stage_count == 0 and self.boxes_turn[index]
            if gt_masks is None or is_box:
                self.add_box(index, gt_boxes)
            else:
                self.add_point(index, gt_masks, error_masks, num_points)
        end = 1 + self.stage_count + num_points
        coords = self.coords[:, :end]
        labels = self.labels[:, :end]
        scores = (self.boxes_turn[:, None] - 0.5) * self.point_weight
        return {"points": (coords, labels), "point_score": scores}

tokenize_anything/utils/logging.py ADDED Viewed

	@@ -0,0 +1,129 @@

+# ------------------------------------------------------------------------
+# Copyright (c) 2023-present, BAAI. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ------------------------------------------------------------------------
+"""Logging utilities."""
+import inspect
+import logging as _logging
+import os
+import sys as _sys
+import threading
_logger = None
_logger_lock = threading.Lock()


def get_logger():
    """Return the shared ``tokenize-anything`` logger, creating it on first use.

    The logger is created at level ``INFO`` with propagation disabled and a
    single stream handler. The handler writes to stdout in an interactive
    session and to stderr otherwise.
    """
    global _logger
    # Use double-checked locking to avoid taking lock unnecessarily.
    if _logger is not None:
        return _logger
    with _logger_lock:
        if _logger is not None:
            return _logger
        logger = _logging.getLogger("tokenize-anything")
        logger.setLevel("INFO")
        logger.propagate = False
        logger._is_root = True
        # Determine whether we are in an interactive environment.
        try:
            # ``sys.ps1`` is only defined in interactive shells.
            interactive = bool(_sys.ps1)
        except AttributeError:
            # Even now, we may be in an interactive shell with `python -i`.
            interactive = bool(_sys.flags.interactive)
        target = _sys.stdout if interactive else _sys.stderr
        handler = _logging.StreamHandler(target)
        handler.setFormatter(_logging.Formatter("%(levelname)s %(message)s"))
        logger.addHandler(handler)
        _logger = logger
        return _logger


def _detailed_msg(msg):
    """Prefix ``msg`` with the file name and line of the logging call site.

    Must be called directly from a public logging function: the frame two
    levels up (caller of that function) is the one reported.
    """
    # context=0 skips reading source lines; file name and line are unchanged.
    file, lineno = inspect.stack(0)[2][1:3]
    return "{}:{}] {}".format(os.path.split(file)[-1], lineno, msg)


def log(level, msg, *args, **kwargs):
    """Log ``msg`` at an explicit ``level``."""
    get_logger().log(level, _detailed_msg(msg), *args, **kwargs)


def debug(msg, *args, **kwargs):
    """Log a debug message, only when the logger is marked as root."""
    if is_root():
        get_logger().debug(_detailed_msg(msg), *args, **kwargs)


def error(msg, *args, **kwargs):
    """Log an error message, then abort by raising ``AssertionError``."""
    get_logger().error(_detailed_msg(msg), *args, **kwargs)
    # Explicit raise: a bare ``assert`` would be stripped under ``python -O``.
    raise AssertionError(msg)


def fatal(msg, *args, **kwargs):
    """Log a fatal message, then abort by raising ``AssertionError``."""
    get_logger().fatal(_detailed_msg(msg), *args, **kwargs)
    # Explicit raise: a bare ``assert`` would be stripped under ``python -O``.
    raise AssertionError(msg)


def info(msg, *args, **kwargs):
    """Log an info message, only when the logger is marked as root."""
    if is_root():
        get_logger().info(_detailed_msg(msg), *args, **kwargs)


def warning(msg, *args, **kwargs):
    """Log a warning message, only when the logger is marked as root."""
    if is_root():
        get_logger().warning(_detailed_msg(msg), *args, **kwargs)


def get_verbosity():
    """Return how much logging output will be produced."""
    return get_logger().getEffectiveLevel()


def set_verbosity(v):
    """Set the threshold for what messages will be logged."""
    get_logger().setLevel(v)


def set_formatter(fmt=None, datefmt=None):
    """Replace the first handler with a stderr handler using a new format."""
    handler = _logging.StreamHandler(_sys.stderr)
    handler.setFormatter(_logging.Formatter(fmt, datefmt))
    logger = get_logger()
    # The handler list can be empty if a caller removed handlers manually.
    if logger.handlers:
        logger.removeHandler(logger.handlers[0])
    logger.addHandler(handler)


def set_root(is_root=True):
    """Mark the logger as root or non-root."""
    get_logger()._is_root = is_root


def is_root():
    """Return whether the logger is marked as root."""
    return get_logger()._is_root

tokenize_anything/utils/profiler/__init__.py ADDED Viewed

	@@ -0,0 +1,20 @@

+# ------------------------------------------------------------------------
+# Copyright (c) 2023-present, BAAI. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ------------------------------------------------------------------------
+"""Profiler utilities."""
+from tokenize_anything.utils.profiler.stats import SmoothedValue
+from tokenize_anything.utils.profiler.timer import Timer
+from tokenize_anything.utils.profiler.timer import get_progress

tokenize_anything/utils/profiler/stats.py ADDED Viewed

	@@ -0,0 +1,42 @@

+# ------------------------------------------------------------------------
+# Copyright (c) 2023-present, BAAI. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ------------------------------------------------------------------------
+"""Trackable statistics."""
+import collections
+import numpy as np
class SmoothedValue(object):
    """Track values and provide smoothed report.

    ``mean`` and ``median`` are computed over the most recent
    ``window_size`` values (all values when ``window_size`` is None);
    ``average`` is computed over every value ever passed to ``update``.
    """

    def __init__(self, window_size=None):
        self.deque = collections.deque(maxlen=window_size)
        self.total = 0.0
        self.count = 0

    def update(self, value):
        """Record a new value."""
        self.deque.append(value)
        self.count += 1
        self.total += value

    def mean(self):
        """Return the mean of the values in the current window."""
        return np.mean(self.deque)

    def median(self):
        """Return the median of the values in the current window."""
        return np.median(self.deque)

    def average(self):
        """Return the mean of all recorded values, or NaN if there are none."""
        # Avoid ZeroDivisionError before the first update; NaN matches what
        # the windowed statistics yield on an empty window.
        if self.count == 0:
            return float("nan")
        return self.total / self.count

tokenize_anything/utils/{timer.py → profiler/timer.py} RENAMED Viewed

@@ -9,13 +9,14 @@
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ------------------------------------------------------------------------
 """Timing functions."""
 import contextlib
 import time
@@ -49,3 +50,13 @@ class Timer(object):
     def toc(self, n=1, average=True):
         self.diff = time.time() - self.start_time
         return self.add_diff(self.diff, n, average)

 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, esither express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ------------------------------------------------------------------------
 """Timing functions."""
 import contextlib
+import datetime
 import time
     def toc(self, n=1, average=True):
         self.diff = time.time() - self.start_time
         return self.add_diff(self.diff, n, average)
def get_progress(timer, step, max_steps):
    """Return the progress information.

    Parameters
    ----------
    timer : object
        Any object exposing ``average_time`` (seconds per iteration).
    step : int
        The current step; progress is reported as ``(step + 1) / max_steps``.
    max_steps : int
        The total number of steps.

    Returns
    -------
    str
        A one-line summary with progress percentage, speed and ETA.

    """
    # Clamp so that running past ``max_steps`` reports a zero ETA instead of
    # a negative timedelta (which renders as "-1 day, ...").
    remaining_steps = max(max_steps - step, 0)
    eta_seconds = timer.average_time * remaining_steps
    eta = str(datetime.timedelta(seconds=int(eta_seconds)))
    progress = (step + 1.0) / max_steps
    return "< PROGRESS: {:.2%} | SPEED: {:.3f}s / iter | ETA: {} >".format(
        progress, timer.average_time, eta
    )

tokenize_anything/utils/registry.py ADDED Viewed

	@@ -0,0 +1,54 @@

+# ------------------------------------------------------------------------
+# Copyright (c) 2023-present, BAAI. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ------------------------------------------------------------------------
+"""Registry utilities."""
+import collections
+import functools
class Registry(object):
    """Ordered mapping from names to registered callables."""

    def __init__(self, name):
        self.name = name
        self.registry = collections.OrderedDict()

    def has(self, key):
        """Return whether ``key`` has been registered."""
        return key in self.registry

    def register(self, name, func=None, **kwargs):
        """Register a callable under one or several names.

        Usable as a decorator (``func`` omitted) or as a direct call. The
        stored entry is ``functools.partial(func, **kwargs)``; the function
        itself is returned unchanged.
        """

        def decorated(inner_function):
            keys = name if isinstance(name, (tuple, list)) else [name]
            for key in keys:
                self.registry[key] = functools.partial(inner_function, **kwargs)
            return inner_function

        return decorated if func is None else decorated(func)

    def get(self, name, default=None):
        """Return the entry for ``name``.

        ``None`` is returned for a ``None`` name. For an unknown name,
        ``default`` is returned when it is not None; otherwise KeyError is
        raised.
        """
        if name is None:
            return None
        if self.has(name):
            return self.registry[name]
        if default is None:
            raise KeyError("`%s` is not registered in <%s>." % (name, self.name))
        return default

    def try_get(self, name):
        """Return the entry for ``name``, or None when it is not registered."""
        return self.get(name) if self.has(name) else None

tokenize_anything/utils/tensorboard.py ADDED Viewed

	@@ -0,0 +1,68 @@

+# ------------------------------------------------------------------------
+# Copyright (c) 2023-present, BAAI. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ------------------------------------------------------------------------
+"""Tensorboard application."""
+import time
+import numpy as np
# TensorFlow is optional; ``tf`` stays None when it cannot be imported.
try:
    import tensorflow as tf
except ImportError:
    tf = None


class TensorBoard(object):
    """Thin wrapper over a TensorFlow summary file writer."""

    def __init__(self, log_dir=None):
        """Create a summary writer logging to ``log_dir``.

        When ``log_dir`` is None, a timestamped directory under ``./logs/``
        is used. Raises ImportError if tensorflow is unavailable.
        """
        if tf is None:
            raise ImportError("Failed to import ``tensorflow`` package.")
        # Hide all GPUs from TensorFlow.
        tf.config.set_visible_devices([], "GPU")
        if log_dir is None:
            stamp = time.strftime("%Y%m%d_%H%M%S", time.localtime())
            log_dir = "./logs/" + stamp
        self.writer = tf.summary.create_file_writer(log_dir)

    @staticmethod
    def is_available():
        """Return whether tensorflow could be imported."""
        return tf is not None

    def close(self):
        """Close the underlying summary writer."""
        self.writer.close()

    def histogram_summary(self, tag, values, step, buckets=10):
        """Write a histogram of ``values`` under ``tag`` at ``step``."""
        with self.writer.as_default():
            tf.summary.histogram(tag, values, step, buckets=buckets)

    def image_summary(self, tag, images, step, order="BGR"):
        """Write a batch of images under ``tag`` at ``step``.

        A tuple or list of images is stacked first. The batch must be
        4-dimensional; with ``order="BGR"`` the last axis is reversed
        before writing.
        """
        batch = np.stack(images) if isinstance(images, (tuple, list)) else images
        if len(batch.shape) != 4:
            raise ValueError("Images can not be packed to (N, H, W, C).")
        if order == "BGR":
            batch = batch[:, :, :, ::-1]
        with self.writer.as_default():
            tf.summary.image(tag, batch, step, max_outputs=batch.shape[0])

    def scalar_summary(self, tag, value, step):
        """Write a scalar ``value`` under ``tag`` at ``step``."""
        with self.writer.as_default():
            tf.summary.scalar(tag, value, step)