hysts (HF staff) committed
Commit f185807
1 Parent(s): 1e6091d
.gitattributes CHANGED
@@ -1,3 +1,4 @@
+*.mp4 filter=lfs diff=lfs merge=lfs -text
 *.7z filter=lfs diff=lfs merge=lfs -text
 *.arrow filter=lfs diff=lfs merge=lfs -text
 *.bin filter=lfs diff=lfs merge=lfs -text
@@ -17,6 +18,7 @@
 *.pth filter=lfs diff=lfs merge=lfs -text
 *.rar filter=lfs diff=lfs merge=lfs -text
 saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
 *.tar.* filter=lfs diff=lfs merge=lfs -text
 *.tflite filter=lfs diff=lfs merge=lfs -text
 *.tgz filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1 @@
+mmdet_configs/configs
.gitmodules ADDED
@@ -0,0 +1,3 @@
+[submodule "ViTPose"]
+	path = ViTPose
+	url = https://github.com/ViTAE-Transformer/ViTPose
.pre-commit-config.yaml ADDED
@@ -0,0 +1,46 @@
+exclude: ^(ViTPose/|mmdet_configs/configs/)
+repos:
+- repo: https://github.com/pre-commit/pre-commit-hooks
+  rev: v4.2.0
+  hooks:
+  - id: check-executables-have-shebangs
+  - id: check-json
+  - id: check-merge-conflict
+  - id: check-shebang-scripts-are-executable
+  - id: check-toml
+  - id: check-yaml
+  - id: double-quote-string-fixer
+  - id: end-of-file-fixer
+  - id: mixed-line-ending
+    args: ['--fix=lf']
+  - id: requirements-txt-fixer
+  - id: trailing-whitespace
+- repo: https://github.com/myint/docformatter
+  rev: v1.4
+  hooks:
+  - id: docformatter
+    args: ['--in-place']
+- repo: https://github.com/pycqa/isort
+  rev: 5.10.1
+  hooks:
+  - id: isort
+- repo: https://github.com/pre-commit/mirrors-mypy
+  rev: v0.812
+  hooks:
+  - id: mypy
+    args: ['--ignore-missing-imports']
+- repo: https://github.com/google/yapf
+  rev: v0.32.0
+  hooks:
+  - id: yapf
+    args: ['--parallel', '--in-place']
+- repo: https://github.com/kynan/nbstripout
+  rev: 0.5.0
+  hooks:
+  - id: nbstripout
+    args: ['--extra-keys', 'metadata.interpreter metadata.kernelspec cell.metadata.pycharm']
+- repo: https://github.com/nbQA-dev/nbQA
+  rev: 1.3.1
+  hooks:
+  - id: nbqa-isort
+  - id: nbqa-yapf
.style.yapf ADDED
@@ -0,0 +1,5 @@
+[style]
+based_on_style = pep8
+blank_line_before_nested_class_or_def = false
+spaces_before_comment = 2
+split_before_logical_operator = true
README.md CHANGED
@@ -4,7 +4,7 @@ emoji: 🦀
 colorFrom: gray
 colorTo: purple
 sdk: gradio
-sdk_version: 3.0.13
+sdk_version: 3.0.11
 app_file: app.py
 pinned: false
 ---
ViTPose ADDED
@@ -0,0 +1 @@
+Subproject commit 92d0aa2710b8e9136dc1712a1c13c12157e435e8
app.py ADDED
@@ -0,0 +1,158 @@
+#!/usr/bin/env python
+
+from __future__ import annotations
+
+import argparse
+import pathlib
+import tarfile
+
+import gradio as gr
+
+from model import AppModel
+
+DESCRIPTION = '''# ViTPose
+
+This is an unofficial demo for [https://github.com/ViTAE-Transformer/ViTPose](https://github.com/ViTAE-Transformer/ViTPose).
+
+Related app: [https://huggingface.co/spaces/Gradio-Blocks/ViTPose](https://huggingface.co/spaces/Gradio-Blocks/ViTPose)
+
+'''
+FOOTER = '<img id="visitor-badge" alt="visitor badge" src="https://visitor-badge.glitch.me/badge?page_id=hysts.vitpose_video" />'
+
+
+def parse_args() -> argparse.Namespace:
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--device', type=str, default='cpu')
+    parser.add_argument('--theme', type=str)
+    parser.add_argument('--share', action='store_true')
+    parser.add_argument('--port', type=int)
+    parser.add_argument('--disable-queue',
+                        dest='enable_queue',
+                        action='store_false')
+    return parser.parse_args()
+
+
+def set_example_video(example: list) -> dict:
+    return gr.Video.update(value=example[0])
+
+
+def extract_tar() -> None:
+    if pathlib.Path('mmdet_configs/configs').exists():
+        return
+    with tarfile.open('mmdet_configs/configs.tar') as f:
+        f.extractall('mmdet_configs')
+
+
+def main():
+    args = parse_args()
+
+    extract_tar()
+
+    model = AppModel(device=args.device)
+
+    with gr.Blocks(theme=args.theme, css='style.css') as demo:
+        gr.Markdown(DESCRIPTION)
+
+        with gr.Row():
+            with gr.Column():
+                input_video = gr.Video(label='Input Video',
+                                       format='mp4',
+                                       elem_id='input_video')
+                with gr.Group():
+                    detector_name = gr.Dropdown(
+                        list(model.det_model.MODEL_DICT.keys()),
+                        value=model.det_model.model_name,
+                        label='Detector')
+                    pose_model_name = gr.Dropdown(
+                        list(model.pose_model.MODEL_DICT.keys()),
+                        value=model.pose_model.model_name,
+                        label='Pose Model')
+                    det_score_threshold = gr.Slider(
+                        0,
+                        1,
+                        step=0.05,
+                        value=0.5,
+                        label='Box Score Threshold')
+                    max_num_frames = gr.Slider(
+                        1,
+                        300,
+                        step=1,
+                        value=60,
+                        label='Maximum Number of Frames')
+                    predict_button = gr.Button(value='Predict')
+                    pose_preds = gr.Variable()
+
+                paths = sorted(pathlib.Path('videos').rglob('*.mp4'))
+                example_videos = gr.Dataset(components=[input_video],
+                                            samples=[[path.as_posix()]
+                                                     for path in paths])
+
+            with gr.Column():
+                with gr.Group():
+                    result = gr.Video(label='Result',
+                                      format='mp4',
+                                      elem_id='result')
+                    vis_kpt_score_threshold = gr.Slider(
+                        0,
+                        1,
+                        step=0.05,
+                        value=0.3,
+                        label='Visualization Score Threshold')
+                    vis_dot_radius = gr.Slider(1,
+                                               10,
+                                               step=1,
+                                               value=4,
+                                               label='Dot Radius')
+                    vis_line_thickness = gr.Slider(1,
+                                                   10,
+                                                   step=1,
+                                                   value=2,
+                                                   label='Line Thickness')
+                    redraw_button = gr.Button(value='Redraw')
+
+        gr.Markdown(FOOTER)
+
+        detector_name.change(fn=model.det_model.set_model,
+                             inputs=detector_name,
+                             outputs=None)
+        pose_model_name.change(fn=model.pose_model.set_model,
+                               inputs=pose_model_name,
+                               outputs=None)
+        predict_button.click(fn=model.run,
+                             inputs=[
+                                 input_video,
+                                 detector_name,
+                                 pose_model_name,
+                                 det_score_threshold,
+                                 max_num_frames,
+                                 vis_kpt_score_threshold,
+                                 vis_dot_radius,
+                                 vis_line_thickness,
+                             ],
+                             outputs=[
+                                 result,
+                                 pose_preds,
+                             ])
+        redraw_button.click(fn=model.visualize_pose_results,
+                            inputs=[
+                                input_video,
+                                pose_preds,
+                                vis_kpt_score_threshold,
+                                vis_dot_radius,
+                                vis_line_thickness,
+                            ],
+                            outputs=result)
+
+        example_videos.click(fn=set_example_video,
+                             inputs=example_videos,
+                             outputs=input_video)
+
+    demo.launch(
+        enable_queue=args.enable_queue,
+        server_port=args.port,
+        share=args.share,
+    )
+
+
+if __name__ == '__main__':
+    main()
mmdet_configs/LICENSE ADDED
@@ -0,0 +1,203 @@
+Copyright 2018-2023 OpenMMLab. All rights reserved.
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!) The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright 2018-2023 OpenMMLab.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
mmdet_configs/README.md ADDED
@@ -0,0 +1,2 @@
+`configs.tar` is a tarball of https://github.com/open-mmlab/mmdetection/tree/v2.24.1/configs.
+The license file of mmdetection is also included in this directory.
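As a side note, a tarball with this layout could be regenerated with Python's `tarfile` module. This is a minimal sketch, not part of the commit; the local `mmdetection` checkout path and tag are assumptions based on the README above:

```python
# Hypothetical regeneration of configs.tar. Assumes the mmdetection repo has
# been cloned next to this file and checked out at tag v2.24.1.
import tarfile

with tarfile.open('configs.tar', 'w') as f:
    # Store the tree under the top-level name 'configs', matching the layout
    # that extract_tar() in app.py unpacks into mmdet_configs/configs.
    f.add('mmdetection/configs', arcname='configs')
```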
mmdet_configs/configs.tar ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5d2091e07da6b74a6cd694e895b653485f7ce9d5d17738a415ca77a56940b989
+size 3389440
model.py ADDED
@@ -0,0 +1,278 @@
+from __future__ import annotations
+
+import os
+import subprocess
+import sys
+import tempfile
+
+if os.getenv('SYSTEM') == 'spaces':
+    import mim
+
+    mim.uninstall('mmcv-full', confirm_yes=True)
+    mim.install('mmcv-full==1.5.0', is_yes=True)
+
+    subprocess.call('pip uninstall -y opencv-python'.split())
+    subprocess.call('pip uninstall -y opencv-python-headless'.split())
+    subprocess.call('pip install opencv-python-headless==4.5.5.64'.split())
+
+import cv2
+import huggingface_hub
+import numpy as np
+import torch
+import torch.nn as nn
+
+sys.path.insert(0, 'ViTPose/')
+
+from mmdet.apis import inference_detector, init_detector
+from mmpose.apis import (inference_top_down_pose_model, init_pose_model,
+                         process_mmdet_results, vis_pose_result)
+
+HF_TOKEN = os.environ['HF_TOKEN']
+
+
+class DetModel:
+    MODEL_DICT = {
+        'YOLOX-tiny': {
+            'config':
+            'mmdet_configs/configs/yolox/yolox_tiny_8x8_300e_coco.py',
+            'model':
+            'https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_tiny_8x8_300e_coco/yolox_tiny_8x8_300e_coco_20211124_171234-b4047906.pth',
+        },
+        'YOLOX-s': {
+            'config':
+            'mmdet_configs/configs/yolox/yolox_s_8x8_300e_coco.py',
+            'model':
+            'https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_s_8x8_300e_coco/yolox_s_8x8_300e_coco_20211121_095711-4592a793.pth',
+        },
+        'YOLOX-l': {
+            'config':
+            'mmdet_configs/configs/yolox/yolox_l_8x8_300e_coco.py',
+            'model':
+            'https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_l_8x8_300e_coco/yolox_l_8x8_300e_coco_20211126_140236-d3bd2b23.pth',
+        },
+        'YOLOX-x': {
+            'config':
+            'mmdet_configs/configs/yolox/yolox_x_8x8_300e_coco.py',
+            'model':
+            'https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_x_8x8_300e_coco/yolox_x_8x8_300e_coco_20211126_140254-1ef88d67.pth',
+        },
+    }
+
+    def __init__(self, device: str | torch.device):
+        self.device = torch.device(device)
+        self._load_all_models_once()
+        self.model_name = 'YOLOX-l'
+        self.model = self._load_model(self.model_name)
+
+    def _load_all_models_once(self) -> None:
+        for name in self.MODEL_DICT:
+            self._load_model(name)
+
+    def _load_model(self, name: str) -> nn.Module:
+        dic = self.MODEL_DICT[name]
+        return init_detector(dic['config'], dic['model'], device=self.device)
+
+    def set_model(self, name: str) -> None:
+        if name == self.model_name:
+            return
+        self.model_name = name
+        self.model = self._load_model(name)
+
+    def detect_and_visualize(
+            self, image: np.ndarray,
+            score_threshold: float) -> tuple[list[np.ndarray], np.ndarray]:
+        out = self.detect(image)
+        vis = self.visualize_detection_results(image, out, score_threshold)
+        return out, vis
+
+    def detect(self, image: np.ndarray) -> list[np.ndarray]:
+        image = image[:, :, ::-1]  # RGB -> BGR
+        out = inference_detector(self.model, image)
+        return out
+
+    def visualize_detection_results(
+            self,
+            image: np.ndarray,
+            detection_results: list[np.ndarray],
+            score_threshold: float = 0.3) -> np.ndarray:
+        person_det = [detection_results[0]] + [np.array([]).reshape(0, 5)] * 79
+
+        image = image[:, :, ::-1]  # RGB -> BGR
+        vis = self.model.show_result(image,
+                                     person_det,
+                                     score_thr=score_threshold,
+                                     bbox_color=None,
+                                     text_color=(200, 200, 200),
+                                     mask_color=None)
+        return vis[:, :, ::-1]  # BGR -> RGB
+
+
+class PoseModel:
+    MODEL_DICT = {
+        'ViTPose-B (single-task train)': {
+            'config':
+            'ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/ViTPose_base_coco_256x192.py',
+            'model': 'models/vitpose-b.pth',
+        },
+        'ViTPose-L (single-task train)': {
+            'config':
+            'ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/ViTPose_large_coco_256x192.py',
+            'model': 'models/vitpose-l.pth',
+        },
+        'ViTPose-B (multi-task train, COCO)': {
+            'config':
+            'ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/ViTPose_base_coco_256x192.py',
+            'model': 'models/vitpose-b-multi-coco.pth',
+        },
+        'ViTPose-L (multi-task train, COCO)': {
+            'config':
+            'ViTPose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/ViTPose_large_coco_256x192.py',
+            'model': 'models/vitpose-l-multi-coco.pth',
+        },
+    }
+
+    def __init__(self, device: str | torch.device):
+        self.device = torch.device(device)
+        self.model_name = 'ViTPose-B (multi-task train, COCO)'
+        self.model = self._load_model(self.model_name)
+
+    def _load_all_models_once(self) -> None:
+        for name in self.MODEL_DICT:
+            self._load_model(name)
+
+    def _load_model(self, name: str) -> nn.Module:
+        dic = self.MODEL_DICT[name]
+        ckpt_path = huggingface_hub.hf_hub_download('hysts/ViTPose',
+                                                    dic['model'],
+                                                    use_auth_token=HF_TOKEN)
+        model = init_pose_model(dic['config'], ckpt_path, device=self.device)
+        return model
+
+    def set_model(self, name: str) -> None:
+        if name == self.model_name:
+            return
+        self.model_name = name
+        self.model = self._load_model(name)
+
+    def predict_pose_and_visualize(
+        self,
+        image: np.ndarray,
+        det_results: list[np.ndarray],
+        box_score_threshold: float,
+        kpt_score_threshold: float,
+        vis_dot_radius: int,
+        vis_line_thickness: int,
+    ) -> tuple[list[dict[str, np.ndarray]], np.ndarray]:
+        out = self.predict_pose(image, det_results, box_score_threshold)
+        vis = self.visualize_pose_results(image, out, kpt_score_threshold,
+                                          vis_dot_radius, vis_line_thickness)
+        return out, vis
+
+    def predict_pose(
+            self,
+            image: np.ndarray,
+            det_results: list[np.ndarray],
+            box_score_threshold: float = 0.5) -> list[dict[str, np.ndarray]]:
+        image = image[:, :, ::-1]  # RGB -> BGR
+        person_results = process_mmdet_results(det_results, 1)
+        out, _ = inference_top_down_pose_model(self.model,
+                                               image,
+                                               person_results=person_results,
+                                               bbox_thr=box_score_threshold,
+                                               format='xyxy')
+        return out
+
+    def visualize_pose_results(self,
+                               image: np.ndarray,
+                               pose_results: list[dict[str, np.ndarray]],
+                               kpt_score_threshold: float = 0.3,
+                               vis_dot_radius: int = 4,
+                               vis_line_thickness: int = 1) -> np.ndarray:
+        image = image[:, :, ::-1]  # RGB -> BGR
+        vis = vis_pose_result(self.model,
+                              image,
+                              pose_results,
+                              kpt_score_thr=kpt_score_threshold,
+                              radius=vis_dot_radius,
+                              thickness=vis_line_thickness)
+        return vis[:, :, ::-1]  # BGR -> RGB
+
+
+class AppModel:
+    def __init__(self, device: str | torch.device):
+        self.det_model = DetModel(device)
+        self.pose_model = PoseModel(device)
+
+    def run(
+        self, video_path: str, det_model_name: str, pose_model_name: str,
+        box_score_threshold: float, max_num_frames: int,
+        kpt_score_threshold: float, vis_dot_radius: int,
+        vis_line_thickness: int
+    ) -> tuple[str, list[list[dict[str, np.ndarray]]]]:
+        if video_path is None:
+            return
+        self.det_model.set_model(det_model_name)
+        self.pose_model.set_model(pose_model_name)
+
+        cap = cv2.VideoCapture(video_path)
+        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+        fps = cap.get(cv2.CAP_PROP_FPS)
+
+        preds_all = []
+
+        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
+        temp_file = tempfile.NamedTemporaryFile(suffix='.mp4')
+        writer = cv2.VideoWriter(temp_file.name, fourcc, fps, (width, height))
+        for _ in range(max_num_frames):
+            ok, frame = cap.read()
+            if not ok:
+                break
+            rgb_frame = frame[:, :, ::-1]
+            det_preds = self.det_model.detect(rgb_frame)
+            preds, vis = self.pose_model.predict_pose_and_visualize(
+                rgb_frame, det_preds, box_score_threshold, kpt_score_threshold,
+                vis_dot_radius, vis_line_thickness)
+            preds_all.append(preds)
+            writer.write(vis[:, :, ::-1])
+        cap.release()
+        writer.release()
+
+        out_file = tempfile.NamedTemporaryFile(suffix='.mp4', delete=False)
+        subprocess.run(
+            f'ffmpeg -y -loglevel quiet -stats -i {temp_file.name} -c:v libx264 {out_file.name}'
+            .split())
+        return out_file.name, preds_all
+
+    def visualize_pose_results(self, video_path: str,
+                               pose_preds_all: list[list[dict[str,
+                                                              np.ndarray]]],
+                               kpt_score_threshold: float, vis_dot_radius: int,
+                               vis_line_thickness: int) -> str:
+        if video_path is None or pose_preds_all is None:
+            return
+        cap = cv2.VideoCapture(video_path)
+        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+        fps = cap.get(cv2.CAP_PROP_FPS)
+
+        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
+        temp_file = tempfile.NamedTemporaryFile(suffix='.mp4')
+        writer = cv2.VideoWriter(temp_file.name, fourcc, fps, (width, height))
+        for pose_preds in pose_preds_all:
+            ok, frame = cap.read()
+            if not ok:
+                break
+            rgb_frame = frame[:, :, ::-1]
+            vis = self.pose_model.visualize_pose_results(
+                rgb_frame, pose_preds, kpt_score_threshold, vis_dot_radius,
+                vis_line_thickness)
+            writer.write(vis[:, :, ::-1])
+        cap.release()
+        writer.release()
+
+        out_file = tempfile.NamedTemporaryFile(suffix='.mp4', delete=False)
+        subprocess.run(
+            f'ffmpeg -y -loglevel quiet -stats -i {temp_file.name} -c:v libx264 {out_file.name}'
+            .split())
+        return out_file.name
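For reference, a minimal sketch of driving this pipeline headlessly, without the Gradio UI defined in app.py. It assumes `HF_TOKEN` is set, the ViTPose submodule is checked out, and `configs.tar` has been extracted (e.g. via `extract_tar()` in app.py); the argument order follows `AppModel.run()` above, and the values mirror the UI defaults:

```python
# Hypothetical headless use of AppModel from model.py.
from model import AppModel

model = AppModel(device='cpu')
out_path, preds_all = model.run(
    'videos/pexels-allan-mas-5362370.mp4',  # bundled sample video
    'YOLOX-l',                              # detector (DetModel.MODEL_DICT key)
    'ViTPose-B (multi-task train, COCO)',   # pose model (PoseModel.MODEL_DICT key)
    0.5,  # box score threshold
    60,   # maximum number of frames to process
    0.3,  # keypoint visualization score threshold
    4,    # dot radius
    2,    # line thickness
)
print(out_path)        # path to the H.264-encoded result video
print(len(preds_all))  # one pose-prediction list per processed frame
```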
packages.txt ADDED
@@ -0,0 +1 @@
+ffmpeg
requirements.txt ADDED
@@ -0,0 +1,9 @@
+mmcv-full==1.5.0
+mmdet==2.24.1
+mmpose==0.25.1
+numpy==1.22.4
+opencv-python-headless==4.5.5.64
+openmim==0.1.5
+timm==0.5.4
+torch==1.11.0
+torchvision==0.12.0
style.css ADDED
@@ -0,0 +1,17 @@
+h1 {
+  text-align: center;
+}
+/*
+div#input_video {
+  max-width: 600px;
+  max-height: 600px;
+}
+div#result {
+  max-width: 600px;
+  max-height: 600px;
+}
+*/
+img#visitor-badge {
+  display: block;
+  margin: auto;
+}
videos/README.md ADDED
@@ -0,0 +1,6 @@
+These videos come from the following public-domain sources:
+
+- https://www.pexels.com/video/young-guy-doing-break-dance-on-the-street-5362370/
+- https://www.pexels.com/video/a-woman-dancing-at-home-6003986/
+- https://www.pexels.com/video/long-haired-man-dancing-in-a-library-6344381/
+- https://www.pexels.com/video/a-female-model-dancing-around-6815069/
videos/pexels-allan-mas-5362370.mp4 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:747f9c2f9d19e4955603e1a13b69663187882d4c6a8fbcad18ddbd04ee792d4d
+size 1972564
videos/pexels-artem-podrez-6003986.mp4 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1044083afc06aa6f956838c7fcd582c9cfd59ea3a994adc8a0f5889ffca4d9c8
+size 2494082
videos/pexels-c-technical-6344381.mp4 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7763476045f4683d53d751fb8befaf637c0101a0693e72f5b582e6aa5ac63cac
+size 3967587
videos/pexels-roman-odintsov-6815069.mp4 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:44045b239c0f523bfeedc5871019ae9f67525fcf65ba46d7ca4516994e6b2f57
+size 2617714