Upload 78 files
This view is limited to 50 files because it contains too many changes. See raw diff.
- age_estimator/.DS_Store +0 -0
- age_estimator/__init__.py +0 -0
- age_estimator/__pycache__/__init__.cpython-38.pyc +0 -0
- age_estimator/__pycache__/admin.cpython-38.pyc +0 -0
- age_estimator/__pycache__/apps.cpython-38.pyc +0 -0
- age_estimator/__pycache__/models.cpython-38.pyc +0 -0
- age_estimator/__pycache__/urls.cpython-38.pyc +0 -0
- age_estimator/__pycache__/views.cpython-38.pyc +0 -0
- age_estimator/admin.py +3 -0
- age_estimator/apps.py +6 -0
- age_estimator/migrations/__init__.py +0 -0
- age_estimator/migrations/__pycache__/__init__.cpython-38.pyc +0 -0
- age_estimator/mivolo/.DS_Store +0 -0
- age_estimator/mivolo/.flake8 +5 -0
- age_estimator/mivolo/.gitignore +85 -0
- age_estimator/mivolo/.isort.cfg +5 -0
- age_estimator/mivolo/.pre-commit-config.yaml +31 -0
- age_estimator/mivolo/CHANGELOG.md +16 -0
- age_estimator/mivolo/README.md +417 -0
- age_estimator/mivolo/__pycache__/demo_copy.cpython-38.pyc +0 -0
- age_estimator/mivolo/demo.py +145 -0
- age_estimator/mivolo/demo_copy.py +144 -0
- age_estimator/mivolo/eval_pretrained.py +232 -0
- age_estimator/mivolo/eval_tools.py +149 -0
- age_estimator/mivolo/images/MiVOLO.jpg +0 -0
- age_estimator/mivolo/infer.py +88 -0
- age_estimator/mivolo/license/en_us.pdf +0 -0
- age_estimator/mivolo/license/ru.pdf +0 -0
- age_estimator/mivolo/measure_time.py +77 -0
- age_estimator/mivolo/mivolo/__init__.py +0 -0
- age_estimator/mivolo/mivolo/__pycache__/__init__.cpython-38.pyc +0 -0
- age_estimator/mivolo/mivolo/__pycache__/predictor.cpython-38.pyc +0 -0
- age_estimator/mivolo/mivolo/__pycache__/structures.cpython-38.pyc +0 -0
- age_estimator/mivolo/mivolo/data/__init__.py +0 -0
- age_estimator/mivolo/mivolo/data/__pycache__/__init__.cpython-38.pyc +0 -0
- age_estimator/mivolo/mivolo/data/__pycache__/data_reader.cpython-38.pyc +0 -0
- age_estimator/mivolo/mivolo/data/__pycache__/misc.cpython-38.pyc +0 -0
- age_estimator/mivolo/mivolo/data/data_reader.py +125 -0
- age_estimator/mivolo/mivolo/data/dataset/__init__.py +66 -0
- age_estimator/mivolo/mivolo/data/dataset/age_gender_dataset.py +194 -0
- age_estimator/mivolo/mivolo/data/dataset/age_gender_loader.py +169 -0
- age_estimator/mivolo/mivolo/data/dataset/classification_dataset.py +47 -0
- age_estimator/mivolo/mivolo/data/dataset/reader_age_gender.py +492 -0
- age_estimator/mivolo/mivolo/data/misc.py +246 -0
- age_estimator/mivolo/mivolo/model/__init__.py +0 -0
- age_estimator/mivolo/mivolo/model/__pycache__/__init__.cpython-38.pyc +0 -0
- age_estimator/mivolo/mivolo/model/__pycache__/create_timm_model.cpython-38.pyc +0 -0
- age_estimator/mivolo/mivolo/model/__pycache__/cross_bottleneck_attn.cpython-38.pyc +0 -0
- age_estimator/mivolo/mivolo/model/__pycache__/mi_volo.cpython-38.pyc +0 -0
- age_estimator/mivolo/mivolo/model/__pycache__/mivolo_model.cpython-38.pyc +0 -0
age_estimator/.DS_Store
ADDED
Binary file (6.15 kB)
age_estimator/__init__.py
ADDED
File without changes
age_estimator/__pycache__/__init__.cpython-38.pyc
ADDED
Binary file (159 Bytes)
age_estimator/__pycache__/admin.cpython-38.pyc
ADDED
Binary file (200 Bytes)
age_estimator/__pycache__/apps.cpython-38.pyc
ADDED
Binary file (449 Bytes)
age_estimator/__pycache__/models.cpython-38.pyc
ADDED
Binary file (197 Bytes)
age_estimator/__pycache__/urls.cpython-38.pyc
ADDED
Binary file (357 Bytes)
age_estimator/__pycache__/views.cpython-38.pyc
ADDED
Binary file (1.62 kB)
age_estimator/admin.py
ADDED
@@ -0,0 +1,3 @@
from django.contrib import admin

# Register your models here.
age_estimator/apps.py
ADDED
@@ -0,0 +1,6 @@
from django.apps import AppConfig


class AgeEstimatorConfig(AppConfig):
    default_auto_field = 'django.db.models.BigAutoField'
    name = 'age_estimator'
age_estimator/migrations/__init__.py
ADDED
File without changes
age_estimator/migrations/__pycache__/__init__.cpython-38.pyc
ADDED
Binary file (170 Bytes)
age_estimator/mivolo/.DS_Store
ADDED
Binary file (6.15 kB)
age_estimator/mivolo/.flake8
ADDED
@@ -0,0 +1,5 @@
[flake8]
max-line-length = 120
inline-quotes = "
multiline-quotes = "
ignore = E203,W503
age_estimator/mivolo/.gitignore
ADDED
@@ -0,0 +1,85 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

*.DS_Store

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# pyenv
.python-version

# PyTorch weights
*.tar
*.pth
*.pt
*.torch
*.gz
Untitled.ipynb
Testing notebook.ipynb

# Root dir exclusions
/*.csv
/*.yaml
/*.json
/*.jpg
/*.png
/*.zip
/*.tar.*
*.jpg
*.png
*.avi
*.mp4
*.svg

.mypy_cache/
.vscode/
.idea

output/
input/

run.sh
age_estimator/mivolo/.isort.cfg
ADDED
@@ -0,0 +1,5 @@
[settings]
profile = black
line_length = 120
src_paths = ["mivolo", "scripts", "tools"]
filter_files = true
age_estimator/mivolo/.pre-commit-config.yaml
ADDED
@@ -0,0 +1,31 @@
repos:
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v4.2.0
    hooks:
      - id: check-yaml
        args: ['--unsafe']
      - id: check-toml
      - id: debug-statements
      - id: end-of-file-fixer
        exclude: poetry.lock
      - id: trailing-whitespace
  - repo: https://github.com/PyCQA/isort
    rev: 5.12.0
    hooks:
      - id: isort
        args: [ "--profile", "black", "--filter-files" ]
  - repo: https://github.com/psf/black
    rev: 22.3.0
    hooks:
      - id: black
        args: ["--line-length", "120"]
  - repo: https://github.com/PyCQA/flake8
    rev: 3.9.2
    hooks:
      - id: flake8
        args: [ "--config", ".flake8" ]
        additional_dependencies: [ "flake8-quotes" ]
  - repo: https://github.com/pre-commit/mirrors-mypy
    rev: v0.942
    hooks:
      - id: mypy
age_estimator/mivolo/CHANGELOG.md
ADDED
@@ -0,0 +1,16 @@
## 0.4.1dev (15.08.2023)

### Added
- Support for video streams, including YouTube URLs
- Instructions and explanations for various export types.

### Changed
- Removed CutOff operation. It has been proven to be ineffective for inference time and quite costly at the same time. Now it is only used during training.

## 0.4.2dev (22.09.2023)

### Added

- Script for AgeDB dataset conversion to csv format
- Additional metrics were added to README
age_estimator/mivolo/README.md
ADDED
@@ -0,0 +1,417 @@
<div align="center">
<p>
<a align="center" target="_blank">
<img width="900" src="./images/MiVOLO.jpg"></a>
</p>
<br>
</div>


## MiVOLO: Multi-input Transformer for Age and Gender Estimation

[![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/mivolo-multi-input-transformer-for-age-and/age-estimation-on-utkface)](https://paperswithcode.com/sota/age-estimation-on-utkface?p=mivolo-multi-input-transformer-for-age-and) [![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/beyond-specialization-assessing-the-1/age-estimation-on-imdb-clean)](https://paperswithcode.com/sota/age-estimation-on-imdb-clean?p=beyond-specialization-assessing-the-1) [![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/beyond-specialization-assessing-the-1/facial-attribute-classification-on-fairface)](https://paperswithcode.com/sota/facial-attribute-classification-on-fairface?p=beyond-specialization-assessing-the-1) [![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/beyond-specialization-assessing-the-1/age-and-gender-classification-on-adience)](https://paperswithcode.com/sota/age-and-gender-classification-on-adience?p=beyond-specialization-assessing-the-1) [![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/beyond-specialization-assessing-the-1/age-and-gender-classification-on-adience-age)](https://paperswithcode.com/sota/age-and-gender-classification-on-adience-age?p=beyond-specialization-assessing-the-1) [![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/beyond-specialization-assessing-the-1/age-and-gender-estimation-on-lagenda-age)](https://paperswithcode.com/sota/age-and-gender-estimation-on-lagenda-age?p=beyond-specialization-assessing-the-1) [![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/beyond-specialization-assessing-the-1/gender-prediction-on-lagenda)](https://paperswithcode.com/sota/gender-prediction-on-lagenda?p=beyond-specialization-assessing-the-1) [![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/mivolo-multi-input-transformer-for-age-and/age-estimation-on-agedb)](https://paperswithcode.com/sota/age-estimation-on-agedb?p=mivolo-multi-input-transformer-for-age-and) [![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/mivolo-multi-input-transformer-for-age-and/gender-prediction-on-agedb)](https://paperswithcode.com/sota/gender-prediction-on-agedb?p=mivolo-multi-input-transformer-for-age-and) [![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/beyond-specialization-assessing-the-1/age-estimation-on-cacd)](https://paperswithcode.com/sota/age-estimation-on-cacd?p=beyond-specialization-assessing-the-1)

> [**MiVOLO: Multi-input Transformer for Age and Gender Estimation**](https://arxiv.org/abs/2307.04616),
> Maksim Kuprashevich, Irina Tolstykh,
> *2023 [arXiv 2307.04616](https://arxiv.org/abs/2307.04616)*

> [**Beyond Specialization: Assessing the Capabilities of MLLMs in Age and Gender Estimation**](https://arxiv.org/abs/2403.02302),
> Maksim Kuprashevich, Grigorii Alekseenko, Irina Tolstykh
> *2024 [arXiv 2403.02302](https://arxiv.org/abs/2403.02302)*

[[`Paper 2023`](https://arxiv.org/abs/2307.04616)] [[`Paper 2024`](https://arxiv.org/abs/2403.02302)] [[`Demo`](https://huggingface.co/spaces/iitolstykh/age_gender_estimation_demo)] [[`Telegram Bot`](https://t.me/AnyAgeBot)] [[`BibTex`](#citing)] [[`Data`](https://wildchlamydia.github.io/lagenda/)]


## MiVOLO pretrained models

Gender & Age recognition performance.

<table style="margin: auto">
<tr>
<th align="left">Model</th>
<th align="left" style="color:LightBlue">Type</th>
<th align="left">Dataset (train and test)</th>
<th align="left">Age MAE</th>
<th align="left">Age CS@5</th>
<th align="left">Gender Accuracy</th>
<th align="left">download</th>
</tr>
<tr>
<td>volo_d1</td>
<td align="left">face_only, age</td>
<td align="left">IMDB-cleaned</td>
<td align="left">4.29</td>
<td align="left">67.71</td>
<td align="left">-</td>
<td><a href="https://drive.google.com/file/d/17ysOqgG3FUyEuxrV3Uh49EpmuOiGDxrq/view?usp=drive_link">checkpoint</a></td>
</tr>
<tr>
<td>volo_d1</td>
<td align="left">face_only, age, gender</td>
<td align="left">IMDB-cleaned</td>
<td align="left">4.22</td>
<td align="left">68.68</td>
<td align="left">99.38</td>
<td><a href="https://drive.google.com/file/d/1NlsNEVijX2tjMe8LBb1rI56WB_ADVHeP/view?usp=drive_link">checkpoint</a></td>
</tr>
<tr>
<td>mivolo_d1</td>
<td align="left">face_body, age, gender</td>
<td align="left">IMDB-cleaned</td>
<td align="left">4.24 [face+body]<br>6.87 [body]</td>
<td align="left">68.32 [face+body]<br>46.32 [body]</td>
<td align="left">99.46 [face+body]<br>96.48 [body]</td>
<td><a href="https://drive.google.com/file/d/11i8pKctxz3wVkDBlWKvhYIh7kpVFXSZ4/view?usp=drive_link">model_imdb_cross_person_4.24_99.46.pth.tar</a></td>
</tr>
<tr>
<td>volo_d1</td>
<td align="left">face_only, age</td>
<td align="left">UTKFace</td>
<td align="left">4.23</td>
<td align="left">69.72</td>
<td align="left">-</td>
<td><a href="https://drive.google.com/file/d/1LtDfAJrWrw-QA9U5IuC3_JImbvAQhrJE/view?usp=drive_link">checkpoint</a></td>
</tr>
<tr>
<td>volo_d1</td>
<td align="left">face_only, age, gender</td>
<td align="left">UTKFace</td>
<td align="left">4.23</td>
<td align="left">69.78</td>
<td align="left">97.69</td>
<td><a href="https://drive.google.com/file/d/1hKFmIR6fjHMevm-a9uPEAkDLrTAh-W4D/view?usp=drive_link">checkpoint</a></td>
</tr>
<tr>
<td>mivolo_d1</td>
<td align="left">face_body, age, gender</td>
<td align="left">Lagenda</td>
<td align="left">3.99 [face+body]</td>
<td align="left">71.27 [face+body]</td>
<td align="left">97.36 [face+body]</td>
<td><a href="https://huggingface.co/spaces/iitolstykh/demo">demo</a></td>
</tr>
<tr>
<td>mivolov2_d1_384x384</td>
<td align="left">face_body, age, gender</td>
<td align="left">Lagenda</td>
<td align="left">3.65 [face+body]</td>
<td align="left">74.48 [face+body]</td>
<td align="left">97.99 [face+body]</td>
<td><a href="https://t.me/AnyAgeBot">telegram bot</a></td>
</tr>

</table>

## MiVOLO regression benchmarks

Gender & Age recognition performance.

Use [valid_age_gender.sh](scripts/valid_age_gender.sh) to reproduce results with our checkpoints.

<table style="margin: auto">
<tr>
<th align="left">Model</th>
<th align="left" style="color:LightBlue">Type</th>
<th align="left">Train Dataset</th>
<th align="left">Test Dataset</th>
<th align="left">Age MAE</th>
<th align="left">Age CS@5</th>
<th align="left">Gender Accuracy</th>
<th align="left">download</th>
</tr>

<tr>
<td>mivolo_d1</td>
<td align="left">face_body, age, gender</td>
<td align="left">Lagenda</td>
<td align="left">AgeDB</td>
<td align="left">5.55 [face]</td>
<td align="left">55.08 [face]</td>
<td align="left">98.3 [face]</td>
<td><a href="https://huggingface.co/spaces/iitolstykh/demo">demo</a></td>
</tr>
<tr>
<td>mivolo_d1</td>
<td align="left">face_body, age, gender</td>
<td align="left">IMDB-cleaned</td>
<td align="left">AgeDB</td>
<td align="left">5.58 [face]</td>
<td align="left">55.54 [face]</td>
<td align="left">97.93 [face]</td>
<td><a href="https://drive.google.com/file/d/11i8pKctxz3wVkDBlWKvhYIh7kpVFXSZ4/view?usp=drive_link">model_imdb_cross_person_4.24_99.46.pth.tar</a></td>
</tr>

</table>

## MiVOLO classification benchmarks

Gender & Age recognition performance.

<table style="margin: auto">
<tr>
<th align="left">Model</th>
<th align="left" style="color:LightBlue">Type</th>
<th align="left">Train Dataset</th>
<th align="left">Test Dataset</th>
<th align="left">Age Accuracy</th>
<th align="left">Gender Accuracy</th>
</tr>

<tr>
<td>mivolo_d1</td>
<td align="left">face_body, age, gender</td>
<td align="left">Lagenda</td>
<td align="left">FairFace</td>
<td align="left">61.07 [face+body]</td>
<td align="left">95.73 [face+body]</td>
</tr>
<tr>
<td>mivolo_d1</td>
<td align="left">face_body, age, gender</td>
<td align="left">Lagenda</td>
<td align="left">Adience</td>
<td align="left">68.69 [face]</td>
<td align="left">96.51 [face]</td>
</tr>
<tr>
<td>mivolov2_d1_384</td>
<td align="left">face_body, age, gender</td>
<td align="left">Lagenda</td>
<td align="left">Adience</td>
<td align="left">69.43 [face]</td>
<td align="left">97.39 [face]</td>
</tr>

</table>

## Dataset

**Please [cite our papers](#citing) if you use any of this data!**

- Lagenda dataset: [images](https://drive.google.com/file/d/1QXO0NlkABPZT6x1_0Uc2i6KAtdcrpTbG/view?usp=sharing) and [annotation](https://drive.google.com/file/d/1mNYjYFb3MuKg-OL1UISoYsKObMUllbJx/view?usp=sharing).
- IMDB-clean: follow [these instructions](https://github.com/yiminglin-ai/imdb-clean) to get images and [download](https://drive.google.com/file/d/17uEqyU3uQ5trWZ5vRJKzh41yeuDe5hyL/view?usp=sharing) our annotations.
- UTK dataset: [origin full images](https://susanqq.github.io/UTKFace/) and our annotation: [split from the article](https://drive.google.com/file/d/1Fo1vPWrKtC5bPtnnVWNTdD4ZTKRXL9kv/view?usp=sharing), [random full split](https://drive.google.com/file/d/177AV631C3SIfi5nrmZA8CEihIt29cznJ/view?usp=sharing).
- Adience dataset: follow [these instructions](https://talhassner.github.io/home/projects/Adience/Adience-data.html) to get images and [download](https://drive.google.com/file/d/1wS1Q4FpksxnCR88A1tGLsLIr91xHwcVv/view?usp=sharing) our annotations.
<details>
<summary>Click to expand!</summary>

After downloading them, your `data` directory should look something like this:

```console
data
└── Adience
    ├── annotations (folder with our annotations)
    ├── aligned (will not be used)
    ├── faces
    ├── fold_0_data.txt
    ├── fold_1_data.txt
    ├── fold_2_data.txt
    ├── fold_3_data.txt
    └── fold_4_data.txt
```

We use coarse aligned images from `faces/` dir.

Using our detector we found a face bbox for each image (see [tools/prepare_adience.py](tools/prepare_adience.py)).

This dataset has five folds. The performance metric is accuracy on five-fold cross validation.

| images before removal | fold 0 | fold 1 | fold 2 | fold 3 | fold 4 |
| --------------------- | ------ | ------ | ------ | ------ | ------ |
| 19,370 | 4,484 | 3,730 | 3,894 | 3,446 | 3,816 |

Not complete data

| only age not found | only gender not found | SUM |
| ------------------ | --------------------- | ------------- |
| 40 | 1170 | 1,210 (6.2 %) |

Removed data

| failed to process image | age and gender not found | SUM |
| ----------------------- | ------------------------ | ----------- |
| 0 | 708 | 708 (3.6 %) |

Genders

| female | male |
| ------ | ----- |
| 9,372 | 8,120 |

Ages (8 classes) after mapping to not intersected ages intervals

| 0-2 | 4-6 | 8-12 | 15-20 | 25-32 | 38-43 | 48-53 | 60-100 |
| ----- | ----- | ----- | ----- | ----- | ----- | ----- | ------ |
| 2,509 | 2,140 | 2,293 | 1,791 | 5,589 | 2,490 | 909 | 901 |

</details>

- FairFace dataset: follow [these instructions](https://github.com/joojs/fairface) to get images and [download](https://drive.google.com/file/d/1EdY30A1SQmox96Y39VhBxdgALYhbkzdm/view?usp=drive_link) our annotations.
<details>
<summary>Click to expand!</summary>

After downloading them, your `data` directory should look something like this:

```console
data
└── FairFace
    ├── annotations (folder with our annotations)
    ├── fairface-img-margin025-trainval (will not be used)
    │   ├── train
    │   ├── val
    ├── fairface-img-margin125-trainval
    │   ├── train
    │   ├── val
    ├── fairface_label_train.csv
    ├── fairface_label_val.csv

```

We use aligned images from `fairface-img-margin125-trainval/` dir.

Using our detector we found a face bbox for each image and added a person bbox if it was possible (see [tools/prepare_fairface.py](tools/prepare_fairface.py)).

This dataset has 2 splits: train and val. The performance metric is accuracy on validation.

| images train | images val |
| ------------ | ---------- |
| 86,744 | 10,954 |

Genders for **validation**

| female | male |
| ------ | ----- |
| 5,162 | 5,792 |

Ages for **validation** (9 classes):

| 0-2 | 3-9 | 10-19 | 20-29 | 30-39 | 40-49 | 50-59 | 60-69 | 70+ |
| --- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | --- |
| 199 | 1,356 | 1,181 | 3,300 | 2,330 | 1,353 | 796 | 321 | 118 |

</details>
- AgeDB dataset: follow [these instructions](https://ibug.doc.ic.ac.uk/resources/agedb/) to get images and [download](https://drive.google.com/file/d/1Dp72BUlAsyUKeSoyE_DOsFRS1x6ZBJen/view) our annotations.
<details>
<summary>Click to expand!</summary>

**Ages**: 1 - 101

**Genders**: 9788 faces of `M`, 6700 faces of `F`

| images 0 | images 1 | images 2 | images 3 | images 4 | images 5 | images 6 | images 7 | images 8 | images 9 |
|----------|----------|----------|----------|----------|----------|----------|----------|----------|----------|
| 1701 | 1721 | 1615 | 1619 | 1626 | 1643 | 1634 | 1596 | 1676 | 1657 |

Data splits were taken from [here](https://github.com/paplhjak/Facial-Age-Estimation-Benchmark-Databases)

!! **All splits (the whole dataset) were used for model evaluation.**
</details>

## Install

Install pytorch 1.13+ and other requirements.

```
pip install -r requirements.txt
pip install .
```


## Demo

1. [Download](https://drive.google.com/file/d/1CGNCkZQNj5WkP3rLpENWAOgrBQkUWRdw/view) body + face detector model to `models/yolov8x_person_face.pt`
2. [Download](https://drive.google.com/file/d/11i8pKctxz3wVkDBlWKvhYIh7kpVFXSZ4/view) mivolo checkpoint to `models/mivolo_imbd.pth.tar`

```bash
wget https://variety.com/wp-content/uploads/2023/04/MCDNOHA_SP001.jpg -O jennifer_lawrence.jpg

python3 demo.py \
    --input "jennifer_lawrence.jpg" \
    --output "output" \
    --detector-weights "models/yolov8x_person_face.pt" \
    --checkpoint "models/mivolo_imbd.pth.tar" \
    --device "cuda:0" \
    --with-persons \
    --draw
```

To run demo for a youtube video:
```bash
python3 demo.py \
    --input "https://www.youtube.com/shorts/pVh32k0hGEI" \
    --output "output" \
    --detector-weights "models/yolov8x_person_face.pt" \
    --checkpoint "models/mivolo_imbd.pth.tar" \
    --device "cuda:0" \
    --draw \
    --with-persons
```


## Validation

To reproduce validation metrics:

1. Download prepared annotations for imdb-clean / utk / adience / lagenda / fairface.
2. Download checkpoint
3. Run validation:

```bash
python3 eval_pretrained.py \
    --dataset_images /path/to/dataset/utk/images \
    --dataset_annotations /path/to/dataset/utk/annotation \
    --dataset_name utk \
    --split valid \
    --batch-size 512 \
    --checkpoint models/mivolo_imbd.pth.tar \
    --half \
    --with-persons \
    --device "cuda:0"
```

Supported dataset names: "utk", "imdb", "lagenda", "fairface", "adience".


## Changelog

[CHANGELOG.md](CHANGELOG.md)

## ONNX and TensorRT export

As of now (11.08.2023), while ONNX export is technically feasible, it is not advisable due to the poor performance of the resulting model with batch processing.
**TensorRT** and **OpenVINO** export is impossible due to their lack of support for col2im.

If you remain absolutely committed to utilizing ONNX export, you can refer to [these instructions](https://github.com/WildChlamydia/MiVOLO/issues/14#issuecomment-1675245889).

The most highly recommended export method at present **is using TorchScript**. You can achieve this with a single line of code:
```python
torch.jit.trace(model)
```
This approach provides you with a model that maintains its original speed and only requires a single file for usage, eliminating the need for additional code.

## License

Please, see [here](./license)


## Citing

If you use our models, code or dataset, we kindly request you to cite the following papers and give the repository a :star:

```bibtex
@article{mivolo2023,
   Author = {Maksim Kuprashevich and Irina Tolstykh},
   Title = {MiVOLO: Multi-input Transformer for Age and Gender Estimation},
   Year = {2023},
   Eprint = {arXiv:2307.04616},
}
```
```bibtex
@article{mivolo2024,
   Author = {Maksim Kuprashevich and Grigorii Alekseenko and Irina Tolstykh},
   Title = {Beyond Specialization: Assessing the Capabilities of MLLMs in Age and Gender Estimation},
   Year = {2024},
   Eprint = {arXiv:2403.02302},
}
```
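A note on the TorchScript one-liner above: `torch.jit.trace` also needs example inputs, not just the module. A minimal sketch of what the export step could look like, assuming `model` is the loaded MiVOLO `nn.Module` and that the face+body variant consumes a 6-channel 224x224 tensor (both assumptions; check `in_chans` and `input_size` of your checkpoint):

```python
import torch

# Sketch only: `model` is assumed to be the underlying nn.Module from a MiVOLO checkpoint,
# and the dummy input shape is assumed from the face+body (6-channel) configuration.
model.eval()                                   # trace in inference mode
example = torch.randn(1, 6, 224, 224)          # dummy batch matching the expected input
traced = torch.jit.trace(model, example)       # trace() requires example inputs
traced.save("mivolo_traced.pt")                # single-file artifact, loadable via torch.jit.load
```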
age_estimator/mivolo/__pycache__/demo_copy.cpython-38.pyc
ADDED
Binary file (4.06 kB)
age_estimator/mivolo/demo.py
ADDED
@@ -0,0 +1,145 @@
import argparse
import logging
import os
import random

import cv2
import torch
import yt_dlp
from mivolo.data.data_reader import InputType, get_all_files, get_input_type
from mivolo.predictor import Predictor
from timm.utils import setup_default_logging

_logger = logging.getLogger("inference")


def get_direct_video_url(video_url):
    ydl_opts = {
        "format": "bestvideo",
        "quiet": True,  # Suppress terminal output
    }

    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        info_dict = ydl.extract_info(video_url, download=False)

        if "url" in info_dict:
            direct_url = info_dict["url"]
            resolution = (info_dict["width"], info_dict["height"])
            fps = info_dict["fps"]
            yid = info_dict["id"]
            return direct_url, resolution, fps, yid

    return None, None, None, None


def get_local_video_info(vid_uri):
    cap = cv2.VideoCapture(vid_uri)
    if not cap.isOpened():
        raise ValueError(f"Failed to open video source {vid_uri}")
    res = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
    fps = cap.get(cv2.CAP_PROP_FPS)
    return res, fps


def get_random_frames(cap, num_frames):
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    frame_indices = random.sample(range(total_frames), num_frames)

    frames = []
    for idx in frame_indices:
        cap.set(cv2.CAP_PROP_POS_FRAMES, idx)
        ret, frame = cap.read()
        if ret:
            frames.append(frame)
    return frames


def get_parser():
    parser = argparse.ArgumentParser(description="PyTorch MiVOLO Inference")
    parser.add_argument("--input", type=str, default=None, required=True, help="image file or folder with images")
    parser.add_argument("--output", type=str, default=None, required=True, help="folder for output results")
    parser.add_argument("--detector-weights", type=str, default=None, required=True, help="Detector weights (YOLOv8).")
    parser.add_argument("--checkpoint", default="", type=str, required=True, help="path to mivolo checkpoint")

    parser.add_argument(
        "--with-persons", action="store_true", default=False, help="If set model will run with persons, if available"
    )
    parser.add_argument(
        "--disable-faces", action="store_true", default=False, help="If set model will use only persons if available"
    )

    parser.add_argument("--draw", action="store_true", default=False, help="If set, resulted images will be drawn")
    parser.add_argument("--device", default="cuda", type=str, help="Device (accelerator) to use.")

    return parser


def main():
    parser = get_parser()
    setup_default_logging()
    args = parser.parse_args()

    if torch.cuda.is_available():
        torch.backends.cuda.matmul.allow_tf32 = True
        torch.backends.cudnn.benchmark = True
    os.makedirs(args.output, exist_ok=True)

    predictor = Predictor(args, verbose=True)

    input_type = get_input_type(args.input)

    if input_type == InputType.Video or input_type == InputType.VideoStream:
        if "youtube" in args.input:
            args.input, res, fps, yid = get_direct_video_url(args.input)
            if not args.input:
                raise ValueError(f"Failed to get direct video url {args.input}")
        else:
            cap = cv2.VideoCapture(args.input)
            if not cap.isOpened():
                raise ValueError(f"Failed to open video source {args.input}")

        # Extract 4-5 random frames from the video
        random_frames = get_random_frames(cap, num_frames=5)

        age_list = []
        for frame in random_frames:
            detected_objects, out_im, age = predictor.recognize(frame)
            age_list.append(age[0])

            if args.draw:
                bname = os.path.splitext(os.path.basename(args.input))[0]
                filename = os.path.join(args.output, f"out_{bname}.jpg")
                cv2.imwrite(filename, out_im)
                _logger.info(f"Saved result to {filename}")

        # Calculate and print average age
        avg_age = sum(age_list) / len(age_list) if age_list else 0
        print(f"Age list: {age_list}")
        print(f"Average age: {avg_age:.2f}")
        absolute_age = round(abs(avg_age))
        # Define the range
        lower_bound = absolute_age - 2
        upper_bound = absolute_age + 2

        return absolute_age, lower_bound, upper_bound

    elif input_type == InputType.Image:
        image_files = get_all_files(args.input) if os.path.isdir(args.input) else [args.input]

        for img_p in image_files:
            img = cv2.imread(img_p)
            detected_objects, out_im, age = predictor.recognize(img)

            if args.draw:
                bname = os.path.splitext(os.path.basename(img_p))[0]
                filename = os.path.join(args.output, f"out_{bname}.jpg")
                cv2.imwrite(filename, out_im)
                _logger.info(f"Saved result to {filename}")


if __name__ == "__main__":
    absolute_age, lower_bound, upper_bound = main()
    # Output the results in the desired format
    print(f"Absolute Age: {absolute_age}")
    print(f"Range: {lower_bound} - {upper_bound}")
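For context, the same predictor can be driven without the CLI. A minimal sketch, assuming the `mivolo` package is importable, the detector and checkpoint files below exist locally (placeholder paths), and that this fork's `Predictor.recognize` returns `(detected_objects, drawn_image, ages)` as demo.py above expects:

```python
import argparse

import cv2
from mivolo.predictor import Predictor

# Namespace fields mirror what demo.py's parser would produce (assumed to be all that
# Predictor needs in this repo); the file paths are placeholders.
args = argparse.Namespace(
    detector_weights="models/yolov8x_person_face.pt",
    checkpoint="models/mivolo_imbd.pth.tar",
    device="cpu",
    with_persons=True,
    disable_faces=False,
    draw=False,
)
predictor = Predictor(args, verbose=False)

cap = cv2.VideoCapture("input.mp4")  # any local video
ok, frame = cap.read()
if ok:
    _, _, ages = predictor.recognize(frame)
    print(f"Estimated ages in first frame: {ages}")
```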
age_estimator/mivolo/demo_copy.py
ADDED
@@ -0,0 +1,144 @@
import argparse
import logging
import os
import random

import cv2
import torch
import yt_dlp
import sys
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '././')))

from mivolo.data.data_reader import InputType, get_all_files, get_input_type
from mivolo.predictor import Predictor
from timm.utils import setup_default_logging

_logger = logging.getLogger("inference")


def get_direct_video_url(video_url):
    ydl_opts = {
        "format": "bestvideo",
        "quiet": True,  # Suppress terminal output
    }

    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        info_dict = ydl.extract_info(video_url, download=False)

        if "url" in info_dict:
            direct_url = info_dict["url"]
            resolution = (info_dict["width"], info_dict["height"])
            fps = info_dict["fps"]
            yid = info_dict["id"]
            return direct_url, resolution, fps, yid

    return None, None, None, None


def get_random_frames(cap, num_frames):
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    frame_indices = random.sample(range(total_frames), num_frames)

    frames = []
    for idx in frame_indices:
        cap.set(cv2.CAP_PROP_POS_FRAMES, idx)
        ret, frame = cap.read()
        if ret:
            frames.append(frame)
    return frames


def get_parser():
    parser = argparse.ArgumentParser(description="PyTorch MiVOLO Inference")
    parser.add_argument("--input", type=str, default=None, required=True, help="image file or folder with images")
    parser.add_argument("--output", type=str, default=None, required=True, help="folder for output results")
    parser.add_argument("--detector-weights", type=str, default=None, required=True, help="Detector weights (YOLOv8).")
    parser.add_argument("--checkpoint", default="", type=str, required=True, help="path to mivolo checkpoint")

    parser.add_argument(
        "--with_persons", action="store_true", default=False, help="If set model will run with persons, if available"
    )
    parser.add_argument(
        "--disable_faces", action="store_true", default=False, help="If set model will use only persons if available"
    )

    parser.add_argument("--draw", action="store_true", default=False, help="If set, resulted images will be drawn")
    parser.add_argument("--device", default="cpu", type=str, help="Device (accelerator) to use.")

    return parser


def main(video_path, output_folder, detector_weights, checkpoint, device, with_persons, disable_faces, draw=False):
    setup_default_logging()

    if torch.cuda.is_available():
        torch.backends.cuda.matmul.allow_tf32 = True
        torch.backends.cudnn.benchmark = True

    os.makedirs(output_folder, exist_ok=True)

    # Initialize predictor
    args = argparse.Namespace(
        input=video_path,
        output=output_folder,
        detector_weights=detector_weights,
        checkpoint=checkpoint,
        draw=draw,
        device=device,
        with_persons=with_persons,
        disable_faces=disable_faces
    )

    predictor = Predictor(args, verbose=True)

    if "youtube" in video_path:
        video_path, res, fps, yid = get_direct_video_url(video_path)
        if not video_path:
            raise ValueError(f"Failed to get direct video url {video_path}")

    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        raise ValueError(f"Failed to open video source {video_path}")

    # Extract random frames from the video
    random_frames = get_random_frames(cap, num_frames=10)
    age_list = []

    for frame in random_frames:
        detected_objects, out_im, age = predictor.recognize(frame)
        try:
            age_list.append(age[0])  # Attempt to access the first element of age
            if draw:
                bname = os.path.splitext(os.path.basename(video_path))[0]
                filename = os.path.join(output_folder, f"out_{bname}.jpg")
                cv2.imwrite(filename, out_im)
                _logger.info(f"Saved result to {filename}")
        except IndexError:
            continue

    if len(age_list) == 0:
        raise ValueError("No person was detected in the frame. Please upload a proper face video.")

    # Calculate and print average age
    avg_age = sum(age_list) / len(age_list) if age_list else 0
    print(f"Age list: {age_list}")
    print(f"Average age: {avg_age:.2f}")
    absolute_age = round(abs(avg_age))

    # Define the range
    lower_bound = absolute_age - 2
    upper_bound = absolute_age + 2

    return absolute_age, lower_bound, upper_bound


if __name__ == "__main__":
    parser = get_parser()
    args = parser.parse_args()

    absolute_age, lower_bound, upper_bound = main(
        args.input, args.output, args.detector_weights, args.checkpoint,
        args.device, args.with_persons, args.disable_faces, args.draw
    )
    # Output the results in the desired format
    print(f"Absolute Age: {absolute_age}")
    print(f"Range: {lower_bound} - {upper_bound}")
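Because `main()` in demo_copy.py takes plain arguments instead of parsing the CLI, it can be called directly from other code (for example a Django view). A hypothetical call sketch; the import path assumes the Django project root is on `sys.path`, and all file paths are placeholders:

```python
from age_estimator.mivolo.demo_copy import main  # adjust to wherever demo_copy.py lives

# Placeholder paths; main() raises ValueError if no face is found in the sampled frames.
absolute_age, lower_bound, upper_bound = main(
    video_path="/tmp/upload.mp4",
    output_folder="/tmp/age_out",
    detector_weights="models/yolov8x_person_face.pt",
    checkpoint="models/mivolo_imbd.pth.tar",
    device="cpu",
    with_persons=True,
    disable_faces=False,
    draw=False,
)
print(f"Estimated age ~{absolute_age} (range {lower_bound}-{upper_bound})")
```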
age_estimator/mivolo/eval_pretrained.py
ADDED
@@ -0,0 +1,232 @@
import argparse
import json
import logging
from typing import Tuple

import matplotlib.pyplot as plt
import seaborn as sns
import torch
from eval_tools import Metrics, time_sync, write_results
from mivolo.data.dataset import build as build_data
from mivolo.model.mi_volo import MiVOLO
from timm.utils import setup_default_logging

_logger = logging.getLogger("inference")
LOG_FREQUENCY = 10


def get_parser():
    parser = argparse.ArgumentParser(description="PyTorch MiVOLO Validation")
    parser.add_argument("--dataset_images", default="", type=str, required=True, help="path to images")
    parser.add_argument("--dataset_annotations", default="", type=str, required=True, help="path to annotations")
    parser.add_argument(
        "--dataset_name",
        default=None,
        type=str,
        required=True,
        choices=["utk", "imdb", "lagenda", "fairface", "adience", "agedb", "cacd"],
        help="dataset name",
    )
    parser.add_argument("--split", default="validation", help="dataset splits separated by comma (default: validation)")
    parser.add_argument("--checkpoint", default="", type=str, required=True, help="path to mivolo checkpoint")

    parser.add_argument("--batch-size", default=64, type=int, help="batch size")
    parser.add_argument(
        "--workers", default=4, type=int, metavar="N", help="number of data loading workers (default: 4)"
    )
    parser.add_argument("--device", default="cuda", type=str, help="Device (accelerator) to use.")
    parser.add_argument("--l-for-cs", type=int, default=5, help="L for CS (cumulative score)")

    parser.add_argument("--half", action="store_true", default=False, help="use half-precision model")
    parser.add_argument(
        "--with-persons", action="store_true", default=False, help="If the model will run with persons, if available"
    )
    parser.add_argument(
        "--disable-faces", action="store_true", default=False, help="If the model will use only persons if available"
    )

    parser.add_argument("--draw-hist", action="store_true", help="Draws the hist of error by age")
    parser.add_argument(
        "--results-file",
        default="",
        type=str,
        metavar="FILENAME",
        help="Output csv file for validation results (summary)",
    )
    parser.add_argument(
        "--results-format", default="csv", type=str, help="Format for results file one of (csv, json) (default: csv)."
    )

    return parser


def process_batch(
    mivolo_model: MiVOLO,
    input: torch.tensor,
    target: torch.tensor,
    num_classes_gender: int = 2,
):

    start = time_sync()
    output = mivolo_model.inference(input)
    # target with age == -1 and gender == -1 marks that sample is not valid
    assert not (all(target[:, 0] == -1) and all(target[:, 1] == -1))

    if not mivolo_model.meta.only_age:
        gender_out = output[:, :num_classes_gender]
        gender_target = target[:, 1]
        age_out = output[:, num_classes_gender:]
    else:
        age_out = output
        gender_out, gender_target = None, None

    # measure elapsed time
    process_time = time_sync() - start

    age_target = target[:, 0].unsqueeze(1)

    return age_out, age_target, gender_out, gender_target, process_time


def _filter_invalid_target(out: torch.tensor, target: torch.tensor):
    # exclude samples where target gt == -1, that marks sample is not valid
    mask = target != -1
    return out[mask], target[mask]


def postprocess_gender(gender_out: torch.tensor, gender_target: torch.tensor) -> Tuple[torch.tensor, torch.tensor]:
    if gender_target is None:
        return gender_out, gender_target
    return _filter_invalid_target(gender_out, gender_target)


def postprocess_age(age_out: torch.tensor, age_target: torch.tensor, dataset) -> Tuple[torch.tensor, torch.tensor]:
    # Revert _norm_age() operation. Output is 2 float tensors

    age_out, age_target = _filter_invalid_target(age_out, age_target)

    age_out = age_out * (dataset.max_age - dataset.min_age) + dataset.avg_age
    # clamp to 0 because age can be below zero
    age_out = torch.clamp(age_out, min=0)

    if dataset.age_classes is not None:
        # classification case
        age_out = torch.round(age_out)
        if dataset._intervals.device != age_out.device:
            dataset._intervals = dataset._intervals.to(age_out.device)
        age_inds = torch.searchsorted(dataset._intervals, age_out, side="right") - 1
        age_out = age_inds
    else:
        age_target = age_target * (dataset.max_age - dataset.min_age) + dataset.avg_age
    return age_out, age_target


def validate(args):

    if torch.cuda.is_available():
        torch.backends.cuda.matmul.allow_tf32 = True
        torch.backends.cudnn.benchmark = True

    mivolo_model = MiVOLO(
        args.checkpoint,
        args.device,
        half=args.half,
        use_persons=args.with_persons,
        disable_faces=args.disable_faces,
        verbose=True,
    )

    dataset, loader = build_data(
        name=args.dataset_name,
        images_path=args.dataset_images,
        annotations_path=args.dataset_annotations,
        split=args.split,
        mivolo_model=mivolo_model,  # to get meta information from model
        workers=args.workers,
        batch_size=args.batch_size,
    )

    d_stat = Metrics(args.l_for_cs, args.draw_hist, dataset.age_classes)

    # warmup, reduce variability of first batch time, especially for comparing torchscript vs non
    mivolo_model.warmup(args.batch_size)

    preproc_end = time_sync()
    for batch_idx, (input, target) in enumerate(loader):

        preprocess_time = time_sync() - preproc_end
        # get output and calculate loss
        age_out, age_target, gender_out, gender_target, process_time = process_batch(
            mivolo_model, input, target, dataset.num_classes_gender
        )

        gender_out, gender_target = postprocess_gender(gender_out, gender_target)
        age_out, age_target = postprocess_age(age_out, age_target, dataset)

        d_stat.update_gender_accuracy(gender_out, gender_target)
        if d_stat.is_regression:
            d_stat.update_regression_age_metrics(age_out, age_target)
        else:
            d_stat.update_age_accuracy(age_out, age_target)
        d_stat.update_time(process_time, preprocess_time, input.shape[0])

        if batch_idx % LOG_FREQUENCY == 0:
            _logger.info(
                "Test: [{0:>4d}/{1}] " "{2}".format(batch_idx, len(loader), d_stat.get_info_str(input.size(0)))
            )

        preproc_end = time_sync()

    # model info
    results = dict(
        model=args.checkpoint,
        dataset_name=args.dataset_name,
        param_count=round(mivolo_model.param_count / 1e6, 2),
        img_size=mivolo_model.input_size,
        use_faces=mivolo_model.meta.use_face_crops,
        use_persons=mivolo_model.meta.use_persons,
        in_chans=mivolo_model.meta.in_chans,
        batch=args.batch_size,
    )
    # metrics info
    results.update(d_stat.get_result())
    return results


def main():
    parser = get_parser()
    setup_default_logging()
    args = parser.parse_args()

    if torch.cuda.is_available():
        torch.backends.cuda.matmul.allow_tf32 = True
        torch.backends.cudnn.benchmark = True

    results = validate(args)

    result_str = " * Age Acc@1 {:.3f} ({:.3f})".format(results["agetop1"], results["agetop1_err"])
    if "gendertop1" in results:
        result_str += " Gender Acc@1 {:.3f} ({:.3f})".format(results["gendertop1"], results["gendertop1_err"])
    result_str += " Mean inference time {:.3f} ms Mean preprocessing time {:.3f}".format(
        results["mean_inference_time"], results["mean_preprocessing_time"]
    )
    _logger.info(result_str)

    if args.draw_hist and "per_age_error" in results:
        err = [sum(v) / len(v) for k, v in results["per_age_error"].items()]
        ages = list(results["per_age_error"].keys())
        sns.scatterplot(x=ages, y=err, hue=err)
        plt.legend([], [], frameon=False)
        plt.xlabel("Age")
        plt.ylabel("MAE")
        plt.savefig("age_error.png", dpi=300)

    if args.results_file:
        write_results(args.results_file, results, format=args.results_format)

    # output results in JSON to stdout w/ delimiter for runner script
    print(f"--result\n{json.dumps(results, indent=4)}")


if __name__ == "__main__":
    main()
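The `postprocess_age` step above maps normalized regression outputs back to years via `age = norm * (max_age - min_age) + avg_age`, then clamps at zero. A small worked example with made-up dataset statistics (min_age=1, max_age=95, avg_age=36 are illustrative values, not the real dataset constants):

```python
import torch

# Illustrative dataset stats, not the actual values stored on the dataset object.
min_age, max_age, avg_age = 1.0, 95.0, 36.0

normed = torch.tensor([-0.12, 0.05, 0.31])      # raw regression outputs
ages = normed * (max_age - min_age) + avg_age   # -> tensor([24.72, 40.70, 65.14])
ages = torch.clamp(ages, min=0)                 # predictions can come out negative; clamp like the script does
print(ages)
```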
age_estimator/mivolo/eval_tools.py
ADDED
@@ -0,0 +1,149 @@
import csv
import json
import time
from collections import OrderedDict, defaultdict

import torch
from mivolo.data.misc import cumulative_error, cumulative_score
from timm.utils import AverageMeter, accuracy


def time_sync():
    # pytorch-accurate time
    if torch.cuda.is_available():
        torch.cuda.synchronize()
    return time.time()


def write_results(results_file, results, format="csv"):
    with open(results_file, mode="w") as cf:
        if format == "json":
            json.dump(results, cf, indent=4)
        else:
            if not isinstance(results, (list, tuple)):
                results = [results]
            if not results:
                return
            dw = csv.DictWriter(cf, fieldnames=results[0].keys())
            dw.writeheader()
            for r in results:
                dw.writerow(r)
            cf.flush()


class Metrics:
    def __init__(self, l_for_cs, draw_hist, age_classes=None):
        self.batch_time = AverageMeter()
        self.preproc_batch_time = AverageMeter()
        self.seen = 0

        self.losses = AverageMeter()
        self.top1_m_gender = AverageMeter()
        self.top1_m_age = AverageMeter()

        if age_classes is None:
            self.is_regression = True
            self.av_csl_age = AverageMeter()
            self.max_error = AverageMeter()
            self.per_age_error = defaultdict(list)
            self.l_for_cs = l_for_cs
        else:
            self.is_regression = False

        self.draw_hist = draw_hist

    def update_regression_age_metrics(self, age_out, age_target):
        batch_size = age_out.size(0)

        age_abs_err = torch.abs(age_out - age_target)
        age_acc1 = torch.sum(age_abs_err) / age_out.shape[0]
        age_csl = cumulative_score(age_out, age_target, self.l_for_cs)
        me = cumulative_error(age_out, age_target, 20)

        self.top1_m_age.update(age_acc1.item(), batch_size)
        self.av_csl_age.update(age_csl.item(), batch_size)
        self.max_error.update(me.item(), batch_size)

        if self.draw_hist:
            for i in range(age_out.shape[0]):
                self.per_age_error[int(age_target[i].item())].append(age_abs_err[i].item())

    def update_age_accuracy(self, age_out, age_target):
        batch_size = age_out.size(0)
        if batch_size == 0:
            return
        correct = torch.sum(age_out == age_target)
        age_acc1 = correct * 100.0 / batch_size
        self.top1_m_age.update(age_acc1.item(), batch_size)

    def update_gender_accuracy(self, gender_out, gender_target):
        if gender_out is None or gender_out.size(0) == 0:
            return
        batch_size = gender_out.size(0)
        gender_acc1 = accuracy(gender_out, gender_target, topk=(1,))[0]
        if gender_acc1 is not None:
            self.top1_m_gender.update(gender_acc1.item(), batch_size)

    def update_loss(self, loss, batch_size):
        self.losses.update(loss.item(), batch_size)

    def update_time(self, process_time, preprocess_time, batch_size):
        self.seen += batch_size
        self.batch_time.update(process_time)
        self.preproc_batch_time.update(preprocess_time)

    def get_info_str(self, batch_size):
        avg_time = (self.preproc_batch_time.sum + self.batch_time.sum) / self.batch_time.count
        cur_time = self.batch_time.val + self.preproc_batch_time.val
        middle_info = (
            "Time: {cur_time:.3f}s ({avg_time:.3f}s, {rate_avg:>7.2f}/s) "
            "Loss: {loss.val:>7.4f} ({loss.avg:>6.4f}) "
            "Gender Acc: {top1gender.val:>7.2f} ({top1gender.avg:>7.2f}) ".format(
                cur_time=cur_time,
                avg_time=avg_time,
                rate_avg=batch_size / avg_time,
                loss=self.losses,
                top1gender=self.top1_m_gender,
            )
        )

        if self.is_regression:
            age_info = (
                "Age CS@{l_for_cs}: {csl.val:>7.4f} ({csl.avg:>7.4f}) "
                "Age CE@20: {max_error.val:>7.4f} ({max_error.avg:>7.4f}) "
                "Age ME: {top1age.val:>7.2f} ({top1age.avg:>7.2f})".format(
                    top1age=self.top1_m_age, csl=self.av_csl_age, max_error=self.max_error, l_for_cs=self.l_for_cs
                )
            )
        else:
            age_info = "Age Acc: {top1age.val:>7.2f} ({top1age.avg:>7.2f})".format(top1age=self.top1_m_age)

        return middle_info + age_info

    def get_result(self):
        age_top1a = self.top1_m_age.avg
        gender_top1 = self.top1_m_gender.avg if self.top1_m_gender.count > 0 else None

        mean_per_image_time = self.batch_time.sum / self.seen
        mean_preprocessing_time = self.preproc_batch_time.sum / self.seen

        results = OrderedDict(
            mean_inference_time=mean_per_image_time * 1e3,
            mean_preprocessing_time=mean_preprocessing_time * 1e3,
            agetop1=round(age_top1a, 4),
            agetop1_err=round(100 - age_top1a, 4),
        )

        if self.is_regression:
            results.update(
                dict(
                    max_error=self.max_error.avg,
                    csl=self.av_csl_age.avg,
                    per_age_error=self.per_age_error,
                )
            )

        if gender_top1 is not None:
            results.update(dict(gendertop1=round(gender_top1, 4), gendertop1_err=round(100 - gender_top1, 4)))

        return results
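A rough sense of how the `Metrics` class above is driven in regression mode (a minimal sketch, not part of the upload; it assumes it is run from `age_estimator/mivolo` so `eval_tools`, `mivolo`, `timm`, and `torch` are importable, and the tensors are toy values):

import torch
from eval_tools import Metrics

# Regression-mode metrics: l_for_cs=5 means CS@5, no per-age histogram.
metrics = Metrics(l_for_cs=5, draw_hist=False, age_classes=None)

# Toy predictions and targets for a batch of 4 (for illustration only).
age_out = torch.tensor([23.0, 31.5, 40.2, 18.9])
age_target = torch.tensor([25.0, 30.0, 44.0, 19.0])

metrics.update_regression_age_metrics(age_out, age_target)
metrics.update_time(process_time=0.05, preprocess_time=0.01, batch_size=4)

print(metrics.get_info_str(batch_size=4))  # timing + CS@5 / CE@20 / mean error line
print(metrics.get_result())                # OrderedDict with timing and age metrics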
age_estimator/mivolo/images/MiVOLO.jpg
ADDED
age_estimator/mivolo/infer.py
ADDED
@@ -0,0 +1,88 @@
#!/usr/bin/env python
import pathlib
import os
import huggingface_hub
import numpy as np
import argparse
from dataclasses import dataclass
from mivolo.predictor import Predictor
from PIL import Image

@dataclass
class Cfg:
    detector_weights: str
    checkpoint: str
    device: str = "cpu"
    with_persons: bool = True
    disable_faces: bool = False
    draw: bool = True


def load_models():
    detector_path = huggingface_hub.hf_hub_download('iitolstykh/demo_yolov8_detector',
                                                    'yolov8x_person_face.pt')

    age_gender_path_v1 = 'age_estimator/MiVOLO-main/models/model_imdb_cross_person_4.22_99.46.pth.tar'
    predictor_cfg_v1 = Cfg(detector_path, age_gender_path_v1)

    predictor_v1 = Predictor(predictor_cfg_v1)

    return predictor_v1

def detect(image: np.ndarray, score_threshold: float, iou_threshold: float, mode: str, predictor: Predictor) -> np.ndarray:
    predictor.detector.detector_kwargs['conf'] = score_threshold
    predictor.detector.detector_kwargs['iou'] = iou_threshold

    if mode == "Use persons and faces":
        use_persons = True
        disable_faces = False
    elif mode == "Use persons only":
        use_persons = True
        disable_faces = True
    elif mode == "Use faces only":
        use_persons = False
        disable_faces = False

    predictor.age_gender_model.meta.use_persons = use_persons
    predictor.age_gender_model.meta.disable_faces = disable_faces

    image = image[:, :, ::-1]  # RGB -> BGR for OpenCV
    detected_objects, out_im = predictor.recognize(image)
    return out_im[:, :, ::-1]  # BGR -> RGB

def load_image(image_path: str):
    image = Image.open(image_path)
    image_np = np.array(image)
    return image_np

def main(args):
    # Load models
    predictor_v1 = load_models()

    # Set parameters from args
    score_threshold = args.score_threshold
    iou_threshold = args.iou_threshold
    mode = args.mode

    # Load and process image
    image_np = load_image(args.image_path)

    # Predict with model
    result = detect(image_np, score_threshold, iou_threshold, mode, predictor_v1)

    output_image = Image.fromarray(result)
    output_image.save(args.output_path)
    output_image.show()

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='Object Detection with YOLOv8 and Age/Gender Prediction')
    parser.add_argument('--image_path', type=str, required=True, help='Path to the input image')
    parser.add_argument('--output_path', type=str, default='output_image.jpg', help='Path to save the output image')
    parser.add_argument('--score_threshold', type=float, default=0.4, help='Score threshold for detection')
    parser.add_argument('--iou_threshold', type=float, default=0.7, help='IoU threshold for detection')
    parser.add_argument('--mode', type=str, choices=["Use persons and faces", "Use persons only", "Use faces only"],
                        default="Use persons and faces", help='Detection mode')

    args = parser.parse_args()
    main(args)
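A hedged, programmatic equivalent of running `python infer.py --image_path ...` (a sketch only: it assumes the `mivolo` package is importable, the YOLOv8 detector can be downloaded from the Hub, and the age/gender checkpoint hard-coded in `load_models()` exists locally; the input path is a placeholder):

from infer import load_models, load_image, detect
from PIL import Image

predictor = load_models()
image_np = load_image("examples/group_photo.jpg")  # hypothetical input image
result = detect(image_np, score_threshold=0.4, iou_threshold=0.7,
                mode="Use persons and faces", predictor=predictor)
Image.fromarray(result).save("output_image.jpg")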
age_estimator/mivolo/license/en_us.pdf
ADDED
Binary file (158 kB). View file
|
|
age_estimator/mivolo/license/ru.pdf
ADDED
Binary file (199 kB). View file
|
|
age_estimator/mivolo/measure_time.py
ADDED
@@ -0,0 +1,77 @@
import pandas as pd
import torch
import tqdm
from eval_tools import time_sync
from mivolo.model.create_timm_model import create_model

if __name__ == "__main__":

    face_person_ckpt_path = "/data/dataset/iikrasnova/age_gender/pretrained/checkpoint-377.pth.tar"
    face_person_input_size = [6, 224, 224]

    face_age_ckpt_path = "/data/dataset/iikrasnova/age_gender/pretrained/model_only_age_imdb_4.32.pth.tar"
    face_input_size = [3, 224, 224]

    model_names = ["face_body_model", "face_model"]
    # batch_size = 16
    steps = 1000
    warmup_steps = 10
    device = torch.device("cuda:1")

    df_data = []
    batch_sizes = [1, 2, 4, 8, 16, 32, 64, 128, 256, 512]

    for ckpt_path, input_size, model_name, num_classes in zip(
        [face_person_ckpt_path, face_age_ckpt_path], [face_person_input_size, face_input_size], model_names, [3, 1]
    ):

        in_chans = input_size[0]
        print(f"Collecting stat for {ckpt_path} ...")
        model = create_model(
            "mivolo_d1_224",
            num_classes=num_classes,
            in_chans=in_chans,
            pretrained=False,
            checkpoint_path=ckpt_path,
            filter_keys=["fds."],
        )
        model = model.to(device)
        model.eval()
        model = model.half()

        time_per_batch = {}
        for batch_size in batch_sizes:
            create_t0 = time_sync()
            for _ in range(steps):
                inputs = torch.randn((batch_size,) + tuple(input_size)).to(device).half()
            create_t1 = time_sync()
            create_taken = create_t1 - create_t0

            with torch.no_grad():
                inputs = torch.randn((batch_size,) + tuple(input_size)).to(device).half()
                for _ in range(warmup_steps):
                    out = model(inputs)

                all_time = 0
                for _ in tqdm.tqdm(range(steps), desc=f"{model_name} batch {batch_size}"):
                    start = time_sync()
                    inputs = torch.randn((batch_size,) + tuple(input_size)).to(device).half()
                    out = model(inputs)
                    out += 1
                    end = time_sync()
                    all_time += end - start

                time_taken = (all_time - create_taken) * 1000 / steps / batch_size
                print(f"Inference {inputs.shape}, steps: {steps}. Mean time taken {time_taken} ms / image")

                time_per_batch[str(batch_size)] = f"{time_taken:.2f}"
        df_data.append(time_per_batch)

    headers = list(map(str, batch_sizes))
    output_df = pd.DataFrame(df_data, columns=headers)
    output_df.index = model_names

    df2_transposed = output_df.T
    out_file = "batch_sizes.csv"
    df2_transposed.to_csv(out_file, sep=",")
    print(f"Saved time stat for {len(df2_transposed)} batches to {out_file}")
age_estimator/mivolo/mivolo/__init__.py
ADDED
File without changes
|
age_estimator/mivolo/mivolo/__pycache__/__init__.cpython-38.pyc
ADDED
Binary file (156 Bytes). View file
|
|
age_estimator/mivolo/mivolo/__pycache__/predictor.cpython-38.pyc
ADDED
Binary file (2.52 kB). View file
|
|
age_estimator/mivolo/mivolo/__pycache__/structures.cpython-38.pyc
ADDED
Binary file (17 kB). View file
|
|
age_estimator/mivolo/mivolo/data/__init__.py
ADDED
File without changes
|
age_estimator/mivolo/mivolo/data/__pycache__/__init__.cpython-38.pyc
ADDED
Binary file (161 Bytes). View file
|
|
age_estimator/mivolo/mivolo/data/__pycache__/data_reader.cpython-38.pyc
ADDED
Binary file (5.23 kB). View file
|
|
age_estimator/mivolo/mivolo/data/__pycache__/misc.cpython-38.pyc
ADDED
Binary file (7.47 kB). View file
|
|
age_estimator/mivolo/mivolo/data/data_reader.py
ADDED
@@ -0,0 +1,125 @@
import os
from collections import defaultdict
from dataclasses import dataclass, field
from enum import Enum
from typing import Dict, List, Optional, Tuple

import pandas as pd

IMAGES_EXT: Tuple = (".jpeg", ".jpg", ".png", ".webp", ".bmp", ".gif")
VIDEO_EXT: Tuple = (".mp4", ".avi", ".mov", ".mkv", ".webm")


@dataclass
class PictureInfo:
    image_path: str
    age: Optional[str]  # age or age range(start;end format) or "-1"
    gender: Optional[str]  # "M" or "F" or "-1"
    bbox: List[int] = field(default_factory=lambda: [-1, -1, -1, -1])  # face bbox: xyxy
    person_bbox: List[int] = field(default_factory=lambda: [-1, -1, -1, -1])  # person bbox: xyxy

    @property
    def has_person_bbox(self) -> bool:
        return any(coord != -1 for coord in self.person_bbox)

    @property
    def has_face_bbox(self) -> bool:
        return any(coord != -1 for coord in self.bbox)

    def has_gt(self, only_age: bool = False) -> bool:
        if only_age:
            return self.age != "-1"
        else:
            return not (self.age == "-1" and self.gender == "-1")

    def clear_person_bbox(self):
        self.person_bbox = [-1, -1, -1, -1]

    def clear_face_bbox(self):
        self.bbox = [-1, -1, -1, -1]


class AnnotType(Enum):
    ORIGINAL = "original"
    PERSONS = "persons"
    NONE = "none"

    @classmethod
    def _missing_(cls, value):
        print(f"WARN: Unknown annotation type {value}.")
        return AnnotType.NONE


def get_all_files(path: str, extensions: Tuple = IMAGES_EXT):
    files_all = []
    for root, subFolders, files in os.walk(path):
        for name in files:
            # linux tricks with .directory that still is file
            if "directory" not in name and sum([ext.lower() in name.lower() for ext in extensions]) > 0:
                files_all.append(os.path.join(root, name))
    return files_all


class InputType(Enum):
    Image = 0
    Video = 1
    VideoStream = 2


def get_input_type(input_path: str) -> InputType:
    if os.path.isdir(input_path):
        print("Input is a folder, only images will be processed")
        return InputType.Image
    elif os.path.isfile(input_path):
        if input_path.endswith(VIDEO_EXT):
            return InputType.Video
        if input_path.endswith(IMAGES_EXT):
            return InputType.Image
        else:
            raise ValueError(
                f"Unknown or unsupported input file format {input_path}, \
                supported video formats: {VIDEO_EXT}, \
                supported image formats: {IMAGES_EXT}"
            )
    elif input_path.startswith("http") and not input_path.endswith(IMAGES_EXT):
        return InputType.VideoStream
    else:
        raise ValueError(f"Unknown input {input_path}")


def read_csv_annotation_file(annotation_file: str, images_dir: str, ignore_without_gt=False):
    bboxes_per_image: Dict[str, List[PictureInfo]] = defaultdict(list)

    df = pd.read_csv(annotation_file, sep=",")

    annot_type = AnnotType("persons") if "person_x0" in df.columns else AnnotType("original")
    print(f"Reading {annotation_file} (type: {annot_type})...")

    missing_images = 0
    for index, row in df.iterrows():
        img_path = os.path.join(images_dir, row["img_name"])
        if not os.path.exists(img_path):
            missing_images += 1
            continue

        face_x1, face_y1, face_x2, face_y2 = row["face_x0"], row["face_y0"], row["face_x1"], row["face_y1"]
        age, gender = str(row["age"]), str(row["gender"])

        if ignore_without_gt and (age == "-1" or gender == "-1"):
            continue

        if annot_type == AnnotType.PERSONS:
            p_x1, p_y1, p_x2, p_y2 = row["person_x0"], row["person_y0"], row["person_x1"], row["person_y1"]
            person_bbox = list(map(int, [p_x1, p_y1, p_x2, p_y2]))
        else:
            person_bbox = [-1, -1, -1, -1]

        bbox = list(map(int, [face_x1, face_y1, face_x2, face_y2]))
        pic_info = PictureInfo(img_path, age, gender, bbox, person_bbox)
        assert isinstance(pic_info.person_bbox, list)

        bboxes_per_image[img_path].append(pic_info)

    if missing_images > 0:
        print(f"WARNING: Missing images: {missing_images}/{len(df)}")
    return bboxes_per_image, annot_type
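A small usage sketch of the reader above (the CSV and image paths are placeholders; the annotation file needs at least `img_name`, `face_x0..face_y1`, `age`, `gender`, and optionally the `person_x0..person_y1` columns — rows whose image file is missing are skipped):

from mivolo.data.data_reader import read_csv_annotation_file

bboxes_per_image, annot_type = read_csv_annotation_file(
    "annotations/valid.csv", images_dir="images", ignore_without_gt=True
)
print(f"annotation type: {annot_type}")
for img_path, samples in bboxes_per_image.items():
    for info in samples:  # each entry is a PictureInfo
        if info.has_face_bbox and info.has_gt():
            print(img_path, info.age, info.gender, info.bbox)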
age_estimator/mivolo/mivolo/data/dataset/__init__.py
ADDED
@@ -0,0 +1,66 @@
from typing import Tuple

import torch
from mivolo.model.mi_volo import MiVOLO

from .age_gender_dataset import AgeGenderDataset
from .age_gender_loader import create_loader
from .classification_dataset import AdienceDataset, FairFaceDataset

DATASET_CLASS_MAP = {
    "utk": AgeGenderDataset,
    "lagenda": AgeGenderDataset,
    "imdb": AgeGenderDataset,
    "agedb": AgeGenderDataset,
    "cacd": AgeGenderDataset,
    "adience": AdienceDataset,
    "fairface": FairFaceDataset,
}


def build(
    name: str,
    images_path: str,
    annotations_path: str,
    split: str,
    mivolo_model: MiVOLO,
    workers: int,
    batch_size: int,
) -> Tuple[torch.utils.data.Dataset, torch.utils.data.DataLoader]:

    dataset_class = DATASET_CLASS_MAP[name]

    dataset: torch.utils.data.Dataset = dataset_class(
        images_path=images_path,
        annotations_path=annotations_path,
        name=name,
        split=split,
        target_size=mivolo_model.input_size,
        max_age=mivolo_model.meta.max_age,
        min_age=mivolo_model.meta.min_age,
        model_with_persons=mivolo_model.meta.with_persons_model,
        use_persons=mivolo_model.meta.use_persons,
        disable_faces=mivolo_model.meta.disable_faces,
        only_age=mivolo_model.meta.only_age,
    )

    data_config = mivolo_model.data_config

    in_chans = 3 if not mivolo_model.meta.with_persons_model else 6
    input_size = (in_chans, mivolo_model.input_size, mivolo_model.input_size)

    dataset_loader: torch.utils.data.DataLoader = create_loader(
        dataset,
        input_size=input_size,
        batch_size=batch_size,
        mean=data_config["mean"],
        std=data_config["std"],
        num_workers=workers,
        crop_pct=data_config["crop_pct"],
        crop_mode=data_config["crop_mode"],
        pin_memory=False,
        device=mivolo_model.device,
        target_type=dataset.target_dtype,
    )

    return dataset, dataset_loader
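The `in_chans = 3 if not ... else 6` line above reflects how samples are assembled downstream: for "with persons" models the dataset concatenates the face crop and the person crop along the channel axis (see the `np.concatenate(..., axis=0)` call in `age_gender_dataset.py` below). A toy illustration of the resulting shape, not the real preprocessing:

import numpy as np

face_crop = np.zeros((3, 224, 224), dtype=np.uint8)    # CHW face crop
person_crop = np.zeros((3, 224, 224), dtype=np.uint8)  # CHW person crop
sample = np.concatenate([face_crop, person_crop], axis=0)
print(sample.shape)  # (6, 224, 224) -> why the loader is built with in_chans=6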
age_estimator/mivolo/mivolo/data/dataset/age_gender_dataset.py
ADDED
@@ -0,0 +1,194 @@
1 |
+
import logging
|
2 |
+
from typing import Any, List, Optional, Set
|
3 |
+
|
4 |
+
import cv2
|
5 |
+
import numpy as np
|
6 |
+
import torch
|
7 |
+
from mivolo.data.dataset.reader_age_gender import ReaderAgeGender
|
8 |
+
from PIL import Image
|
9 |
+
from torchvision import transforms
|
10 |
+
|
11 |
+
_logger = logging.getLogger("AgeGenderDataset")
|
12 |
+
|
13 |
+
|
14 |
+
class AgeGenderDataset(torch.utils.data.Dataset):
|
15 |
+
def __init__(
|
16 |
+
self,
|
17 |
+
images_path,
|
18 |
+
annotations_path,
|
19 |
+
name=None,
|
20 |
+
split="train",
|
21 |
+
load_bytes=False,
|
22 |
+
img_mode="RGB",
|
23 |
+
transform=None,
|
24 |
+
is_training=False,
|
25 |
+
seed=1234,
|
26 |
+
target_size=224,
|
27 |
+
min_age=None,
|
28 |
+
max_age=None,
|
29 |
+
model_with_persons=False,
|
30 |
+
use_persons=False,
|
31 |
+
disable_faces=False,
|
32 |
+
only_age=False,
|
33 |
+
):
|
34 |
+
reader = ReaderAgeGender(
|
35 |
+
images_path,
|
36 |
+
annotations_path,
|
37 |
+
split=split,
|
38 |
+
seed=seed,
|
39 |
+
target_size=target_size,
|
40 |
+
with_persons=use_persons,
|
41 |
+
disable_faces=disable_faces,
|
42 |
+
only_age=only_age,
|
43 |
+
)
|
44 |
+
|
45 |
+
self.name = name
|
46 |
+
self.model_with_persons = model_with_persons
|
47 |
+
self.reader = reader
|
48 |
+
self.load_bytes = load_bytes
|
49 |
+
self.img_mode = img_mode
|
50 |
+
self.transform = transform
|
51 |
+
self._consecutive_errors = 0
|
52 |
+
self.is_training = is_training
|
53 |
+
self.random_flip = 0.0
|
54 |
+
|
55 |
+
# Setting up classes.
|
56 |
+
# If min and max classes are passed - use them to have the same preprocessing for validation
|
57 |
+
self.max_age: float = None
|
58 |
+
self.min_age: float = None
|
59 |
+
self.avg_age: float = None
|
60 |
+
self.set_ages_min_max(min_age, max_age)
|
61 |
+
|
62 |
+
self.genders = ["M", "F"]
|
63 |
+
self.num_classes_gender = len(self.genders)
|
64 |
+
|
65 |
+
self.age_classes: Optional[List[str]] = self.set_age_classes()
|
66 |
+
|
67 |
+
self.num_classes_age = 1 if self.age_classes is None else len(self.age_classes)
|
68 |
+
self.num_classes: int = self.num_classes_age + self.num_classes_gender
|
69 |
+
self.target_dtype = torch.float32
|
70 |
+
|
71 |
+
def set_age_classes(self) -> Optional[List[str]]:
|
72 |
+
return None # for regression dataset
|
73 |
+
|
74 |
+
def set_ages_min_max(self, min_age: Optional[float], max_age: Optional[float]):
|
75 |
+
|
76 |
+
assert all(age is None for age in [min_age, max_age]) or all(
|
77 |
+
age is not None for age in [min_age, max_age]
|
78 |
+
), "Both min and max age must be passed or none of them"
|
79 |
+
|
80 |
+
if max_age is not None and min_age is not None:
|
81 |
+
_logger.info(f"Received predefined min_age {min_age} and max_age {max_age}")
|
82 |
+
self.max_age = max_age
|
83 |
+
self.min_age = min_age
|
84 |
+
else:
|
85 |
+
# collect statistics from loaded dataset
|
86 |
+
all_ages_set: Set[int] = set()
|
87 |
+
for img_path, image_samples in self.reader._ann.items():
|
88 |
+
for image_sample_info in image_samples:
|
89 |
+
if image_sample_info.age == "-1":
|
90 |
+
continue
|
91 |
+
age = round(float(image_sample_info.age))
|
92 |
+
all_ages_set.add(age)
|
93 |
+
|
94 |
+
self.max_age = max(all_ages_set)
|
95 |
+
self.min_age = min(all_ages_set)
|
96 |
+
|
97 |
+
self.avg_age = (self.max_age + self.min_age) / 2.0
|
98 |
+
|
99 |
+
def _norm_age(self, age):
|
100 |
+
return (age - self.avg_age) / (self.max_age - self.min_age)
|
101 |
+
|
102 |
+
def parse_gender(self, _gender: str) -> float:
|
103 |
+
if _gender != "-1":
|
104 |
+
gender = float(0 if _gender == "M" or _gender == "0" else 1)
|
105 |
+
else:
|
106 |
+
gender = -1
|
107 |
+
return gender
|
108 |
+
|
109 |
+
def parse_target(self, _age: str, gender: str) -> List[Any]:
|
110 |
+
if _age != "-1":
|
111 |
+
age = round(float(_age))
|
112 |
+
age = self._norm_age(float(age))
|
113 |
+
else:
|
114 |
+
age = -1
|
115 |
+
|
116 |
+
target: List[float] = [age, self.parse_gender(gender)]
|
117 |
+
return target
|
118 |
+
|
119 |
+
@property
|
120 |
+
def transform(self):
|
121 |
+
return self._transform
|
122 |
+
|
123 |
+
@transform.setter
|
124 |
+
def transform(self, transform):
|
125 |
+
# Disable pretrained monkey-patched transforms
|
126 |
+
if not transform:
|
127 |
+
return
|
128 |
+
|
129 |
+
_trans = []
|
130 |
+
for trans in transform.transforms:
|
131 |
+
if "Resize" in str(trans):
|
132 |
+
continue
|
133 |
+
if "Crop" in str(trans):
|
134 |
+
continue
|
135 |
+
_trans.append(trans)
|
136 |
+
self._transform = transforms.Compose(_trans)
|
137 |
+
|
138 |
+
def apply_tranforms(self, image: Optional[np.ndarray]) -> np.ndarray:
|
139 |
+
if image is None:
|
140 |
+
return None
|
141 |
+
|
142 |
+
if self.transform is None:
|
143 |
+
return image
|
144 |
+
|
145 |
+
image = convert_to_pil(image, self.img_mode)
|
146 |
+
for trans in self.transform.transforms:
|
147 |
+
image = trans(image)
|
148 |
+
return image
|
149 |
+
|
150 |
+
def __getitem__(self, index):
|
151 |
+
# get preprocessed face and person crops (np.ndarray)
|
152 |
+
# resize + pad, for person crops: cut off other bboxes
|
153 |
+
images, target = self.reader[index]
|
154 |
+
|
155 |
+
target = self.parse_target(*target)
|
156 |
+
|
157 |
+
if self.model_with_persons:
|
158 |
+
face_image, person_image = images
|
159 |
+
person_image: np.ndarray = self.apply_tranforms(person_image)
|
160 |
+
else:
|
161 |
+
face_image = images[0]
|
162 |
+
person_image = None
|
163 |
+
|
164 |
+
face_image: np.ndarray = self.apply_tranforms(face_image)
|
165 |
+
|
166 |
+
if person_image is not None:
|
167 |
+
img = np.concatenate([face_image, person_image], axis=0)
|
168 |
+
else:
|
169 |
+
img = face_image
|
170 |
+
|
171 |
+
return img, target
|
172 |
+
|
173 |
+
def __len__(self):
|
174 |
+
return len(self.reader)
|
175 |
+
|
176 |
+
def filename(self, index, basename=False, absolute=False):
|
177 |
+
return self.reader.filename(index, basename, absolute)
|
178 |
+
|
179 |
+
def filenames(self, basename=False, absolute=False):
|
180 |
+
return self.reader.filenames(basename, absolute)
|
181 |
+
|
182 |
+
|
183 |
+
def convert_to_pil(cv_im: Optional[np.ndarray], img_mode: str = "RGB") -> "Image":
|
184 |
+
if cv_im is None:
|
185 |
+
return None
|
186 |
+
|
187 |
+
if img_mode == "RGB":
|
188 |
+
cv_im = cv2.cvtColor(cv_im, cv2.COLOR_BGR2RGB)
|
189 |
+
else:
|
190 |
+
raise Exception("Incorrect image mode has been passed!")
|
191 |
+
|
192 |
+
cv_im = np.ascontiguousarray(cv_im)
|
193 |
+
pil_image = Image.fromarray(cv_im)
|
194 |
+
return pil_image
|
age_estimator/mivolo/mivolo/data/dataset/age_gender_loader.py
ADDED
@@ -0,0 +1,169 @@
1 |
+
"""
|
2 |
+
Code adapted from timm https://github.com/huggingface/pytorch-image-models
|
3 |
+
|
4 |
+
Modifications and additions for mivolo by / Copyright 2023, Irina Tolstykh, Maxim Kuprashevich
|
5 |
+
"""
|
6 |
+
|
7 |
+
import logging
|
8 |
+
from contextlib import suppress
|
9 |
+
from functools import partial
|
10 |
+
from itertools import repeat
|
11 |
+
|
12 |
+
import numpy as np
|
13 |
+
import torch
|
14 |
+
import torch.utils.data
|
15 |
+
from timm.data.constants import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD
|
16 |
+
from timm.data.dataset import IterableImageDataset
|
17 |
+
from timm.data.loader import PrefetchLoader, _worker_init
|
18 |
+
from timm.data.transforms_factory import create_transform
|
19 |
+
|
20 |
+
_logger = logging.getLogger(__name__)
|
21 |
+
|
22 |
+
|
23 |
+
def fast_collate(batch, target_dtype=torch.uint8):
|
24 |
+
"""A fast collation function optimized for uint8 images (np array or torch) and target_dtype targets (labels)"""
|
25 |
+
assert isinstance(batch[0], tuple)
|
26 |
+
batch_size = len(batch)
|
27 |
+
if isinstance(batch[0][0], np.ndarray):
|
28 |
+
targets = torch.tensor([b[1] for b in batch], dtype=target_dtype)
|
29 |
+
assert len(targets) == batch_size
|
30 |
+
tensor = torch.zeros((batch_size, *batch[0][0].shape), dtype=torch.uint8)
|
31 |
+
for i in range(batch_size):
|
32 |
+
tensor[i] += torch.from_numpy(batch[i][0])
|
33 |
+
return tensor, targets
|
34 |
+
else:
|
35 |
+
raise ValueError(f"Incorrect batch type: {type(batch[0][0])}")
|
36 |
+
|
37 |
+
|
38 |
+
def adapt_to_chs(x, n):
|
39 |
+
if not isinstance(x, (tuple, list)):
|
40 |
+
x = tuple(repeat(x, n))
|
41 |
+
elif len(x) != n:
|
42 |
+
# doubled channels
|
43 |
+
if len(x) * 2 == n:
|
44 |
+
x = np.concatenate((x, x))
|
45 |
+
_logger.warning(f"Pretrained mean/std different shape than model (doubled channes), using concat: {x}.")
|
46 |
+
else:
|
47 |
+
x_mean = np.mean(x).item()
|
48 |
+
x = (x_mean,) * n
|
49 |
+
_logger.warning(f"Pretrained mean/std different shape than model, using avg value {x}.")
|
50 |
+
else:
|
51 |
+
assert len(x) == n, "normalization stats must match image channels"
|
52 |
+
return x
|
53 |
+
|
54 |
+
|
55 |
+
class PrefetchLoaderForMultiInput(PrefetchLoader):
|
56 |
+
def __init__(
|
57 |
+
self,
|
58 |
+
loader,
|
59 |
+
mean=IMAGENET_DEFAULT_MEAN,
|
60 |
+
std=IMAGENET_DEFAULT_STD,
|
61 |
+
channels=3,
|
62 |
+
device=torch.device("cuda"),
|
63 |
+
img_dtype=torch.float32,
|
64 |
+
):
|
65 |
+
|
66 |
+
mean = adapt_to_chs(mean, channels)
|
67 |
+
std = adapt_to_chs(std, channels)
|
68 |
+
normalization_shape = (1, channels, 1, 1)
|
69 |
+
|
70 |
+
self.loader = loader
|
71 |
+
self.device = device
|
72 |
+
self.img_dtype = img_dtype
|
73 |
+
self.mean = torch.tensor([x * 255 for x in mean], device=device, dtype=img_dtype).view(normalization_shape)
|
74 |
+
self.std = torch.tensor([x * 255 for x in std], device=device, dtype=img_dtype).view(normalization_shape)
|
75 |
+
|
76 |
+
self.is_cuda = torch.cuda.is_available() and device.type == "cuda"
|
77 |
+
|
78 |
+
def __iter__(self):
|
79 |
+
first = True
|
80 |
+
if self.is_cuda:
|
81 |
+
stream = torch.cuda.Stream()
|
82 |
+
stream_context = partial(torch.cuda.stream, stream=stream)
|
83 |
+
else:
|
84 |
+
stream = None
|
85 |
+
stream_context = suppress
|
86 |
+
|
87 |
+
for next_input, next_target in self.loader:
|
88 |
+
|
89 |
+
with stream_context():
|
90 |
+
next_input = next_input.to(device=self.device, non_blocking=True)
|
91 |
+
next_target = next_target.to(device=self.device, non_blocking=True)
|
92 |
+
next_input = next_input.to(self.img_dtype).sub_(self.mean).div_(self.std)
|
93 |
+
|
94 |
+
if not first:
|
95 |
+
yield input, target # noqa: F823, F821
|
96 |
+
else:
|
97 |
+
first = False
|
98 |
+
|
99 |
+
if stream is not None:
|
100 |
+
torch.cuda.current_stream().wait_stream(stream)
|
101 |
+
|
102 |
+
input = next_input
|
103 |
+
target = next_target
|
104 |
+
|
105 |
+
yield input, target
|
106 |
+
|
107 |
+
|
108 |
+
def create_loader(
|
109 |
+
dataset,
|
110 |
+
input_size,
|
111 |
+
batch_size,
|
112 |
+
mean=IMAGENET_DEFAULT_MEAN,
|
113 |
+
std=IMAGENET_DEFAULT_STD,
|
114 |
+
num_workers=1,
|
115 |
+
crop_pct=None,
|
116 |
+
crop_mode=None,
|
117 |
+
pin_memory=False,
|
118 |
+
img_dtype=torch.float32,
|
119 |
+
device=torch.device("cuda"),
|
120 |
+
persistent_workers=True,
|
121 |
+
worker_seeding="all",
|
122 |
+
target_type=torch.int64,
|
123 |
+
):
|
124 |
+
|
125 |
+
transform = create_transform(
|
126 |
+
input_size,
|
127 |
+
is_training=False,
|
128 |
+
use_prefetcher=True,
|
129 |
+
mean=mean,
|
130 |
+
std=std,
|
131 |
+
crop_pct=crop_pct,
|
132 |
+
crop_mode=crop_mode,
|
133 |
+
)
|
134 |
+
dataset.transform = transform
|
135 |
+
|
136 |
+
if isinstance(dataset, IterableImageDataset):
|
137 |
+
# give Iterable datasets early knowledge of num_workers so that sample estimates
|
138 |
+
# are correct before worker processes are launched
|
139 |
+
dataset.set_loader_cfg(num_workers=num_workers)
|
140 |
+
raise ValueError("Incorrect dataset type: IterableImageDataset")
|
141 |
+
|
142 |
+
loader_class = torch.utils.data.DataLoader
|
143 |
+
loader_args = dict(
|
144 |
+
batch_size=batch_size,
|
145 |
+
shuffle=False,
|
146 |
+
num_workers=num_workers,
|
147 |
+
sampler=None,
|
148 |
+
collate_fn=lambda batch: fast_collate(batch, target_dtype=target_type),
|
149 |
+
pin_memory=pin_memory,
|
150 |
+
drop_last=False,
|
151 |
+
worker_init_fn=partial(_worker_init, worker_seeding=worker_seeding),
|
152 |
+
persistent_workers=persistent_workers,
|
153 |
+
)
|
154 |
+
try:
|
155 |
+
loader = loader_class(dataset, **loader_args)
|
156 |
+
except TypeError:
|
157 |
+
loader_args.pop("persistent_workers") # only in Pytorch 1.7+
|
158 |
+
loader = loader_class(dataset, **loader_args)
|
159 |
+
|
160 |
+
loader = PrefetchLoaderForMultiInput(
|
161 |
+
loader,
|
162 |
+
mean=mean,
|
163 |
+
std=std,
|
164 |
+
channels=input_size[0],
|
165 |
+
device=device,
|
166 |
+
img_dtype=img_dtype,
|
167 |
+
)
|
168 |
+
|
169 |
+
return loader
|
age_estimator/mivolo/mivolo/data/dataset/classification_dataset.py
ADDED
@@ -0,0 +1,47 @@
from typing import Any, List, Optional

import torch

from .age_gender_dataset import AgeGenderDataset


class ClassificationDataset(AgeGenderDataset):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        self.target_dtype = torch.int32

    def set_age_classes(self) -> Optional[List[str]]:
        raise NotImplementedError

    def parse_target(self, age: str, gender: str) -> List[Any]:
        assert self.age_classes is not None
        if age != "-1":
            assert age in self.age_classes, f"Unknown category in {self.name} dataset: {age}"
            age_ind = self.age_classes.index(age)
        else:
            age_ind = -1

        target: List[int] = [age_ind, int(self.parse_gender(gender))]
        return target


class FairFaceDataset(ClassificationDataset):
    def set_age_classes(self) -> Optional[List[str]]:
        age_classes = ["0;2", "3;9", "10;19", "20;29", "30;39", "40;49", "50;59", "60;69", "70;120"]
        # a[i-1] <= v < a[i] => age_classes[i-1]
        self._intervals = torch.tensor([0, 3, 10, 20, 30, 40, 50, 60, 70])
        return age_classes


class AdienceDataset(ClassificationDataset):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        self.target_dtype = torch.int32

    def set_age_classes(self) -> Optional[List[str]]:
        age_classes = ["0;2", "4;6", "8;12", "15;20", "25;32", "38;43", "48;53", "60;100"]
        # a[i-1] <= v < a[i] => age_classes[i-1]
        self._intervals = torch.tensor([0, 4, 7, 14, 24, 36, 46, 57])
        return age_classes
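The `_intervals` tensors above encode the rule in the comment: an age `v` with `a[i-1] <= v < a[i]` belongs to `age_classes[i-1]`. A hypothetical helper (not part of the upload; the real bucketing happens elsewhere in the evaluation code) showing that `torch.bucketize(..., right=True) - 1` reproduces this rule:

import torch

def age_to_class(age: float, intervals: torch.Tensor, age_classes: list) -> str:
    # index of the interval whose lower bound is the largest one <= age
    idx = torch.bucketize(torch.tensor([float(age)]), intervals, right=True).item() - 1
    return age_classes[idx]

adience_classes = ["0;2", "4;6", "8;12", "15;20", "25;32", "38;43", "48;53", "60;100"]
adience_intervals = torch.tensor([0.0, 4.0, 7.0, 14.0, 24.0, 36.0, 46.0, 57.0])

print(age_to_class(5, adience_intervals, adience_classes))   # "4;6"
print(age_to_class(70, adience_intervals, adience_classes))  # "60;100"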
age_estimator/mivolo/mivolo/data/dataset/reader_age_gender.py
ADDED
@@ -0,0 +1,492 @@
1 |
+
import logging
|
2 |
+
import os
|
3 |
+
from functools import partial
|
4 |
+
from multiprocessing.pool import ThreadPool
|
5 |
+
from typing import Dict, List, Optional, Tuple
|
6 |
+
|
7 |
+
import cv2
|
8 |
+
import numpy as np
|
9 |
+
from mivolo.data.data_reader import AnnotType, PictureInfo, get_all_files, read_csv_annotation_file
|
10 |
+
from mivolo.data.misc import IOU, class_letterbox
|
11 |
+
from timm.data.readers.reader import Reader
|
12 |
+
from tqdm import tqdm
|
13 |
+
|
14 |
+
CROP_ROUND_TOL = 0.3
|
15 |
+
MIN_PERSON_SIZE = 100
|
16 |
+
MIN_PERSON_CROP_AFTERCUT_RATIO = 0.4
|
17 |
+
|
18 |
+
_logger = logging.getLogger("ReaderAgeGender")
|
19 |
+
|
20 |
+
|
21 |
+
class ReaderAgeGender(Reader):
|
22 |
+
"""
|
23 |
+
Reader for almost original imdb-wiki cleaned dataset.
|
24 |
+
Two changes:
|
25 |
+
1. Your annotation must be in ./annotation subdir of dataset root
|
26 |
+
2. Images must be in images subdir
|
27 |
+
|
28 |
+
"""
|
29 |
+
|
30 |
+
def __init__(
|
31 |
+
self,
|
32 |
+
images_path,
|
33 |
+
annotations_path,
|
34 |
+
split="validation",
|
35 |
+
target_size=224,
|
36 |
+
min_size=5,
|
37 |
+
seed=1234,
|
38 |
+
with_persons=False,
|
39 |
+
min_person_size=MIN_PERSON_SIZE,
|
40 |
+
disable_faces=False,
|
41 |
+
only_age=False,
|
42 |
+
min_person_aftercut_ratio=MIN_PERSON_CROP_AFTERCUT_RATIO,
|
43 |
+
crop_round_tol=CROP_ROUND_TOL,
|
44 |
+
):
|
45 |
+
super().__init__()
|
46 |
+
|
47 |
+
self.with_persons = with_persons
|
48 |
+
self.disable_faces = disable_faces
|
49 |
+
self.only_age = only_age
|
50 |
+
|
51 |
+
# can be only black for now, even though it's not very good with further normalization
|
52 |
+
self.crop_out_color = (0, 0, 0)
|
53 |
+
|
54 |
+
self.empty_crop = np.ones((target_size, target_size, 3)) * self.crop_out_color
|
55 |
+
self.empty_crop = self.empty_crop.astype(np.uint8)
|
56 |
+
|
57 |
+
self.min_person_size = min_person_size
|
58 |
+
self.min_person_aftercut_ratio = min_person_aftercut_ratio
|
59 |
+
self.crop_round_tol = crop_round_tol
|
60 |
+
|
61 |
+
splits = split.split(",")
|
62 |
+
self.splits = [split.strip() for split in splits if len(split.strip())]
|
63 |
+
assert len(self.splits), "Incorrect split arg"
|
64 |
+
|
65 |
+
self.min_size = min_size
|
66 |
+
self.seed = seed
|
67 |
+
self.target_size = target_size
|
68 |
+
|
69 |
+
# Reading annotations. Can be multiple files if annotations_path dir
|
70 |
+
self._ann: Dict[str, List[PictureInfo]] = {} # list of samples for each image
|
71 |
+
self._associated_objects: Dict[str, Dict[int, List[List[int]]]] = {}
|
72 |
+
self._faces_list: List[Tuple[str, int]] = [] # samples from this list will be loaded in __getitem__
|
73 |
+
|
74 |
+
self._read_annotations(images_path, annotations_path)
|
75 |
+
_logger.info(f"Dataset length: {len(self._faces_list)} crops")
|
76 |
+
|
77 |
+
def __getitem__(self, index):
|
78 |
+
return self._read_img_and_label(index)
|
79 |
+
|
80 |
+
def __len__(self):
|
81 |
+
return len(self._faces_list)
|
82 |
+
|
83 |
+
def _filename(self, index, basename=False, absolute=False):
|
84 |
+
img_p = self._faces_list[index][0]
|
85 |
+
return os.path.basename(img_p) if basename else img_p
|
86 |
+
|
87 |
+
def _read_annotations(self, images_path, csvs_path):
|
88 |
+
self._ann = {}
|
89 |
+
self._faces_list = []
|
90 |
+
self._associated_objects = {}
|
91 |
+
|
92 |
+
csvs = get_all_files(csvs_path, [".csv"])
|
93 |
+
csvs = [c for c in csvs if any(split_name in os.path.basename(c) for split_name in self.splits)]
|
94 |
+
|
95 |
+
# load annotations per image
|
96 |
+
for csv in csvs:
|
97 |
+
db, ann_type = read_csv_annotation_file(csv, images_path)
|
98 |
+
if self.with_persons and ann_type != AnnotType.PERSONS:
|
99 |
+
raise ValueError(
|
100 |
+
f"Annotation type in file {csv} contains no persons, "
|
101 |
+
f"but annotations with persons are requested."
|
102 |
+
)
|
103 |
+
self._ann.update(db)
|
104 |
+
|
105 |
+
if len(self._ann) == 0:
|
106 |
+
raise ValueError("Annotations are empty!")
|
107 |
+
|
108 |
+
self._ann, self._associated_objects = self.prepare_annotations()
|
109 |
+
images_list = list(self._ann.keys())
|
110 |
+
|
111 |
+
for img_path in images_list:
|
112 |
+
for index, image_sample_info in enumerate(self._ann[img_path]):
|
113 |
+
assert image_sample_info.has_gt(
|
114 |
+
self.only_age
|
115 |
+
), "Annotations must be checked with self.prepare_annotations() func"
|
116 |
+
self._faces_list.append((img_path, index))
|
117 |
+
|
118 |
+
def _read_img_and_label(self, index):
|
119 |
+
if not isinstance(index, int):
|
120 |
+
raise TypeError("ReaderAgeGender expected index to be integer")
|
121 |
+
|
122 |
+
img_p, face_index = self._faces_list[index]
|
123 |
+
ann: PictureInfo = self._ann[img_p][face_index]
|
124 |
+
img = cv2.imread(img_p)
|
125 |
+
|
126 |
+
face_empty = True
|
127 |
+
if ann.has_face_bbox and not (self.with_persons and self.disable_faces):
|
128 |
+
face_crop, face_empty = self._get_crop(ann.bbox, img)
|
129 |
+
|
130 |
+
if not self.with_persons and face_empty:
|
131 |
+
# model without persons
|
132 |
+
raise ValueError("Annotations must be checked with self.prepare_annotations() func")
|
133 |
+
|
134 |
+
if face_empty:
|
135 |
+
face_crop = self.empty_crop
|
136 |
+
|
137 |
+
person_empty = True
|
138 |
+
if self.with_persons or self.disable_faces:
|
139 |
+
if ann.has_person_bbox:
|
140 |
+
# cut off all associated objects from person crop
|
141 |
+
objects = self._associated_objects[img_p][face_index]
|
142 |
+
person_crop, person_empty = self._get_crop(
|
143 |
+
ann.person_bbox,
|
144 |
+
img,
|
145 |
+
crop_out_color=self.crop_out_color,
|
146 |
+
asced_objects=objects,
|
147 |
+
)
|
148 |
+
|
149 |
+
if face_empty and person_empty:
|
150 |
+
raise ValueError("Annotations must be checked with self.prepare_annotations() func")
|
151 |
+
|
152 |
+
if person_empty:
|
153 |
+
person_crop = self.empty_crop
|
154 |
+
|
155 |
+
return (face_crop, person_crop), [ann.age, ann.gender]
|
156 |
+
|
157 |
+
def _get_crop(
|
158 |
+
self,
|
159 |
+
bbox,
|
160 |
+
img,
|
161 |
+
asced_objects=None,
|
162 |
+
crop_out_color=(0, 0, 0),
|
163 |
+
) -> Tuple[np.ndarray, bool]:
|
164 |
+
|
165 |
+
empty_bbox = False
|
166 |
+
|
167 |
+
xmin, ymin, xmax, ymax = bbox
|
168 |
+
assert not (
|
169 |
+
ymax - ymin < self.min_size or xmax - xmin < self.min_size
|
170 |
+
), "Annotations must be checked with self.prepare_annotations() func"
|
171 |
+
|
172 |
+
crop = img[ymin:ymax, xmin:xmax]
|
173 |
+
|
174 |
+
if asced_objects:
|
175 |
+
# cut off other objects for person crop
|
176 |
+
crop, empty_bbox = _cropout_asced_objs(
|
177 |
+
asced_objects,
|
178 |
+
bbox,
|
179 |
+
crop.copy(),
|
180 |
+
crop_out_color=crop_out_color,
|
181 |
+
min_person_size=self.min_person_size,
|
182 |
+
crop_round_tol=self.crop_round_tol,
|
183 |
+
min_person_aftercut_ratio=self.min_person_aftercut_ratio,
|
184 |
+
)
|
185 |
+
if empty_bbox:
|
186 |
+
crop = self.empty_crop
|
187 |
+
|
188 |
+
crop = class_letterbox(crop, new_shape=(self.target_size, self.target_size), color=crop_out_color)
|
189 |
+
return crop, empty_bbox
|
190 |
+
|
191 |
+
def prepare_annotations(self):
|
192 |
+
|
193 |
+
good_anns: Dict[str, List[PictureInfo]] = {}
|
194 |
+
all_associated_objects: Dict[str, Dict[int, List[List[int]]]] = {}
|
195 |
+
|
196 |
+
if not self.with_persons:
|
197 |
+
# remove all persons
|
198 |
+
for img_path, bboxes in self._ann.items():
|
199 |
+
for sample in bboxes:
|
200 |
+
sample.clear_person_bbox()
|
201 |
+
|
202 |
+
# check dataset and collect associated_objects
|
203 |
+
verify_images_func = partial(
|
204 |
+
verify_images,
|
205 |
+
min_size=self.min_size,
|
206 |
+
min_person_size=self.min_person_size,
|
207 |
+
with_persons=self.with_persons,
|
208 |
+
disable_faces=self.disable_faces,
|
209 |
+
crop_round_tol=self.crop_round_tol,
|
210 |
+
min_person_aftercut_ratio=self.min_person_aftercut_ratio,
|
211 |
+
only_age=self.only_age,
|
212 |
+
)
|
213 |
+
num_threads = min(8, os.cpu_count())
|
214 |
+
|
215 |
+
all_msgs = []
|
216 |
+
broken = 0
|
217 |
+
skipped = 0
|
218 |
+
all_skipped_crops = 0
|
219 |
+
desc = "Check annotations..."
|
220 |
+
with ThreadPool(num_threads) as pool:
|
221 |
+
pbar = tqdm(
|
222 |
+
pool.imap_unordered(verify_images_func, list(self._ann.items())),
|
223 |
+
desc=desc,
|
224 |
+
total=len(self._ann),
|
225 |
+
)
|
226 |
+
|
227 |
+
for (img_info, associated_objects, msgs, is_corrupted, is_empty_annotations, skipped_crops) in pbar:
|
228 |
+
broken += 1 if is_corrupted else 0
|
229 |
+
all_msgs.extend(msgs)
|
230 |
+
all_skipped_crops += skipped_crops
|
231 |
+
skipped += 1 if is_empty_annotations else 0
|
232 |
+
if img_info is not None:
|
233 |
+
img_path, img_samples = img_info
|
234 |
+
good_anns[img_path] = img_samples
|
235 |
+
all_associated_objects.update({img_path: associated_objects})
|
236 |
+
|
237 |
+
pbar.desc = (
|
238 |
+
f"{desc} {skipped} images skipped ({all_skipped_crops} crops are incorrect); "
|
239 |
+
f"{broken} images corrupted"
|
240 |
+
)
|
241 |
+
|
242 |
+
pbar.close()
|
243 |
+
|
244 |
+
for msg in all_msgs:
|
245 |
+
print(msg)
|
246 |
+
print(f"\nLeft images: {len(good_anns)}")
|
247 |
+
|
248 |
+
return good_anns, all_associated_objects
|
249 |
+
|
250 |
+
|
251 |
+
def verify_images(
|
252 |
+
img_info,
|
253 |
+
min_size: int,
|
254 |
+
min_person_size: int,
|
255 |
+
with_persons: bool,
|
256 |
+
disable_faces: bool,
|
257 |
+
crop_round_tol: float,
|
258 |
+
min_person_aftercut_ratio: float,
|
259 |
+
only_age: bool,
|
260 |
+
):
|
261 |
+
# If crop is too small, if image can not be read or if image does not exist
|
262 |
+
# then filter out this sample
|
263 |
+
|
264 |
+
disable_faces = disable_faces and with_persons
|
265 |
+
kwargs = dict(
|
266 |
+
min_person_size=min_person_size,
|
267 |
+
disable_faces=disable_faces,
|
268 |
+
with_persons=with_persons,
|
269 |
+
crop_round_tol=crop_round_tol,
|
270 |
+
min_person_aftercut_ratio=min_person_aftercut_ratio,
|
271 |
+
only_age=only_age,
|
272 |
+
)
|
273 |
+
|
274 |
+
def bbox_correct(bbox, min_size, im_h, im_w) -> Tuple[bool, List[int]]:
|
275 |
+
ymin, ymax, xmin, xmax = _correct_bbox(bbox, im_h, im_w)
|
276 |
+
crop_h, crop_w = ymax - ymin, xmax - xmin
|
277 |
+
if crop_h < min_size or crop_w < min_size:
|
278 |
+
return False, [-1, -1, -1, -1]
|
279 |
+
bbox = [xmin, ymin, xmax, ymax]
|
280 |
+
return True, bbox
|
281 |
+
|
282 |
+
msgs = []
|
283 |
+
skipped_crops = 0
|
284 |
+
is_corrupted = False
|
285 |
+
is_empty_annotations = False
|
286 |
+
|
287 |
+
img_path: str = img_info[0]
|
288 |
+
img_samples: List[PictureInfo] = img_info[1]
|
289 |
+
try:
|
290 |
+
im_cv = cv2.imread(img_path)
|
291 |
+
im_h, im_w = im_cv.shape[:2]
|
292 |
+
except Exception:
|
293 |
+
msgs.append(f"Can not load image {img_path}")
|
294 |
+
is_corrupted = True
|
295 |
+
return None, {}, msgs, is_corrupted, is_empty_annotations, skipped_crops
|
296 |
+
|
297 |
+
out_samples: List[PictureInfo] = []
|
298 |
+
for sample in img_samples:
|
299 |
+
# correct face bbox
|
300 |
+
if sample.has_face_bbox:
|
301 |
+
is_correct, sample.bbox = bbox_correct(sample.bbox, min_size, im_h, im_w)
|
302 |
+
if not is_correct and sample.has_gt(only_age):
|
303 |
+
msgs.append("Small face. Passing..")
|
304 |
+
skipped_crops += 1
|
305 |
+
|
306 |
+
# correct person bbox
|
307 |
+
if sample.has_person_bbox:
|
308 |
+
is_correct, sample.person_bbox = bbox_correct(
|
309 |
+
sample.person_bbox, max(min_person_size, min_size), im_h, im_w
|
310 |
+
)
|
311 |
+
if not is_correct and sample.has_gt(only_age):
|
312 |
+
msgs.append(f"Small person {img_path}. Passing..")
|
313 |
+
skipped_crops += 1
|
314 |
+
|
315 |
+
if sample.has_face_bbox or sample.has_person_bbox:
|
316 |
+
out_samples.append(sample)
|
317 |
+
elif sample.has_gt(only_age):
|
318 |
+
msgs.append("Sample has no face and no body. Passing..")
|
319 |
+
skipped_crops += 1
|
320 |
+
|
321 |
+
# sort that samples with undefined age and gender be the last
|
322 |
+
out_samples = sorted(out_samples, key=lambda sample: 1 if not sample.has_gt(only_age) else 0)
|
323 |
+
|
324 |
+
# for each person find other faces and persons bboxes, intersected with it
|
325 |
+
associated_objects: Dict[int, List[List[int]]] = find_associated_objects(out_samples, only_age=only_age)
|
326 |
+
|
327 |
+
out_samples, associated_objects, skipped_crops = filter_bad_samples(
|
328 |
+
out_samples, associated_objects, im_cv, msgs, skipped_crops, **kwargs
|
329 |
+
)
|
330 |
+
|
331 |
+
out_img_info: Optional[Tuple[str, List]] = (img_path, out_samples)
|
332 |
+
if len(out_samples) == 0:
|
333 |
+
out_img_info = None
|
334 |
+
is_empty_annotations = True
|
335 |
+
|
336 |
+
return out_img_info, associated_objects, msgs, is_corrupted, is_empty_annotations, skipped_crops
|
337 |
+
|
338 |
+
|
339 |
+
def filter_bad_samples(
|
340 |
+
out_samples: List[PictureInfo],
|
341 |
+
associated_objects: dict,
|
342 |
+
im_cv: np.ndarray,
|
343 |
+
msgs: List[str],
|
344 |
+
skipped_crops: int,
|
345 |
+
**kwargs,
|
346 |
+
):
|
347 |
+
with_persons, disable_faces, min_person_size, crop_round_tol, min_person_aftercut_ratio, only_age = (
|
348 |
+
kwargs["with_persons"],
|
349 |
+
kwargs["disable_faces"],
|
350 |
+
kwargs["min_person_size"],
|
351 |
+
kwargs["crop_round_tol"],
|
352 |
+
kwargs["min_person_aftercut_ratio"],
|
353 |
+
kwargs["only_age"],
|
354 |
+
)
|
355 |
+
|
356 |
+
# left only samples with annotations
|
357 |
+
inds = [sample_ind for sample_ind, sample in enumerate(out_samples) if sample.has_gt(only_age)]
|
358 |
+
out_samples, associated_objects = _filter_by_ind(out_samples, associated_objects, inds)
|
359 |
+
|
360 |
+
if kwargs["disable_faces"]:
|
361 |
+
# clear all faces
|
362 |
+
for ind, sample in enumerate(out_samples):
|
363 |
+
sample.clear_face_bbox()
|
364 |
+
|
365 |
+
# left only samples with person_bbox
|
366 |
+
inds = [sample_ind for sample_ind, sample in enumerate(out_samples) if sample.has_person_bbox]
|
367 |
+
out_samples, associated_objects = _filter_by_ind(out_samples, associated_objects, inds)
|
368 |
+
|
369 |
+
if with_persons or disable_faces:
|
370 |
+
# check that preprocessing func
|
371 |
+
# _cropout_asced_objs() return not empty person_image for each out sample
|
372 |
+
|
373 |
+
inds = []
|
374 |
+
for ind, sample in enumerate(out_samples):
|
375 |
+
person_empty = True
|
376 |
+
if sample.has_person_bbox:
|
377 |
+
xmin, ymin, xmax, ymax = sample.person_bbox
|
378 |
+
crop = im_cv[ymin:ymax, xmin:xmax]
|
379 |
+
# cut off all associated objects from person crop
|
380 |
+
_, person_empty = _cropout_asced_objs(
|
381 |
+
associated_objects[ind],
|
382 |
+
sample.person_bbox,
|
383 |
+
crop.copy(),
|
384 |
+
min_person_size=min_person_size,
|
385 |
+
crop_round_tol=crop_round_tol,
|
386 |
+
min_person_aftercut_ratio=min_person_aftercut_ratio,
|
387 |
+
)
|
388 |
+
|
389 |
+
if person_empty and not sample.has_face_bbox:
|
390 |
+
msgs.append("Small person after preprocessing. Passing..")
|
391 |
+
skipped_crops += 1
|
392 |
+
else:
|
393 |
+
inds.append(ind)
|
394 |
+
out_samples, associated_objects = _filter_by_ind(out_samples, associated_objects, inds)
|
395 |
+
|
396 |
+
assert len(associated_objects) == len(out_samples)
|
397 |
+
return out_samples, associated_objects, skipped_crops
|
398 |
+
|
399 |
+
|
400 |
+
def _filter_by_ind(out_samples, associated_objects, inds):
|
401 |
+
_associated_objects = {}
|
402 |
+
_out_samples = []
|
403 |
+
for ind, sample in enumerate(out_samples):
|
404 |
+
if ind in inds:
|
405 |
+
_associated_objects[len(_out_samples)] = associated_objects[ind]
|
406 |
+
_out_samples.append(sample)
|
407 |
+
|
408 |
+
return _out_samples, _associated_objects
|
409 |
+
|
410 |
+
|
411 |
+
def find_associated_objects(
|
412 |
+
image_samples: List[PictureInfo], iou_thresh=0.0001, only_age=False
|
413 |
+
) -> Dict[int, List[List[int]]]:
|
414 |
+
"""
|
415 |
+
For each person (which has gt age and gt gender) find other faces and persons bboxes, intersected with it
|
416 |
+
"""
|
417 |
+
associated_objects: Dict[int, List[List[int]]] = {}
|
418 |
+
|
419 |
+
for iindex, image_sample_info in enumerate(image_samples):
|
420 |
+
# add own face
|
421 |
+
associated_objects[iindex] = [image_sample_info.bbox] if image_sample_info.has_face_bbox else []
|
422 |
+
|
423 |
+
if not image_sample_info.has_person_bbox or not image_sample_info.has_gt(only_age):
|
424 |
+
# if sample has not gt => not be used
|
425 |
+
continue
|
426 |
+
|
427 |
+
iperson_box = image_sample_info.person_bbox
|
428 |
+
for jindex, other_image_sample in enumerate(image_samples):
|
429 |
+
if iindex == jindex:
|
430 |
+
continue
|
431 |
+
if other_image_sample.has_face_bbox:
|
432 |
+
jface_bbox = other_image_sample.bbox
|
433 |
+
iou = _get_iou(jface_bbox, iperson_box)
|
434 |
+
if iou >= iou_thresh:
|
435 |
+
associated_objects[iindex].append(jface_bbox)
|
436 |
+
if other_image_sample.has_person_bbox:
|
437 |
+
jperson_bbox = other_image_sample.person_bbox
|
438 |
+
iou = _get_iou(jperson_bbox, iperson_box)
|
439 |
+
if iou >= iou_thresh:
|
440 |
+
associated_objects[iindex].append(jperson_bbox)
|
441 |
+
|
442 |
+
return associated_objects
|
443 |
+
|
444 |
+
|
445 |
+
def _cropout_asced_objs(
|
446 |
+
asced_objects,
|
447 |
+
person_bbox,
|
448 |
+
crop,
|
449 |
+
min_person_size,
|
450 |
+
crop_round_tol,
|
451 |
+
min_person_aftercut_ratio,
|
452 |
+
crop_out_color=(0, 0, 0),
|
453 |
+
):
|
454 |
+
empty = False
|
455 |
+
xmin, ymin, xmax, ymax = person_bbox
|
456 |
+
|
457 |
+
for a_obj in asced_objects:
|
458 |
+
aobj_xmin, aobj_ymin, aobj_xmax, aobj_ymax = a_obj
|
459 |
+
|
460 |
+
aobj_ymin = int(max(aobj_ymin - ymin, 0))
|
461 |
+
aobj_xmin = int(max(aobj_xmin - xmin, 0))
|
462 |
+
aobj_ymax = int(min(aobj_ymax - ymin, ymax - ymin))
|
463 |
+
aobj_xmax = int(min(aobj_xmax - xmin, xmax - xmin))
|
464 |
+
|
465 |
+
crop[aobj_ymin:aobj_ymax, aobj_xmin:aobj_xmax] = crop_out_color
|
466 |
+
|
467 |
+
# calc useful non-black area
|
468 |
+
remain_ratio = np.count_nonzero(crop) / (crop.shape[0] * crop.shape[1] * crop.shape[2])
|
469 |
+
if (crop.shape[0] < min_person_size or crop.shape[1] < min_person_size) or remain_ratio < min_person_aftercut_ratio:
|
470 |
+
crop = None
|
471 |
+
empty = True
|
472 |
+
|
473 |
+
return crop, empty
|
474 |
+
|
475 |
+
|
476 |
+
def _correct_bbox(bbox, h, w):
|
477 |
+
xmin, ymin, xmax, ymax = bbox
|
478 |
+
ymin = min(max(ymin, 0), h)
|
479 |
+
ymax = min(max(ymax, 0), h)
|
480 |
+
xmin = min(max(xmin, 0), w)
|
481 |
+
xmax = min(max(xmax, 0), w)
|
482 |
+
return ymin, ymax, xmin, xmax
|
483 |
+
|
484 |
+
|
485 |
+
def _get_iou(bbox1, bbox2):
|
486 |
+
xmin1, ymin1, xmax1, ymax1 = bbox1
|
487 |
+
xmin2, ymin2, xmax2, ymax2 = bbox2
|
488 |
+
iou = IOU(
|
489 |
+
[ymin1, xmin1, ymax1, xmax1],
|
490 |
+
[ymin2, xmin2, ymax2, xmax2],
|
491 |
+
)
|
492 |
+
return iou
|
age_estimator/mivolo/mivolo/data/misc.py
ADDED
@@ -0,0 +1,246 @@
import argparse
import ast
import re
from typing import List, Optional, Tuple, Union

import cv2
import numpy as np
import torch
import torchvision.transforms.functional as F
from scipy.optimize import linear_sum_assignment
from timm.data import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD

CROP_ROUND_RATE = 0.1
MIN_PERSON_CROP_NONZERO = 0.5

def aggregate_votes_winsorized(ages, max_age_dist=6):
    # Replace any annotation that is more than max_age_dist away from the median
    # with median + max_age_dist if it is above, or median - max_age_dist if it is below
    median = np.median(ages)
    ages = np.clip(ages, median - max_age_dist, median + max_age_dist)
    return np.mean(ages)

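A minimal usage sketch (the vote values are illustrative assumptions): one outlying annotation is clipped to median + max_age_dist before averaging.

>>> aggregate_votes_winsorized([23, 25, 24, 60])   # the 60 vote is clipped to 24.5 + 6 = 30.5 -> mean 25.625
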
def natural_key(string_):
    """See http://www.codinghorror.com/blog/archives/001018.html"""
    return [int(s) if s.isdigit() else s for s in re.split(r"(\d+)", string_.lower())]

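Usage sketch (the file names are made up): natural_key makes numeric parts sort numerically rather than lexicographically.

>>> sorted(["img10.jpg", "img2.jpg", "img1.jpg"], key=natural_key)   # -> ['img1.jpg', 'img2.jpg', 'img10.jpg']
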
def add_bool_arg(parser, name, default=False, help=""):
    dest_name = name.replace("-", "_")
    group = parser.add_mutually_exclusive_group(required=False)
    group.add_argument("--" + name, dest=dest_name, action="store_true", help=help)
    group.add_argument("--no-" + name, dest=dest_name, action="store_false", help=help)
    parser.set_defaults(**{dest_name: default})

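Usage sketch (the flag name "with-persons" is only an example here): add_bool_arg registers a --name / --no-name pair backed by a single boolean destination.

>>> parser = argparse.ArgumentParser()
>>> add_bool_arg(parser, "with-persons", default=True, help="use person boxes")
>>> parser.parse_args(["--no-with-persons"]).with_persons   # -> False
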
def cumulative_score(pred_ages, gt_ages, L, tol=1e-6):
    n = pred_ages.shape[0]
    num_correct = torch.sum(torch.abs(pred_ages - gt_ages) <= L + tol)
    cs_score = num_correct / n
    return cs_score


def cumulative_error(pred_ages, gt_ages, L, tol=1e-6):
    n = pred_ages.shape[0]
    num_correct = torch.sum(torch.abs(pred_ages - gt_ages) >= L + tol)
    cs_score = num_correct / n
    return cs_score

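Usage sketch (tensor values are illustrative): cumulative_score is the fraction of predictions within L years of the ground truth, while cumulative_error counts those at least L years off.

>>> pred = torch.tensor([23.0, 30.0, 41.0])
>>> gt = torch.tensor([25.0, 30.0, 50.0])
>>> cumulative_score(pred, gt, L=5)   # -> tensor(0.6667): 2 of 3 predictions are within 5 years
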
class ParseKwargs(argparse.Action):
    def __call__(self, parser, namespace, values, option_string=None):
        kw = {}
        for value in values:
            key, value = value.split("=")
            try:
                kw[key] = ast.literal_eval(value)
            except ValueError:
                kw[key] = str(value)  # fallback to string (avoid need to escape on command line)
        setattr(namespace, self.dest, kw)

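Usage sketch (the --model-kwargs flag name and values are illustrative): ParseKwargs turns key=value tokens into a dict, literal-evaluating values where possible and falling back to strings.

>>> parser = argparse.ArgumentParser()
>>> _ = parser.add_argument("--model-kwargs", nargs="*", default={}, action=ParseKwargs)
>>> parser.parse_args(["--model-kwargs", "drop_rate=0.1", "act_layer=gelu"]).model_kwargs   # -> {'drop_rate': 0.1, 'act_layer': 'gelu'}
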
def box_iou(box1, box2, over_second=False):
    """
    Return intersection-over-union (Jaccard index) of boxes.
    If over_second == True, return mean(intersection-over-union, (inter / area2))

    Both sets of boxes are expected to be in (x1, y1, x2, y2) format.

    Arguments:
        box1 (Tensor[N, 4])
        box2 (Tensor[M, 4])
    Returns:
        iou (Tensor[N, M]): the NxM matrix containing the pairwise
            IoU values for every element in boxes1 and boxes2
    """

    def box_area(box):
        # box = 4xn
        return (box[2] - box[0]) * (box[3] - box[1])

    area1 = box_area(box1.T)
    area2 = box_area(box2.T)

    # inter(N,M) = (rb(N,M,2) - lt(N,M,2)).clamp(0).prod(2)
    inter = (torch.min(box1[:, None, 2:], box2[:, 2:]) - torch.max(box1[:, None, :2], box2[:, :2])).clamp(0).prod(2)

    iou = inter / (area1[:, None] + area2 - inter)  # iou = inter / (area1 + area2 - inter)
    if over_second:
        return (inter / area2 + iou) / 2  # mean(inter / area2, iou)
    else:
        return iou

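Usage sketch (box coordinates are illustrative): with over_second=True the score also rewards a small box fully contained in a large one, which is what the face-to-person matching below relies on.

>>> persons = torch.tensor([[0.0, 0.0, 100.0, 200.0]])
>>> faces = torch.tensor([[20.0, 10.0, 60.0, 50.0]])
>>> box_iou(persons, faces)                    # -> tensor([[0.0800]])
>>> box_iou(persons, faces, over_second=True)  # -> tensor([[0.5400]]): the face lies fully inside the person box
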
def split_batch(bs: int, dev: int) -> Tuple[int, int]:
    full_bs = (bs // dev) * dev
    part_bs = bs - full_bs
    return full_bs, part_bs

def assign_faces(
    persons_bboxes: List[torch.tensor], faces_bboxes: List[torch.tensor], iou_thresh: float = 0.0001
) -> Tuple[List[Optional[int]], List[int]]:
    """
    Assign a person to each face where possible.
    Return:
        - assigned_faces List[Optional[int]]: mapping of face_ind to person_ind
            ( assigned_faces[face_ind] = person_ind ). person_ind can be None
        - unassigned_persons_inds List[int]: indexes of persons without any assigned face
    """

    assigned_faces: List[Optional[int]] = [None for _ in range(len(faces_bboxes))]
    unassigned_persons_inds: List[int] = [p_ind for p_ind in range(len(persons_bboxes))]

    if len(persons_bboxes) == 0 or len(faces_bboxes) == 0:
        return assigned_faces, unassigned_persons_inds

    cost_matrix = box_iou(torch.stack(persons_bboxes), torch.stack(faces_bboxes), over_second=True).cpu().numpy()
    persons_indexes, face_indexes = [], []

    if len(cost_matrix) > 0:
        persons_indexes, face_indexes = linear_sum_assignment(cost_matrix, maximize=True)

    matched_persons = set()
    for person_idx, face_idx in zip(persons_indexes, face_indexes):
        ciou = cost_matrix[person_idx][face_idx]
        if ciou > iou_thresh:
            if person_idx in matched_persons:
                # A person can not be assigned twice; in practice this should not happen
                continue
            assigned_faces[face_idx] = person_idx
            matched_persons.add(person_idx)

    unassigned_persons_inds = [p_ind for p_ind in range(len(persons_bboxes)) if p_ind not in matched_persons]

    return assigned_faces, unassigned_persons_inds

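Usage sketch (the boxes are illustrative): the Hungarian assignment matches each face to at most one person by the over_second IoU score.

>>> persons = [torch.tensor([0.0, 0.0, 100.0, 200.0]), torch.tensor([150.0, 0.0, 250.0, 200.0])]
>>> faces = [torch.tensor([20.0, 10.0, 60.0, 50.0])]
>>> assign_faces(persons, faces)   # -> ([0], [1]): the face is assigned to person 0, person 1 has no face
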
def class_letterbox(im, new_shape=(640, 640), color=(0, 0, 0), scaleup=True):
    # Resize and pad image while meeting stride-multiple constraints
    shape = im.shape[:2]  # current shape [height, width]
    if isinstance(new_shape, int):
        new_shape = (new_shape, new_shape)

    if im.shape[0] == new_shape[0] and im.shape[1] == new_shape[1]:
        return im

    # Scale ratio (new / old)
    r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
    if not scaleup:  # only scale down, do not scale up (for better val mAP)
        r = min(r, 1.0)

    # Compute padding
    # ratio = r, r  # width, height ratios
    new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
    dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]  # wh padding

    dw /= 2  # divide padding into 2 sides
    dh /= 2

    if shape[::-1] != new_unpad:  # resize
        im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)
    top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
    left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
    im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # add border
    return im

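Usage sketch (the input resolution is illustrative): the image is scaled to fit new_shape while keeping its aspect ratio, then padded with `color` to the exact target size.

>>> frame = np.zeros((480, 640, 3), dtype=np.uint8)       # H=480, W=640 dummy BGR image
>>> class_letterbox(frame, new_shape=(224, 224)).shape    # -> (224, 224, 3): rescaled to 224x168 (w x h), then padded top and bottom
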
def prepare_classification_images(
    img_list: List[Optional[np.ndarray]],
    target_size: int = 224,
    mean=IMAGENET_DEFAULT_MEAN,
    std=IMAGENET_DEFAULT_STD,
    device=None,
) -> torch.tensor:

    prepared_images: List[torch.tensor] = []

    for img in img_list:
        if img is None:
            img = torch.zeros((3, target_size, target_size), dtype=torch.float32)
            img = F.normalize(img, mean=mean, std=std)
            img = img.unsqueeze(0)
            prepared_images.append(img)
            continue
        img = class_letterbox(img, new_shape=(target_size, target_size))
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        img = img / 255.0
        img = (img - mean) / std
        img = img.astype(dtype=np.float32)

        img = img.transpose((2, 0, 1))
        img = np.ascontiguousarray(img)
        img = torch.from_numpy(img)
        img = img.unsqueeze(0)

        prepared_images.append(img)

    if len(prepared_images) == 0:
        return None

    prepared_input = torch.concat(prepared_images)

    if device:
        prepared_input = prepared_input.to(device)

    return prepared_input

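Usage sketch (crop sizes are illustrative): crops are letterboxed, normalized with the ImageNet statistics and stacked into a batch; None entries become zero placeholders so batch positions stay aligned.

>>> face_crop = np.zeros((80, 60, 3), dtype=np.uint8)   # dummy BGR face crop
>>> batch = prepare_classification_images([face_crop, None], device="cpu")
>>> batch.shape   # -> torch.Size([2, 3, 224, 224])
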
def IOU(bb1: Union[tuple, list], bb2: Union[tuple, list], norm_second_bbox: bool = False) -> float:
    # expects [ymin, xmin, ymax, xmax]; absolute or relative coordinates both work
    assert bb1[1] < bb1[3]
    assert bb1[0] < bb1[2]
    assert bb2[1] < bb2[3]
    assert bb2[0] < bb2[2]

    # determine the coordinates of the intersection rectangle
    x_left = max(bb1[1], bb2[1])
    y_top = max(bb1[0], bb2[0])
    x_right = min(bb1[3], bb2[3])
    y_bottom = min(bb1[2], bb2[2])

    if x_right < x_left or y_bottom < y_top:
        return 0.0

    # The intersection of two axis-aligned bounding boxes is always an
    # axis-aligned bounding box
    intersection_area = (x_right - x_left) * (y_bottom - y_top)
    # compute the area of both AABBs
    bb1_area = (bb1[3] - bb1[1]) * (bb1[2] - bb1[0])
    bb2_area = (bb2[3] - bb2[1]) * (bb2[2] - bb2[0])
    if not norm_second_bbox:
        # compute the intersection over union by taking the intersection
        # area and dividing it by the sum of prediction + ground-truth
        # areas minus the intersection area
        iou = intersection_area / float(bb1_area + bb2_area - intersection_area)
    else:
        # for cases when we check whether the second bbox is inside the first one
        iou = intersection_area / float(bb2_area)

    assert iou >= 0.0
    assert iou <= 1.01

    return iou
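Usage sketch (coordinates are illustrative): unlike box_iou above, IOU expects [ymin, xmin, ymax, xmax]; with norm_second_bbox=True it measures how much of the second box lies inside the first.

>>> IOU([0, 0, 100, 100], [50, 50, 150, 150])                        # -> 0.142857...
>>> IOU([0, 0, 100, 100], [25, 25, 75, 75], norm_second_bbox=True)   # -> 1.0: bbox2 is fully inside bbox1
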
age_estimator/mivolo/mivolo/model/__init__.py
ADDED
File without changes
|
age_estimator/mivolo/mivolo/model/__pycache__/__init__.cpython-38.pyc
ADDED
Binary file (162 Bytes). View file
|
|
age_estimator/mivolo/mivolo/model/__pycache__/create_timm_model.cpython-38.pyc
ADDED
Binary file (2.96 kB). View file
|
|
age_estimator/mivolo/mivolo/model/__pycache__/cross_bottleneck_attn.cpython-38.pyc
ADDED
Binary file (3.66 kB). View file
|
|
age_estimator/mivolo/mivolo/model/__pycache__/mi_volo.cpython-38.pyc
ADDED
Binary file (7.18 kB). View file
|
|
age_estimator/mivolo/mivolo/model/__pycache__/mivolo_model.cpython-38.pyc
ADDED
Binary file (10.2 kB). View file
|
|