MonsterMMORPG committed
Commit: d8e8a96
Parent(s): bab3d3d
08f4ccb5459ba8d71ac7e1cb832d4f5e480338e9d9664a469baa1b1252917877
Files changed:
- Rope.bat +2 -0
- Rope.py +5 -0
- rope/Models.py +1961 -0
- rope/Styles.py +293 -0
- rope/VideoManager.py +1242 -0
- rope/media/tl_beg_off.png +0 -0
- rope/media/tl_beg_on.png +0 -0
- rope/media/tl_left_hover.png +0 -0
- rope/media/tl_left_off.png +0 -0
- rope/media/tl_left_on.png +0 -0
- rope/media/tl_right_hover.png +0 -0
- rope/media/tl_right_off.png +0 -0
- rope/media/tl_right_on.png +0 -0
Rope.bat
ADDED
@@ -0,0 +1,2 @@
call conda activate Rope && python Rope.py
pause
Rope.py
ADDED
@@ -0,0 +1,5 @@
#!/usr/bin/env python3

from rope import Coordinator
if __name__ == "__main__":
    Coordinator.run()
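For reference, Rope.bat above activates the `Rope` conda environment and launches this entry point on Windows. A minimal cross-platform launcher in Python is sketched below; it is illustrative only and not part of this commit, and it assumes a conda environment named `Rope` exists and that the `conda` executable is on PATH.

# launch.py - illustrative sketch, not part of this commit.
# Assumes a conda environment named "Rope" and that `conda` is on PATH.
import subprocess
import sys

def main() -> int:
    # Roughly equivalent to Rope.bat: call conda activate Rope && python Rope.py
    return subprocess.call(["conda", "run", "-n", "Rope", "python", "Rope.py"])

if __name__ == "__main__":
    sys.exit(main())
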
rope/Models.py
ADDED
@@ -0,0 +1,1961 @@
import cv2
import numpy as np
from skimage import transform as trans
import torch
import torchvision
torchvision.disable_beta_transforms_warning()
from torchvision.transforms import v2
from numpy.linalg import norm as l2norm
import onnxruntime
import onnx
from itertools import product as product
import subprocess as sp
onnxruntime.set_default_logger_severity(4)
onnxruntime.log_verbosity_level = -1
import rope.FaceUtil as faceutil
import pickle

class Models():
    def __init__(self):
        self.arcface_dst = np.array( [[38.2946, 51.6963], [73.5318, 51.5014], [56.0252, 71.7366], [41.5493, 92.3655], [70.7299, 92.2041]], dtype=np.float32)
        self.providers = ['CUDAExecutionProvider', 'CPUExecutionProvider']

        self.retinaface_model = []
        self.yoloface_model = []
        self.scrdf_model = []
        self.yunet_model = []
        self.face_landmark_68_model = []
        self.face_landmark_3d68_model = []
        self.mean_lmk = []
        self.face_landmark_98_model = []
        self.face_landmark_106_model = []
        self.face_landmark_478_model = []
        self.face_blendshapes_model = []
        self.resnet50_model, self.anchors = [], []

        self.insight106_model = []

        self.recognition_model = []
        self.swapper_model = []
        self.swapper_model_kps = []
        self.swapper_model_swap = []

        self.emap = []
        self.GFPGAN_model = []
        self.GPEN_256_model = []
        self.GPEN_512_model = []
        self.GPEN_1024_model = []
        self.codeformer_model = []

        self.occluder_model = []
        self.faceparser_model = []

        self.syncvec = torch.empty((1,1), dtype=torch.float32, device='cuda:0')

        self.normalize = v2.Normalize(mean = [ 0., 0., 0. ],
                                      std = [ 1/1.0, 1/1.0, 1/1.0 ])

        self.LandmarksSubsetIdxs = [
            0, 1, 4, 5, 6, 7, 8, 10, 13, 14, 17, 21, 33, 37, 39,
            40, 46, 52, 53, 54, 55, 58, 61, 63, 65, 66, 67, 70, 78, 80,
            81, 82, 84, 87, 88, 91, 93, 95, 103, 105, 107, 109, 127, 132, 133,
            136, 144, 145, 146, 148, 149, 150, 152, 153, 154, 155, 157, 158, 159, 160,
            161, 162, 163, 168, 172, 173, 176, 178, 181, 185, 191, 195, 197, 234, 246,
            249, 251, 263, 267, 269, 270, 276, 282, 283, 284, 285, 288, 291, 293, 295,
            296, 297, 300, 308, 310, 311, 312, 314, 317, 318, 321, 323, 324, 332, 334,
            336, 338, 356, 361, 362, 365, 373, 374, 375, 377, 378, 379, 380, 381, 382,
            384, 385, 386, 387, 388, 389, 390, 397, 398, 400, 402, 405, 409, 415, 454,
            466, 468, 469, 470, 471, 472, 473, 474, 475, 476, 477
        ]

    def get_gpu_memory(self):
        command = "nvidia-smi --query-gpu=memory.total --format=csv"
        memory_total_info = sp.check_output(command.split()).decode('ascii').split('\n')[:-1][1:]
        memory_total = [int(x.split()[0]) for i, x in enumerate(memory_total_info)]

        command = "nvidia-smi --query-gpu=memory.free --format=csv"
        memory_free_info = sp.check_output(command.split()).decode('ascii').split('\n')[:-1][1:]
        memory_free = [int(x.split()[0]) for i, x in enumerate(memory_free_info)]

        memory_used = memory_total[0] - memory_free[0]

        return memory_used, memory_total[0]

    def run_detect(self, img, detect_mode='Retinaface', max_num=1, score=0.5, use_landmark_detection=False, landmark_detect_mode='98', landmark_score=0.5, from_points=False):
        bboxes = []
        kpss = []

        if detect_mode=='Retinaface':
            if not self.retinaface_model:
                self.retinaface_model = onnxruntime.InferenceSession('./models/det_10g.onnx', providers=self.providers)

            bboxes, kpss = self.detect_retinaface(img, max_num=max_num, score=score, use_landmark_detection=use_landmark_detection, landmark_detect_mode=landmark_detect_mode, landmark_score=landmark_score, from_points=from_points)

        elif detect_mode=='SCRDF':
            if not self.scrdf_model:
                self.scrdf_model = onnxruntime.InferenceSession('./models/scrfd_2.5g_bnkps.onnx', providers=self.providers)

            bboxes, kpss = self.detect_scrdf(img, max_num=max_num, score=score, use_landmark_detection=use_landmark_detection, landmark_detect_mode=landmark_detect_mode, landmark_score=landmark_score, from_points=from_points)

        elif detect_mode=='Yolov8':
            if not self.yoloface_model:
                self.yoloface_model = onnxruntime.InferenceSession('./models/yoloface_8n.onnx', providers=self.providers)
                #self.insight106_model = onnxruntime.InferenceSession('./models/2d106det.onnx', providers=self.providers)

            bboxes, kpss = self.detect_yoloface(img, max_num=max_num, score=score, use_landmark_detection=use_landmark_detection, landmark_detect_mode=landmark_detect_mode, landmark_score=landmark_score, from_points=from_points)

        elif detect_mode=='Yunet':
            if not self.yunet_model:
                self.yunet_model = onnxruntime.InferenceSession('./models/yunet_n_640_640.onnx', providers=self.providers)

            bboxes, kpss = self.detect_yunet(img, max_num=max_num, score=score, use_landmark_detection=use_landmark_detection, landmark_detect_mode=landmark_detect_mode, landmark_score=landmark_score, from_points=from_points)

        return bboxes, kpss

    def run_detect_landmark(self, img, bbox, det_kpss, detect_mode='98', score=0.5, from_points=False):
        kpss = []
        scores = []

        if detect_mode=='5':
            if not self.resnet50_model:
                self.resnet50_model = onnxruntime.InferenceSession("./models/res50.onnx", providers=self.providers)

                feature_maps = [[64, 64], [32, 32], [16, 16]]
                min_sizes = [[16, 32], [64, 128], [256, 512]]
                steps = [8, 16, 32]
                image_size = 512

                for k, f in enumerate(feature_maps):
                    min_size_array = min_sizes[k]
                    for i, j in product(range(f[0]), range(f[1])):
                        for min_size in min_size_array:
                            s_kx = min_size / image_size
                            s_ky = min_size / image_size
                            dense_cx = [x * steps[k] / image_size for x in [j + 0.5]]
                            dense_cy = [y * steps[k] / image_size for y in [i + 0.5]]
                            for cy, cx in product(dense_cy, dense_cx):
                                self.anchors += [cx, cy, s_kx, s_ky]

            kpss, scores = self.detect_face_landmark_5(img, bbox=bbox, det_kpss=det_kpss, from_points=from_points)

        elif detect_mode=='68':
            if not self.face_landmark_68_model:
                self.face_landmark_68_model = onnxruntime.InferenceSession('./models/2dfan4.onnx', providers=self.providers)

            kpss, scores = self.detect_face_landmark_68(img, bbox=bbox, det_kpss=det_kpss, convert68_5=True, from_points=from_points)

        elif detect_mode=='3d68':
            if not self.face_landmark_3d68_model:
                self.face_landmark_3d68_model = onnxruntime.InferenceSession('./models/1k3d68.onnx', providers=self.providers)
                with open('./models/meanshape_68.pkl', 'rb') as f:
                    self.mean_lmk = pickle.load(f)

            kpss, scores = self.detect_face_landmark_3d68(img, bbox=bbox, det_kpss=det_kpss, convert68_5=True, from_points=from_points)

            return kpss, scores

        elif detect_mode=='98':
            if not self.face_landmark_98_model:
                self.face_landmark_98_model = onnxruntime.InferenceSession('./models/peppapig_teacher_Nx3x256x256.onnx', providers=self.providers)

            kpss, scores = self.detect_face_landmark_98(img, bbox=bbox, det_kpss=det_kpss, convert98_5=True, from_points=from_points)

        elif detect_mode=='106':
            if not self.face_landmark_106_model:
                self.face_landmark_106_model = onnxruntime.InferenceSession('./models/2d106det.onnx', providers=self.providers)

            kpss, scores = self.detect_face_landmark_106(img, bbox=bbox, det_kpss=det_kpss, convert106_5=True, from_points=from_points)

            return kpss, scores

        elif detect_mode=='478':
            if not self.face_landmark_478_model:
                self.face_landmark_478_model = onnxruntime.InferenceSession('./models/face_landmarks_detector_Nx3x256x256.onnx', providers=self.providers)

            if not self.face_blendshapes_model:
                self.face_blendshapes_model = onnxruntime.InferenceSession('./models/face_blendshapes_Nx146x2.onnx', providers=self.providers)

            kpss, scores = self.detect_face_landmark_478(img, bbox=bbox, det_kpss=det_kpss, convert478_5=True, from_points=from_points)

            return kpss, scores

        if len(kpss) > 0:
            if len(scores) > 0:
                if np.mean(scores) >= score:
                    return kpss, scores
            else:
                return kpss, scores

        return [], []

    def delete_models(self):
        self.retinaface_model = []
        self.yoloface_model = []
        self.scrdf_model = []
        self.yunet_model = []
        self.face_landmark_68_model = []
        self.face_landmark_3d68_model = []
        self.mean_lmk = []
        self.face_landmark_98_model = []
        self.face_landmark_106_model = []
        self.face_landmark_478_model = []
        self.face_blendshapes_model = []
        self.resnet50_model = []
        self.insight106_model = []
        self.recognition_model = []
        self.swapper_model = []
        self.GFPGAN_model = []
        self.GPEN_256_model = []
        self.GPEN_512_model = []
        self.GPEN_1024_model = []
        self.codeformer_model = []
        self.occluder_model = []
        self.faceparser_model = []

    def run_recognize(self, img, kps):
        if not self.recognition_model:
            self.recognition_model = onnxruntime.InferenceSession('./models/w600k_r50.onnx', providers=self.providers)

        embedding, cropped_image = self.recognize(img, kps)
        return embedding, cropped_image

    def calc_swapper_latent(self, source_embedding):
        if not self.swapper_model:
            graph = onnx.load("./models/inswapper_128.fp16.onnx").graph
            self.emap = onnx.numpy_helper.to_array(graph.initializer[-1])

        n_e = source_embedding / l2norm(source_embedding)
        latent = n_e.reshape((1,-1))
        latent = np.dot(latent, self.emap)
        latent /= np.linalg.norm(latent)
        return latent

    def run_swapper(self, image, embedding, output):
        if not self.swapper_model:
            cuda_options = {"arena_extend_strategy": "kSameAsRequested", 'cudnn_conv_algo_search': 'DEFAULT'}
            sess_options = onnxruntime.SessionOptions()
            sess_options.enable_cpu_mem_arena = False

            # self.swapper_model = onnxruntime.InferenceSession( "./models/inswapper_128_last_cubic.onnx", sess_options, providers=[('CUDAExecutionProvider', cuda_options), 'CPUExecutionProvider'])

            self.swapper_model = onnxruntime.InferenceSession( "./models/inswapper_128.fp16.onnx", providers=self.providers)

        io_binding = self.swapper_model.io_binding()
        io_binding.bind_input(name='target', device_type='cuda', device_id=0, element_type=np.float32, shape=(1,3,128,128), buffer_ptr=image.data_ptr())
        io_binding.bind_input(name='source', device_type='cuda', device_id=0, element_type=np.float32, shape=(1,512), buffer_ptr=embedding.data_ptr())
        io_binding.bind_output(name='output', device_type='cuda', device_id=0, element_type=np.float32, shape=(1,3,128,128), buffer_ptr=output.data_ptr())

        self.syncvec.cpu()
        self.swapper_model.run_with_iobinding(io_binding)

    def run_swap_stg1(self, embedding):

        # Load model
        if not self.swapper_model_kps:
            self.swapper_model_kps = onnxruntime.InferenceSession( "./models/inswapper_kps.onnx", providers=self.providers)

        # Wacky data structure
        io_binding = self.swapper_model_kps.io_binding()
        kps_1 = torch.ones((1, 2048), dtype=torch.float16, device='cuda').contiguous()
        kps_2 = torch.ones((1, 2048), dtype=torch.float16, device='cuda').contiguous()
        kps_3 = torch.ones((1, 2048), dtype=torch.float16, device='cuda').contiguous()
        kps_4 = torch.ones((1, 2048), dtype=torch.float16, device='cuda').contiguous()
        kps_5 = torch.ones((1, 2048), dtype=torch.float16, device='cuda').contiguous()
        kps_6 = torch.ones((1, 2048), dtype=torch.float16, device='cuda').contiguous()
        kps_7 = torch.ones((1, 2048), dtype=torch.float16, device='cuda').contiguous()
        kps_8 = torch.ones((1, 2048), dtype=torch.float16, device='cuda').contiguous()
        kps_9 = torch.ones((1, 2048), dtype=torch.float16, device='cuda').contiguous()
        kps_10 = torch.ones((1, 2048), dtype=torch.float16, device='cuda').contiguous()
        kps_11 = torch.ones((1, 2048), dtype=torch.float16, device='cuda').contiguous()
        kps_12 = torch.ones((1, 2048), dtype=torch.float16, device='cuda').contiguous()

        # Bind the data structures
        io_binding.bind_input(name='source', device_type='cuda', device_id=0, element_type=np.float32, shape=(1, 512), buffer_ptr=embedding.data_ptr())
        io_binding.bind_output(name='1', device_type='cuda', device_id=0, element_type=np.float16, shape=(1, 2048), buffer_ptr=kps_1.data_ptr())
        io_binding.bind_output(name='2', device_type='cuda', device_id=0, element_type=np.float16, shape=(1, 2048), buffer_ptr=kps_2.data_ptr())
        io_binding.bind_output(name='3', device_type='cuda', device_id=0, element_type=np.float16, shape=(1, 2048), buffer_ptr=kps_3.data_ptr())
        io_binding.bind_output(name='4', device_type='cuda', device_id=0, element_type=np.float16, shape=(1, 2048), buffer_ptr=kps_4.data_ptr())
        io_binding.bind_output(name='5', device_type='cuda', device_id=0, element_type=np.float16, shape=(1, 2048), buffer_ptr=kps_5.data_ptr())
        io_binding.bind_output(name='6', device_type='cuda', device_id=0, element_type=np.float16, shape=(1, 2048), buffer_ptr=kps_6.data_ptr())
        io_binding.bind_output(name='7', device_type='cuda', device_id=0, element_type=np.float16, shape=(1, 2048), buffer_ptr=kps_7.data_ptr())
        io_binding.bind_output(name='8', device_type='cuda', device_id=0, element_type=np.float16, shape=(1, 2048), buffer_ptr=kps_8.data_ptr())
        io_binding.bind_output(name='9', device_type='cuda', device_id=0, element_type=np.float16, shape=(1, 2048), buffer_ptr=kps_9.data_ptr())
        io_binding.bind_output(name='10', device_type='cuda', device_id=0, element_type=np.float16, shape=(1, 2048), buffer_ptr=kps_10.data_ptr())
        io_binding.bind_output(name='11', device_type='cuda', device_id=0, element_type=np.float16, shape=(1, 2048), buffer_ptr=kps_11.data_ptr())
        io_binding.bind_output(name='12', device_type='cuda', device_id=0, element_type=np.float16, shape=(1, 2048), buffer_ptr=kps_12.data_ptr())

        self.syncvec.cpu()
        self.swapper_model_kps.run_with_iobinding(io_binding)

        # List of pointers
        holder = []
        holder.append(kps_1)
        holder.append(kps_2)
        holder.append(kps_3)
        holder.append(kps_4)
        holder.append(kps_5)
        holder.append(kps_6)
        holder.append(kps_7)
        holder.append(kps_8)
        holder.append(kps_9)
        holder.append(kps_10)
        holder.append(kps_11)
        holder.append(kps_12)

        return holder


    def run_swap_stg2(self, image, holder, output):
        if not self.swapper_model_swap:
            self.swapper_model_swap = onnxruntime.InferenceSession( "./models/inswapper_swap.onnx", providers=self.providers)

        io_binding = self.swapper_model_swap.io_binding()
        io_binding.bind_input(name='target', device_type='cuda', device_id=0, element_type=np.float32, shape=(1, 3, 128, 128), buffer_ptr=image.data_ptr())
        io_binding.bind_input(name='onnx::Unsqueeze_170', device_type='cuda', device_id=0, element_type=np.float16, shape=(1, 2048), buffer_ptr=holder[0].data_ptr())
        io_binding.bind_input(name='onnx::Unsqueeze_224', device_type='cuda', device_id=0, element_type=np.float16, shape=(1, 2048), buffer_ptr=holder[1].data_ptr())
        io_binding.bind_input(name='onnx::Unsqueeze_278', device_type='cuda', device_id=0, element_type=np.float16, shape=(1, 2048), buffer_ptr=holder[2].data_ptr())
        io_binding.bind_input(name='onnx::Unsqueeze_332', device_type='cuda', device_id=0, element_type=np.float16, shape=(1, 2048), buffer_ptr=holder[3].data_ptr())
        io_binding.bind_input(name='onnx::Unsqueeze_386', device_type='cuda', device_id=0, element_type=np.float16, shape=(1, 2048), buffer_ptr=holder[4].data_ptr())
        io_binding.bind_input(name='onnx::Unsqueeze_440', device_type='cuda', device_id=0, element_type=np.float16, shape=(1, 2048), buffer_ptr=holder[5].data_ptr())
        io_binding.bind_input(name='onnx::Unsqueeze_494', device_type='cuda', device_id=0, element_type=np.float16, shape=(1, 2048), buffer_ptr=holder[6].data_ptr())
        io_binding.bind_input(name='onnx::Unsqueeze_548', device_type='cuda', device_id=0, element_type=np.float16, shape=(1, 2048), buffer_ptr=holder[7].data_ptr())
        io_binding.bind_input(name='onnx::Unsqueeze_602', device_type='cuda', device_id=0, element_type=np.float16, shape=(1, 2048), buffer_ptr=holder[8].data_ptr())
        io_binding.bind_input(name='onnx::Unsqueeze_656', device_type='cuda', device_id=0, element_type=np.float16, shape=(1, 2048), buffer_ptr=holder[9].data_ptr())
        io_binding.bind_input(name='onnx::Unsqueeze_710', device_type='cuda', device_id=0, element_type=np.float16, shape=(1, 2048), buffer_ptr=holder[10].data_ptr())
        io_binding.bind_input(name='onnx::Unsqueeze_764', device_type='cuda', device_id=0, element_type=np.float16, shape=(1, 2048), buffer_ptr=holder[11].data_ptr())
        io_binding.bind_output(name='output', device_type='cuda', device_id=0, element_type=np.float32, shape=(1, 3, 128, 128), buffer_ptr=output.data_ptr())

        self.syncvec.cpu()
        self.swapper_model_swap.run_with_iobinding(io_binding)
+
def run_GFPGAN(self, image, output):
|
331 |
+
if not self.GFPGAN_model:
|
332 |
+
# cuda_options = {"arena_extend_strategy": "kSameAsRequested", 'cudnn_conv_algo_search': 'DEFAULT'}
|
333 |
+
# sess_options = onnxruntime.SessionOptions()
|
334 |
+
# sess_options.enable_cpu_mem_arena = False
|
335 |
+
|
336 |
+
# self.GFPGAN_model = onnxruntime.InferenceSession( "./models/GFPGANv1.4.onnx", sess_options, providers=[("CUDAExecutionProvider", cuda_options), 'CPUExecutionProvider'])
|
337 |
+
|
338 |
+
self.GFPGAN_model = onnxruntime.InferenceSession( "./models/GFPGANv1.4.onnx", providers=self.providers)
|
339 |
+
|
340 |
+
io_binding = self.GFPGAN_model.io_binding()
|
341 |
+
io_binding.bind_input(name='input', device_type='cuda', device_id=0, element_type=np.float32, shape=(1,3,512,512), buffer_ptr=image.data_ptr())
|
342 |
+
io_binding.bind_output(name='output', device_type='cuda', device_id=0, element_type=np.float32, shape=(1,3,512,512), buffer_ptr=output.data_ptr())
|
343 |
+
|
344 |
+
self.syncvec.cpu()
|
345 |
+
self.GFPGAN_model.run_with_iobinding(io_binding)
|
346 |
+
|
347 |
+
def run_GPEN_1024(self, image, output):
|
348 |
+
if not self.GPEN_1024_model:
|
349 |
+
self.GPEN_1024_model = onnxruntime.InferenceSession( "./models/GPEN-BFR-1024.onnx", providers=self.providers)
|
350 |
+
|
351 |
+
io_binding = self.GPEN_1024_model.io_binding()
|
352 |
+
io_binding.bind_input(name='input', device_type='cuda', device_id=0, element_type=np.float32, shape=(1,3,1024,1024), buffer_ptr=image.data_ptr())
|
353 |
+
io_binding.bind_output(name='output', device_type='cuda', device_id=0, element_type=np.float32, shape=(1,3,1024,1024), buffer_ptr=output.data_ptr())
|
354 |
+
|
355 |
+
self.syncvec.cpu()
|
356 |
+
self.GPEN_1024_model.run_with_iobinding(io_binding)
|
357 |
+
|
358 |
+
def run_GPEN_512(self, image, output):
|
359 |
+
if not self.GPEN_512_model:
|
360 |
+
self.GPEN_512_model = onnxruntime.InferenceSession( "./models/GPEN-BFR-512.onnx", providers=self.providers)
|
361 |
+
|
362 |
+
io_binding = self.GPEN_512_model.io_binding()
|
363 |
+
io_binding.bind_input(name='input', device_type='cuda', device_id=0, element_type=np.float32, shape=(1,3,512,512), buffer_ptr=image.data_ptr())
|
364 |
+
io_binding.bind_output(name='output', device_type='cuda', device_id=0, element_type=np.float32, shape=(1,3,512,512), buffer_ptr=output.data_ptr())
|
365 |
+
|
366 |
+
self.syncvec.cpu()
|
367 |
+
self.GPEN_512_model.run_with_iobinding(io_binding)
|
368 |
+
|
369 |
+
def run_GPEN_256(self, image, output):
|
370 |
+
if not self.GPEN_256_model:
|
371 |
+
self.GPEN_256_model = onnxruntime.InferenceSession( "./models/GPEN-BFR-256.onnx", providers=self.providers)
|
372 |
+
|
373 |
+
io_binding = self.GPEN_256_model.io_binding()
|
374 |
+
io_binding.bind_input(name='input', device_type='cuda', device_id=0, element_type=np.float32, shape=(1,3,256,256), buffer_ptr=image.data_ptr())
|
375 |
+
io_binding.bind_output(name='output', device_type='cuda', device_id=0, element_type=np.float32, shape=(1,3,256,256), buffer_ptr=output.data_ptr())
|
376 |
+
|
377 |
+
self.syncvec.cpu()
|
378 |
+
self.GPEN_256_model.run_with_iobinding(io_binding)
|
379 |
+
|
380 |
+
def run_codeformer(self, image, output):
|
381 |
+
if not self.codeformer_model:
|
382 |
+
self.codeformer_model = onnxruntime.InferenceSession( "./models/codeformer_fp16.onnx", providers=self.providers)
|
383 |
+
|
384 |
+
io_binding = self.codeformer_model.io_binding()
|
385 |
+
io_binding.bind_input(name='x', device_type='cuda', device_id=0, element_type=np.float32, shape=(1,3,512,512), buffer_ptr=image.data_ptr())
|
386 |
+
w = np.array([0.9], dtype=np.double)
|
387 |
+
io_binding.bind_cpu_input('w', w)
|
388 |
+
io_binding.bind_output(name='y', device_type='cuda', device_id=0, element_type=np.float32, shape=(1,3,512,512), buffer_ptr=output.data_ptr())
|
389 |
+
|
390 |
+
self.syncvec.cpu()
|
391 |
+
self.codeformer_model.run_with_iobinding(io_binding)
|
392 |
+
|
393 |
+
def run_occluder(self, image, output):
|
394 |
+
if not self.occluder_model:
|
395 |
+
self.occluder_model = onnxruntime.InferenceSession("./models/occluder.onnx", providers=self.providers)
|
396 |
+
|
397 |
+
io_binding = self.occluder_model.io_binding()
|
398 |
+
io_binding.bind_input(name='img', device_type='cuda', device_id=0, element_type=np.float32, shape=(1,3,256,256), buffer_ptr=image.data_ptr())
|
399 |
+
io_binding.bind_output(name='output', device_type='cuda', device_id=0, element_type=np.float32, shape=(1,1,256,256), buffer_ptr=output.data_ptr())
|
400 |
+
|
401 |
+
# torch.cuda.synchronize('cuda')
|
402 |
+
self.syncvec.cpu()
|
403 |
+
self.occluder_model.run_with_iobinding(io_binding)
|
404 |
+
|
405 |
+
def run_faceparser(self, image, output):
|
406 |
+
if not self.faceparser_model:
|
407 |
+
self.faceparser_model = onnxruntime.InferenceSession("./models/faceparser_fp16.onnx", providers=self.providers)
|
408 |
+
|
409 |
+
io_binding = self.faceparser_model.io_binding()
|
410 |
+
io_binding.bind_input(name='input', device_type='cuda', device_id=0, element_type=np.float32, shape=(1,3,512,512), buffer_ptr=image.data_ptr())
|
411 |
+
io_binding.bind_output(name='out', device_type='cuda', device_id=0, element_type=np.float32, shape=(1,19,512,512), buffer_ptr=output.data_ptr())
|
412 |
+
|
413 |
+
# torch.cuda.synchronize('cuda')
|
414 |
+
self.syncvec.cpu()
|
415 |
+
self.faceparser_model.run_with_iobinding(io_binding)
|
416 |
+
|
    def detect_retinaface(self, img, max_num, score, use_landmark_detection, landmark_detect_mode, landmark_score, from_points):
        if use_landmark_detection:
            img_landmark = img.clone()

        # Resize image to fit within the input_size
        input_size = (640, 640)
        im_ratio = torch.div(img.size()[1], img.size()[2])

        # model_ratio = float(input_size[1]) / input_size[0]
        model_ratio = 1.0
        if im_ratio>model_ratio:
            new_height = input_size[1]
            new_width = int(new_height / im_ratio)
        else:
            new_width = input_size[0]
            new_height = int(new_width * im_ratio)
        det_scale = torch.div(new_height, img.size()[1])

        resize = v2.Resize((new_height, new_width), antialias=True)
        img = resize(img)
        img = img.permute(1,2,0)

        det_img = torch.zeros((input_size[1], input_size[0], 3), dtype=torch.float32, device='cuda:0')
        det_img[:new_height,:new_width, :] = img

        # Switch to BGR and normalize
        det_img = det_img[:, :, [2,1,0]]
        det_img = torch.sub(det_img, 127.5)
        det_img = torch.div(det_img, 128.0)
        det_img = det_img.permute(2, 0, 1) #3,128,128

        # Prepare data and find model parameters
        det_img = torch.unsqueeze(det_img, 0).contiguous()

        io_binding = self.retinaface_model.io_binding()
        io_binding.bind_input(name='input.1', device_type='cuda', device_id=0, element_type=np.float32, shape=det_img.size(), buffer_ptr=det_img.data_ptr())

        io_binding.bind_output('448', 'cuda')
        io_binding.bind_output('471', 'cuda')
        io_binding.bind_output('494', 'cuda')
        io_binding.bind_output('451', 'cuda')
        io_binding.bind_output('474', 'cuda')
        io_binding.bind_output('497', 'cuda')
        io_binding.bind_output('454', 'cuda')
        io_binding.bind_output('477', 'cuda')
        io_binding.bind_output('500', 'cuda')

        # Sync and run model
        self.syncvec.cpu()
        self.retinaface_model.run_with_iobinding(io_binding)

        net_outs = io_binding.copy_outputs_to_cpu()

        input_height = det_img.shape[2]
        input_width = det_img.shape[3]

        fmc = 3
        center_cache = {}
        scores_list = []
        bboxes_list = []
        kpss_list = []
        for idx, stride in enumerate([8, 16, 32]):
            scores = net_outs[idx]
            bbox_preds = net_outs[idx+fmc]
            bbox_preds = bbox_preds * stride

            kps_preds = net_outs[idx+fmc*2] * stride
            height = input_height // stride
            width = input_width // stride
            K = height * width
            key = (height, width, stride)
            if key in center_cache:
                anchor_centers = center_cache[key]
            else:
                anchor_centers = np.stack(np.mgrid[:height, :width][::-1], axis=-1).astype(np.float32)
                anchor_centers = (anchor_centers * stride).reshape( (-1, 2) )
                anchor_centers = np.stack([anchor_centers]*2, axis=1).reshape( (-1,2) )
                if len(center_cache)<100:
                    center_cache[key] = anchor_centers

            pos_inds = np.where(scores>=score)[0]

            x1 = anchor_centers[:, 0] - bbox_preds[:, 0]
            y1 = anchor_centers[:, 1] - bbox_preds[:, 1]
            x2 = anchor_centers[:, 0] + bbox_preds[:, 2]
            y2 = anchor_centers[:, 1] + bbox_preds[:, 3]

            bboxes = np.stack([x1, y1, x2, y2], axis=-1)

            pos_scores = scores[pos_inds]
            pos_bboxes = bboxes[pos_inds]
            scores_list.append(pos_scores)
            bboxes_list.append(pos_bboxes)

            preds = []
            for i in range(0, kps_preds.shape[1], 2):
                px = anchor_centers[:, i%2] + kps_preds[:, i]
                py = anchor_centers[:, i%2+1] + kps_preds[:, i+1]

                preds.append(px)
                preds.append(py)
            kpss = np.stack(preds, axis=-1)
            #kpss = kps_preds
            kpss = kpss.reshape( (kpss.shape[0], -1, 2) )
            pos_kpss = kpss[pos_inds]
            kpss_list.append(pos_kpss)

        scores = np.vstack(scores_list)
        scores_ravel = scores.ravel()
        order = scores_ravel.argsort()[::-1]

        det_scale = det_scale.numpy()###

        bboxes = np.vstack(bboxes_list) / det_scale

        kpss = np.vstack(kpss_list) / det_scale
        pre_det = np.hstack((bboxes, scores)).astype(np.float32, copy=False)
        pre_det = pre_det[order, :]

        dets = pre_det
        thresh = 0.4
        x1 = dets[:, 0]
        y1 = dets[:, 1]
        x2 = dets[:, 2]
        y2 = dets[:, 3]
        scoresb = dets[:, 4]

        areas = (x2 - x1 + 1) * (y2 - y1 + 1)
        orderb = scoresb.argsort()[::-1]

        keep = []
        while orderb.size > 0:
            i = orderb[0]
            keep.append(i)
            xx1 = np.maximum(x1[i], x1[orderb[1:]])
            yy1 = np.maximum(y1[i], y1[orderb[1:]])
            xx2 = np.minimum(x2[i], x2[orderb[1:]])
            yy2 = np.minimum(y2[i], y2[orderb[1:]])

            w = np.maximum(0.0, xx2 - xx1 + 1)
            h = np.maximum(0.0, yy2 - yy1 + 1)

            inter = w * h
            ovr = inter / (areas[i] + areas[orderb[1:]] - inter)

            inds = np.where(ovr <= thresh)[0]
            orderb = orderb[inds + 1]

        det = pre_det[keep, :]

        kpss = kpss[order,:,:]
        kpss = kpss[keep,:,:]

        if max_num > 0 and det.shape[0] > max_num:
            area = (det[:, 2] - det[:, 0]) * (det[:, 3] - det[:, 1])
            det_img_center = det_img.shape[0] // 2, det_img.shape[1] // 2
            offsets = np.vstack([
                (det[:, 0] + det[:, 2]) / 2 - det_img_center[1],
                (det[:, 1] + det[:, 3]) / 2 - det_img_center[0]
            ])
            offset_dist_squared = np.sum(np.power(offsets, 2.0), 0)

            values = area - offset_dist_squared * 2.0  # some extra weight on the centering
            bindex = np.argsort(values)[::-1]  # some extra weight on the centering
            bindex = bindex[0:max_num]

            det = det[bindex, :]
            if kpss is not None:
                kpss = kpss[bindex, :]

        score_values = det[:, 4]
        # delete score column
        det = np.delete(det, 4, 1)

        if use_landmark_detection and len(kpss) > 0:
            for i in range(kpss.shape[0]):
                landmark_kpss, landmark_scores = self.run_detect_landmark(img_landmark, det[i], kpss[i], landmark_detect_mode, landmark_score, from_points)
                if len(landmark_kpss) > 0:
                    if len(landmark_scores) > 0:
                        #print(np.mean(landmark_scores))
                        #print(np.mean(score_values[i]))
                        if np.mean(landmark_scores) > np.mean(score_values[i]):
                            kpss[i] = landmark_kpss
                    else:
                        kpss[i] = landmark_kpss

        return det, kpss

    def detect_retinaface2(self, img, max_num, score):

        # Resize image to fit within the input_size
        input_size = (640, 640)
        im_ratio = torch.div(img.size()[1], img.size()[2])

        # model_ratio = float(input_size[1]) / input_size[0]
        model_ratio = 1.0
        if im_ratio > model_ratio:
            new_height = input_size[1]
            new_width = int(new_height / im_ratio)
        else:
            new_width = input_size[0]
            new_height = int(new_width * im_ratio)
        det_scale = torch.div(new_height, img.size()[1])

        resize = v2.Resize((new_height, new_width), antialias=True)
        img = resize(img)
        img = img.permute(1, 2, 0)

        det_img = torch.zeros((input_size[1], input_size[0], 3), dtype=torch.float32, device='cuda:0')
        det_img[:new_height, :new_width, :] = img

        # Switch to BGR and normalize
        det_img = det_img[:, :, [2, 1, 0]]
        det_img = torch.sub(det_img, 127.5)
        det_img = torch.div(det_img, 128.0)
        det_img = det_img.permute(2, 0, 1)  # 3,128,128

        # Prepare data and find model parameters
        det_img = torch.unsqueeze(det_img, 0).contiguous()

        io_binding = self.retinaface_model.io_binding()
        io_binding.bind_input(name='input.1', device_type='cuda', device_id=0, element_type=np.float32, shape=det_img.size(), buffer_ptr=det_img.data_ptr())

        io_binding.bind_output('448', 'cuda')
        io_binding.bind_output('471', 'cuda')
        io_binding.bind_output('494', 'cuda')
        io_binding.bind_output('451', 'cuda')
        io_binding.bind_output('474', 'cuda')
        io_binding.bind_output('497', 'cuda')
        io_binding.bind_output('454', 'cuda')
        io_binding.bind_output('477', 'cuda')
        io_binding.bind_output('500', 'cuda')

        # Sync and run model
        self.syncvec.cpu()
        self.retinaface_model.run_with_iobinding(io_binding)

        net_outs = io_binding.copy_outputs_to_cpu()

        input_height = det_img.shape[2]
        input_width = det_img.shape[3]

        fmc = 3
        center_cache = {}
        scores_list = []
        bboxes_list = []
        kpss_list = []
        for idx, stride in enumerate([8, 16, 32]):
            scores = net_outs[idx]
            bbox_preds = net_outs[idx + fmc]
            bbox_preds = bbox_preds * stride

            kps_preds = net_outs[idx + fmc * 2] * stride
            height = input_height // stride
            width = input_width // stride
            K = height * width
            key = (height, width, stride)
            if key in center_cache:
                anchor_centers = center_cache[key]
            else:
                anchor_centers = np.stack(np.mgrid[:height, :width][::-1], axis=-1).astype(np.float32)
                anchor_centers = (anchor_centers * stride).reshape((-1, 2))
                anchor_centers = np.stack([anchor_centers] * 2, axis=1).reshape((-1, 2))
                if len(center_cache) < 100:
                    center_cache[key] = anchor_centers

            pos_inds = np.where(scores >= score)[0]

            x1 = anchor_centers[:, 0] - bbox_preds[:, 0]
            y1 = anchor_centers[:, 1] - bbox_preds[:, 1]
            x2 = anchor_centers[:, 0] + bbox_preds[:, 2]
            y2 = anchor_centers[:, 1] + bbox_preds[:, 3]

            bboxes = np.stack([x1, y1, x2, y2], axis=-1)

            pos_scores = scores[pos_inds]
            pos_bboxes = bboxes[pos_inds]
            scores_list.append(pos_scores)
            bboxes_list.append(pos_bboxes)

            preds = []
            for i in range(0, kps_preds.shape[1], 2):
                px = anchor_centers[:, i % 2] + kps_preds[:, i]
                py = anchor_centers[:, i % 2 + 1] + kps_preds[:, i + 1]

                preds.append(px)
                preds.append(py)
            kpss = np.stack(preds, axis=-1)
            # kpss = kps_preds
            kpss = kpss.reshape((kpss.shape[0], -1, 2))
            pos_kpss = kpss[pos_inds]
            kpss_list.append(pos_kpss)

        # result_boxes = cv2.dnn.NMSBoxes(bboxes_list, scores_list, 0.25, 0.45, 0.5)
        # print(result_boxes)
        scores = np.vstack(scores_list)
        scores_ravel = scores.ravel()
        order = scores_ravel.argsort()[::-1]

        det_scale = det_scale.numpy()  ###

        bboxes = np.vstack(bboxes_list) / det_scale

        kpss = np.vstack(kpss_list) / det_scale
        pre_det = np.hstack((bboxes, scores)).astype(np.float32, copy=False)
        pre_det = pre_det[order, :]

        dets = pre_det
        thresh = 0.4
        x1 = dets[:, 0]
        y1 = dets[:, 1]
        x2 = dets[:, 2]
        y2 = dets[:, 3]
        scoresb = dets[:, 4]

        areas = (x2 - x1 + 1) * (y2 - y1 + 1)
        orderb = scoresb.argsort()[::-1]

        keep = []
        person_id = 0
        people = {}

        while orderb.size > 0:
            # Add first box in list
            i = orderb[0]
            keep.append(i)

            people[person_id] = orderb[0]

            # Find overlap of remaining boxes
            xx1 = np.maximum(x1[i], x1[orderb[1:]])
            yy1 = np.maximum(y1[i], y1[orderb[1:]])
            xx2 = np.minimum(x2[i], x2[orderb[1:]])
            yy2 = np.minimum(y2[i], y2[orderb[1:]])

            w = np.maximum(0.0, xx2 - xx1 + 1)
            h = np.maximum(0.0, yy2 - yy1 + 1)

            inter = w * h

            ovr = inter / (areas[i] + areas[orderb[1:]] - inter)

            inds0 = np.where(ovr > thresh)[0]
            people[person_id] = np.hstack((people[person_id], orderb[inds0+1])).astype(np.int, copy=False)

            # identify where there is no overlap (<thresh)
            inds = np.where(ovr <= thresh)[0]
            # print(len(inds))

            orderb = orderb[inds+1]
            person_id += 1

        det = pre_det[keep, :]

        kpss = kpss[order, :, :]
        # print('order', kpss)
        # kpss = kpss[keep, :, :]
        # print('keep',kpss)

        kpss_ave = []
        for person in people:
            # print(kpss[people[person], :, :])
            # print('mean', np.mean(kpss[people[person], :, :], axis=0))
            # print(kpss[people[person], :, :].shape)
            kpss_ave.append(np.mean(kpss[people[person], :, :], axis=0).tolist())

        if max_num > 0 and det.shape[0] > max_num:
            area = (det[:, 2] - det[:, 0]) * (det[:, 3] - det[:, 1])
            det_img_center = det_img.shape[0] // 2, det_img.shape[1] // 2
            offsets = np.vstack([
                (det[:, 0] + det[:, 2]) / 2 - det_img_center[1],
                (det[:, 1] + det[:, 3]) / 2 - det_img_center[0]
            ])
            offset_dist_squared = np.sum(np.power(offsets, 2.0), 0)

            values = area - offset_dist_squared * 2.0  # some extra weight on the centering
            bindex = np.argsort(values)[::-1]  # some extra weight on the centering
            bindex = bindex[0:max_num]

            det = det[bindex, :]
            if kpss is not None:
                kpss = kpss[bindex, :]

        # return kpss_ave

        # delete score column
        det = np.delete(det, 4, 1)

        return kpss_ave

    def detect_scrdf(self, img, max_num, score, use_landmark_detection, landmark_detect_mode, landmark_score, from_points):
        if use_landmark_detection:
            img_landmark = img.clone()

        # Resize image to fit within the input_size
        input_size = (640, 640)
        im_ratio = torch.div(img.size()[1], img.size()[2])

        model_ratio = float(input_size[1]) / input_size[0]
        if im_ratio>model_ratio:
            new_height = input_size[1]
            new_width = int(new_height / im_ratio)
        else:
            new_width = input_size[0]
            new_height = int(new_width * im_ratio)
        det_scale = torch.div(new_height, img.size()[1])

        resize = v2.Resize((new_height, new_width), antialias=True)
        img = resize(img)
        img = img.permute(1,2,0)

        det_img = torch.zeros((input_size[1], input_size[0], 3), dtype=torch.float32, device='cuda:0')
        det_img[:new_height,:new_width, :] = img

        # Switch to BGR and normalize
        det_img = det_img[:, :, [2,1,0]]
        det_img = torch.sub(det_img, 127.5)
        det_img = torch.div(det_img, 128.0)
        det_img = det_img.permute(2, 0, 1) #3,128,128

        # Prepare data and find model parameters
        det_img = torch.unsqueeze(det_img, 0).contiguous()
        input_name = self.scrdf_model.get_inputs()[0].name

        outputs = self.scrdf_model.get_outputs()
        output_names = []
        for o in outputs:
            output_names.append(o.name)

        io_binding = self.scrdf_model.io_binding()
        io_binding.bind_input(name=input_name, device_type='cuda', device_id=0, element_type=np.float32, shape=det_img.size(), buffer_ptr=det_img.data_ptr())

        for i in range(len(output_names)):
            io_binding.bind_output(output_names[i], 'cuda')

        # Sync and run model
        syncvec = self.syncvec.cpu()
        self.scrdf_model.run_with_iobinding(io_binding)

        net_outs = io_binding.copy_outputs_to_cpu()

        input_height = det_img.shape[2]
        input_width = det_img.shape[3]

        fmc = 3
        center_cache = {}
        scores_list = []
        bboxes_list = []
        kpss_list = []
        for idx, stride in enumerate([8, 16, 32]):
            scores = net_outs[idx]
            bbox_preds = net_outs[idx+fmc]
            bbox_preds = bbox_preds * stride

            kps_preds = net_outs[idx+fmc*2] * stride
            height = input_height // stride
            width = input_width // stride
            K = height * width
            key = (height, width, stride)
            if key in center_cache:
                anchor_centers = center_cache[key]
            else:
                anchor_centers = np.stack(np.mgrid[:height, :width][::-1], axis=-1).astype(np.float32)
                anchor_centers = (anchor_centers * stride).reshape( (-1, 2) )
                anchor_centers = np.stack([anchor_centers]*2, axis=1).reshape( (-1,2) )
                if len(center_cache)<100:
                    center_cache[key] = anchor_centers

            pos_inds = np.where(scores>=score)[0]

            x1 = anchor_centers[:, 0] - bbox_preds[:, 0]
            y1 = anchor_centers[:, 1] - bbox_preds[:, 1]
            x2 = anchor_centers[:, 0] + bbox_preds[:, 2]
            y2 = anchor_centers[:, 1] + bbox_preds[:, 3]

            bboxes = np.stack([x1, y1, x2, y2], axis=-1)

            pos_scores = scores[pos_inds]
            pos_bboxes = bboxes[pos_inds]
            scores_list.append(pos_scores)
            bboxes_list.append(pos_bboxes)

            preds = []
            for i in range(0, kps_preds.shape[1], 2):
                px = anchor_centers[:, i%2] + kps_preds[:, i]
                py = anchor_centers[:, i%2+1] + kps_preds[:, i+1]

                preds.append(px)
                preds.append(py)
            kpss = np.stack(preds, axis=-1)
            #kpss = kps_preds
            kpss = kpss.reshape( (kpss.shape[0], -1, 2) )
            pos_kpss = kpss[pos_inds]
            kpss_list.append(pos_kpss)

        scores = np.vstack(scores_list)
        scores_ravel = scores.ravel()
        order = scores_ravel.argsort()[::-1]

        det_scale = det_scale.numpy()###

        bboxes = np.vstack(bboxes_list) / det_scale

        kpss = np.vstack(kpss_list) / det_scale
        pre_det = np.hstack((bboxes, scores)).astype(np.float32, copy=False)
        pre_det = pre_det[order, :]

        dets = pre_det
        thresh = 0.4
        x1 = dets[:, 0]
        y1 = dets[:, 1]
        x2 = dets[:, 2]
        y2 = dets[:, 3]
        scoresb = dets[:, 4]

        areas = (x2 - x1 + 1) * (y2 - y1 + 1)
        orderb = scoresb.argsort()[::-1]

        keep = []
        while orderb.size > 0:
            i = orderb[0]
            keep.append(i)
            xx1 = np.maximum(x1[i], x1[orderb[1:]])
            yy1 = np.maximum(y1[i], y1[orderb[1:]])
            xx2 = np.minimum(x2[i], x2[orderb[1:]])
            yy2 = np.minimum(y2[i], y2[orderb[1:]])

            w = np.maximum(0.0, xx2 - xx1 + 1)
            h = np.maximum(0.0, yy2 - yy1 + 1)

            inter = w * h
            ovr = inter / (areas[i] + areas[orderb[1:]] - inter)

            inds = np.where(ovr <= thresh)[0]
            orderb = orderb[inds + 1]

        det = pre_det[keep, :]

        kpss = kpss[order,:,:]
        kpss = kpss[keep,:,:]

        if max_num > 0 and det.shape[0] > max_num:
            area = (det[:, 2] - det[:, 0]) * (det[:, 3] - det[:, 1])
            det_img_center = det_img.shape[0] // 2, det_img.shape[1] // 2
            offsets = np.vstack([
                (det[:, 0] + det[:, 2]) / 2 - det_img_center[1],
                (det[:, 1] + det[:, 3]) / 2 - det_img_center[0]
            ])
            offset_dist_squared = np.sum(np.power(offsets, 2.0), 0)

            values = area - offset_dist_squared * 2.0  # some extra weight on the centering
            bindex = np.argsort(values)[::-1]  # some extra weight on the centering
            bindex = bindex[0:max_num]

            det = det[bindex, :]
            if kpss is not None:
                kpss = kpss[bindex, :]

        score_values = det[:, 4]
        # delete score column
        det = np.delete(det, 4, 1)

        if use_landmark_detection and len(kpss) > 0:
            for i in range(kpss.shape[0]):
                landmark_kpss, landmark_scores = self.run_detect_landmark(img_landmark, det[i], kpss[i], landmark_detect_mode, landmark_score, from_points)
                if len(landmark_kpss) > 0:
                    if len(landmark_scores) > 0:
                        #print(np.mean(landmark_scores))
                        #print(np.mean(score_values[i]))
                        if np.mean(landmark_scores) > np.mean(score_values[i]):
                            kpss[i] = landmark_kpss
                    else:
                        kpss[i] = landmark_kpss

        return det, kpss

    def detect_yoloface(self, img, max_num, score, use_landmark_detection, landmark_detect_mode, landmark_score, from_points):
        if use_landmark_detection:
            img_landmark = img.clone()

        height = img.size(dim=1)
        width = img.size(dim=2)
        length = max((height, width))

        image = torch.zeros((length, length, 3), dtype=torch.uint8, device='cuda')
        img = img.permute(1,2,0)

        image[0:height, 0:width] = img
        scale = length/640.0
        image = torch.div(image, 255.0)

        t640 = v2.Resize((640, 640), antialias=False)
        image = image.permute(2, 0, 1)
        image = t640(image)

        image = torch.unsqueeze(image, 0).contiguous()

        io_binding = self.yoloface_model.io_binding()
        io_binding.bind_input(name='images', device_type='cuda', device_id=0, element_type=np.float32, shape=image.size(), buffer_ptr=image.data_ptr())
        io_binding.bind_output('output0', 'cuda')

        # Sync and run model
        self.syncvec.cpu()
        self.yoloface_model.run_with_iobinding(io_binding)

        net_outs = io_binding.copy_outputs_to_cpu()

        outputs = np.squeeze(net_outs).T

        bbox_raw, score_raw, kps_raw = np.split(outputs, [4, 5], axis=1)

        bbox_list = []
        score_list = []
        kps_list = []
        keep_indices = np.where(score_raw > score)[0]

        if keep_indices.any():
            bbox_raw, kps_raw, score_raw = bbox_raw[keep_indices], kps_raw[keep_indices], score_raw[keep_indices]

            bbox_raw = bbox_raw * scale

            for bbox in bbox_raw:
                bbox_list.append(np.array([(bbox[0]-bbox[2]/2), (bbox[1]-bbox[3]/2), (bbox[0]+bbox[2]/2), (bbox[1]+bbox[3]/2)]))

            kps_raw = kps_raw * scale

            for kps in kps_raw:
                indexes = np.arange(0, len(kps), 3)
                temp_kps = []
                for index in indexes:
                    temp_kps.append([kps[index], kps[index + 1]])
                kps_list.append(np.array(temp_kps))
            score_list = score_raw.ravel().tolist()

        result_boxes = cv2.dnn.NMSBoxes(bbox_list, score_list, 0.25, 0.45, 0.5)

        bboxes_list = []
        kpss_list = []
        for r in result_boxes:
            if r==max_num:
                break
            if use_landmark_detection and len(kps_list[r]) > 0:
                landmark_kpss, landmark_scores = self.run_detect_landmark(img_landmark, bbox_list[r], kps_list[r], landmark_detect_mode, landmark_score, from_points)
                if len(landmark_kpss) > 0:
                    if len(landmark_scores) > 0:
                        #print(np.mean(landmark_scores))
                        #print(np.mean(score_list[r]))
                        if np.mean(landmark_scores) > np.mean(score_list[r]):
                            kps_list[r] = landmark_kpss
                    else:
                        kps_list[r] = landmark_kpss

            bboxes_list.append(bbox_list[r])
            kpss_list.append(kps_list[r])

        return np.array(bboxes_list), np.array(kpss_list)

1085 |
+
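detect_yoloface above pads the frame into a square canvas, resizes it to the 640x640 network input, and later multiplies the raw boxes and keypoints by scale = length / 640 to map them back to the original resolution. Below is a minimal NumPy/OpenCV sketch of that letterbox-and-rescale bookkeeping; the function names are illustrative and the sketch runs on the CPU rather than on a CUDA tensor.

import cv2
import numpy as np

def letterbox_square(img, net_size=640):
    # img: HxWx3 uint8 frame
    h, w = img.shape[:2]
    length = max(h, w)
    canvas = np.zeros((length, length, 3), dtype=np.uint8)
    canvas[:h, :w] = img                     # top-left padding, as in the method above
    scale = length / net_size                # factor used to undo the resize later
    net_input = cv2.resize(canvas, (net_size, net_size))
    return net_input, scale

def boxes_to_original(cxcywh, scale):
    # cxcywh: N x 4 raw detections in 640-space; returns x1y1x2y2 in frame-space
    cx, cy, w, h = cxcywh[:, 0], cxcywh[:, 1], cxcywh[:, 2], cxcywh[:, 3]
    xyxy = np.stack([cx - w / 2, cy - h / 2, cx + w / 2, cy + h / 2], axis=1)
    return xyxy * scale

frame = np.zeros((720, 1280, 3), dtype=np.uint8)
net_input, scale = letterbox_square(frame)
print(net_input.shape, scale)                # (640, 640, 3) 2.0
print(boxes_to_original(np.array([[320.0, 320.0, 100.0, 50.0]]), scale))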
def detect_yoloface2(self, image_in, max_num, score):
|
1086 |
+
img = image_in.detach().clone()
|
1087 |
+
|
1088 |
+
height = img.size(dim=1)
|
1089 |
+
width = img.size(dim=2)
|
1090 |
+
length = max((height, width))
|
1091 |
+
|
1092 |
+
image = torch.zeros((length, length, 3), dtype=torch.uint8,
|
1093 |
+
device='cuda')
|
1094 |
+
img = img.permute(1, 2, 0)
|
1095 |
+
|
1096 |
+
image[0:height, 0:width] = img
|
1097 |
+
scale = length / 640.0
|
1098 |
+
image = torch.div(image, 255.0)
|
1099 |
+
|
1100 |
+
t640 = v2.Resize((640, 640), antialias=False)
|
1101 |
+
image = image.permute(2, 0, 1)
|
1102 |
+
image = t640(image)
|
1103 |
+
|
1104 |
+
image = torch.unsqueeze(image, 0).contiguous()
|
1105 |
+
|
1106 |
+
io_binding = self.yoloface_model.io_binding()
|
1107 |
+
io_binding.bind_input(name='images', device_type='cuda', device_id=0,
|
1108 |
+
element_type=np.float32, shape=image.size(),
|
1109 |
+
buffer_ptr=image.data_ptr())
|
1110 |
+
io_binding.bind_output('output0', 'cuda')
|
1111 |
+
|
1112 |
+
# Sync and run model
|
1113 |
+
self.syncvec.cpu()
|
1114 |
+
self.yoloface_model.run_with_iobinding(io_binding)
|
1115 |
+
|
1116 |
+
net_outs = io_binding.copy_outputs_to_cpu()
|
1117 |
+
|
1118 |
+
outputs = np.squeeze(net_outs).T
|
1119 |
+
|
1120 |
+
bbox_raw, score_raw, kps_raw = np.split(outputs, [4, 5], axis=1)
|
1121 |
+
|
1122 |
+
bbox_list = []
|
1123 |
+
score_list = []
|
1124 |
+
kps_list = []
|
1125 |
+
keep_indices = np.where(score_raw > score)[0]
|
1126 |
+
|
1127 |
+
if keep_indices.any():
|
1128 |
+
bbox_raw, kps_raw, score_raw = bbox_raw[keep_indices], kps_raw[
|
1129 |
+
keep_indices], score_raw[keep_indices]
|
1130 |
+
for bbox in bbox_raw:
|
1131 |
+
bbox_list.append(np.array(
|
1132 |
+
[(bbox[0] - bbox[2] / 2), (bbox[1] - bbox[3] / 2),
|
1133 |
+
(bbox[0] + bbox[2] / 2), (bbox[1] + bbox[3] / 2)]))
|
1134 |
+
kps_raw = kps_raw * scale
|
1135 |
+
|
1136 |
+
for kps in kps_raw:
|
1137 |
+
indexes = np.arange(0, len(kps), 3)
|
1138 |
+
temp_kps = []
|
1139 |
+
for index in indexes:
|
1140 |
+
temp_kps.append([kps[index], kps[index + 1]])
|
1141 |
+
kps_list.append(np.array(temp_kps))
|
1142 |
+
score_list = score_raw.ravel().tolist()
|
1143 |
+
|
1144 |
+
result_boxes = cv2.dnn.NMSBoxes(bbox_list, score_list, 0.25, 0.45, 0.5)
|
1145 |
+
|
1146 |
+
result = []
|
1147 |
+
for r in result_boxes:
|
1148 |
+
if r == max_num:
|
1149 |
+
break
|
1150 |
+
bbox_list = bbox_list[r]
|
1151 |
+
result.append(kps_list[r])
|
1152 |
+
bbox_list = bbox_list*scale
|
1153 |
+
# print(bbox_list)
|
1154 |
+
# print(bbox_list*scale)
|
1155 |
+
|
1156 |
+
# img = image_in.detach().clone()
|
1157 |
+
# test = image_in.permute(1, 2, 0)
|
1158 |
+
# test = test.cpu().numpy()
|
1159 |
+
# cv2.imwrite('1.jpg', test)
|
1160 |
+
|
1161 |
+
# b_scale = 50
|
1162 |
+
# bbox_list[0] = bbox_list[0] - b_scale
|
1163 |
+
# bbox_list[1] = bbox_list[1] - b_scale
|
1164 |
+
# bbox_list[2] = bbox_list[2] + b_scale
|
1165 |
+
# bbox_list[3] = bbox_list[3] + b_scale
|
1166 |
+
|
1167 |
+
img = image_in.detach().clone()
|
1168 |
+
|
1169 |
+
img = img[:, int(bbox_list[1]):int(bbox_list[3]), int(bbox_list[0]):int(bbox_list[2])]
|
1170 |
+
# print(img.size())
|
1171 |
+
|
1172 |
+
|
1173 |
+
height = img.size(dim=1)
|
1174 |
+
width = img.size(dim=2)
|
1175 |
+
length = max((height, width))
|
1176 |
+
|
1177 |
+
image = torch.zeros((length, length, 3), dtype=torch.uint8, device='cuda')
|
1178 |
+
img = img.permute(1,2,0)
|
1179 |
+
|
1180 |
+
image[0:height, 0:width] = img
|
1181 |
+
scale = length/192
|
1182 |
+
image = torch.div(image, 255.0)
|
1183 |
+
|
1184 |
+
|
1185 |
+
t192 = v2.Resize((192, 192), antialias=False)
|
1186 |
+
image = image.permute(2, 0, 1)
|
1187 |
+
image = t192(image)
|
1188 |
+
|
1189 |
+
test = image_in.detach().clone().permute(1, 2, 0)
|
1190 |
+
test = test.cpu().numpy()
|
1191 |
+
|
1192 |
+
input_mean = 0.0
|
1193 |
+
input_std = 1.0
|
1194 |
+
|
1195 |
+
self.lmk_dim = 2
|
1196 |
+
self.lmk_num = 106
|
1197 |
+
|
1198 |
+
bbox = bbox_list
|
1199 |
+
w, h = (bbox[2] - bbox[0]), (bbox[3] - bbox[1])
|
1200 |
+
center = (bbox[2] + bbox[0]) / 2, (bbox[3] + bbox[1]) / 2
|
1201 |
+
rotate = 0
|
1202 |
+
_scale = 192 / (max(w, h) * 1.5)
|
1203 |
+
# print('param:', img.shape, bbox, center, self.input_size, _scale, rotate)
|
1204 |
+
aimg, M = self.transform(test, center, 192, _scale, rotate)
|
1205 |
+
input_size = tuple(aimg.shape[0:2][::-1])
|
1206 |
+
# assert input_size==self.input_size
|
1207 |
+
blob = cv2.dnn.blobFromImage(aimg, 1.0 / input_std, input_size, ( input_mean, input_mean, input_mean), swapRB=True)
|
1208 |
+
pred = self.insight106_model.run(['fc1'], {'data': blob})[0][0]
|
1209 |
+
if pred.shape[0] >= 3000:
|
1210 |
+
pred = pred.reshape((-1, 3))
|
1211 |
+
else:
|
1212 |
+
pred = pred.reshape((-1, 2))
|
1213 |
+
if self.lmk_num < pred.shape[0]:
|
1214 |
+
pred = pred[self.lmk_num * -1:, :]
|
1215 |
+
pred[:, 0:2] += 1
|
1216 |
+
pred[:, 0:2] *= 96
|
1217 |
+
if pred.shape[1] == 3:
|
1218 |
+
pred[:, 2] *= (106)
|
1219 |
+
|
1220 |
+
IM = cv2.invertAffineTransform(M)
|
1221 |
+
pred = self.trans_points2d(pred, IM)
|
1222 |
+
# face[self.taskname] = pred
|
1223 |
+
# if self.require_pose:
|
1224 |
+
# P = transform.estimate_affine_matrix_3d23d(self.mean_lmk, pred)
|
1225 |
+
# s, R, t = transform.P2sRt(P)
|
1226 |
+
# rx, ry, rz = transform.matrix2angle(R)
|
1227 |
+
# pose = np.array([rx, ry, rz], dtype=np.float32)
|
1228 |
+
# face['pose'] = pose # pitch, yaw, roll
|
1229 |
+
# print(pred.shape)
|
1230 |
+
# print(pred)
|
1231 |
+
|
1232 |
+
for point in pred:
|
1233 |
+
test[int(point[1])][int(point[0])][0] = 255

|
1234 |
+
test[int(point[1])][int(point[0])][1] = 255
|
1235 |
+
test[int(point[1])][int(point[0])][2] = 255
|
1236 |
+
cv2.imwrite('2.jpg', test)
|
1237 |
+
|
1238 |
+
predd = []
|
1239 |
+
predd.append(pred[38])
|
1240 |
+
predd.append(pred[88])
|
1241 |
+
# predd.append(pred[86])
|
1242 |
+
# predd.append(pred[52])
|
1243 |
+
# predd.append(pred[61])
|
1244 |
+
|
1245 |
+
predd.append(kps_list[0][2])
|
1246 |
+
predd.append(kps_list[0][3])
|
1247 |
+
predd.append(kps_list[0][4])
|
1248 |
+
|
1249 |
+
# for point in predd:
|
1250 |
+
# test[int(point[1])] [int(point[0])] [0] = 255
|
1251 |
+
# test[int(point[1])] [int(point[0])] [1] = 255
|
1252 |
+
# test[int(point[1])] [int(point[0])] [2] = 255
|
1253 |
+
# cv2.imwrite('2.jpg', test)
|
1254 |
+
preddd=[]
|
1255 |
+
preddd.append(predd)
|
1256 |
+
return np.array(preddd)
|
1257 |
+
def transform(self, data, center, output_size, scale, rotation):
|
1258 |
+
scale_ratio = scale
|
1259 |
+
rot = float(rotation) * np.pi / 180.0
|
1260 |
+
# translation = (output_size/2-center[0]*scale_ratio, output_size/2-center[1]*scale_ratio)
|
1261 |
+
t1 = trans.SimilarityTransform(scale=scale_ratio)
|
1262 |
+
cx = center[0] * scale_ratio
|
1263 |
+
cy = center[1] * scale_ratio
|
1264 |
+
t2 = trans.SimilarityTransform(translation=(-1 * cx, -1 * cy))
|
1265 |
+
t3 = trans.SimilarityTransform(rotation=rot)
|
1266 |
+
t4 = trans.SimilarityTransform(translation=(output_size / 2,
|
1267 |
+
output_size / 2))
|
1268 |
+
t = t1 + t2 + t3 + t4
|
1269 |
+
M = t.params[0:2]
|
1270 |
+
cropped = cv2.warpAffine(data,
|
1271 |
+
M, (output_size, output_size),
|
1272 |
+
borderValue=0.0)
|
1273 |
+
return cropped, M
|
1274 |
+
|
1275 |
+
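transform above builds the crop matrix by composing four skimage SimilarityTransforms: scale, translate the face centre to the origin, rotate, then translate to the centre of the output crop. The sketch below reproduces that composition as a standalone function; the centre and scale values are made up for illustration.

import numpy as np
import cv2
from skimage import transform as trans

def make_crop_matrix(center, output_size, scale, rotation_deg=0.0):
    rot = np.deg2rad(rotation_deg)
    t1 = trans.SimilarityTransform(scale=scale)
    t2 = trans.SimilarityTransform(translation=(-center[0] * scale, -center[1] * scale))
    t3 = trans.SimilarityTransform(rotation=rot)
    t4 = trans.SimilarityTransform(translation=(output_size / 2, output_size / 2))
    t = t1 + t2 + t3 + t4          # composition: applied left to right
    return t.params[0:2]           # 2x3 affine matrix for cv2.warpAffine

img = np.zeros((480, 640, 3), dtype=np.uint8)
M = make_crop_matrix(center=(320, 240), output_size=192, scale=192 / (200 * 1.5))
crop = cv2.warpAffine(img, M, (192, 192), borderValue=0.0)
print(M.shape, crop.shape)         # (2, 3) (192, 192, 3)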
def trans_points2d(self, pts, M):
|
1276 |
+
new_pts = np.zeros(shape=pts.shape, dtype=np.float32)
|
1277 |
+
for i in range(pts.shape[0]):
|
1278 |
+
pt = pts[i]
|
1279 |
+
new_pt = np.array([pt[0], pt[1], 1.], dtype=np.float32)
|
1280 |
+
new_pt = np.dot(M, new_pt)
|
1281 |
+
# print('new_pt', new_pt.shape, new_pt)
|
1282 |
+
new_pts[i] = new_pt[0:2]
|
1283 |
+
|
1284 |
+
return new_pts
|
1285 |
+
|
1286 |
+
# image = torch.unsqueeze(image, 0).contiguous()
|
1287 |
+
#
|
1288 |
+
# io_binding = self.insight106_model.io_binding()
|
1289 |
+
# io_binding.bind_input(name='data', device_type='cuda', device_id=0, element_type=np.float32, shape=image.size(), buffer_ptr=image.data_ptr())
|
1290 |
+
# io_binding.bind_output('fc1', 'cuda')
|
1291 |
+
#
|
1292 |
+
# # Sync and run model
|
1293 |
+
# self.syncvec.cpu()
|
1294 |
+
# self.insight106_model.run_with_iobinding(io_binding)
|
1295 |
+
#
|
1296 |
+
# net_outs = io_binding.copy_outputs_to_cpu()
|
1297 |
+
# print(net_outs)
|
1298 |
+
# net_outs[0][0] = net_outs[0][0]+1.
|
1299 |
+
# net_outs[0][0] = net_outs[0][0]/2.
|
1300 |
+
# net_outs[0][0] = net_outs[0][0]*96
|
1301 |
+
#
|
1302 |
+
# # net_outs[0] = net_outs[0]*scale
|
1303 |
+
# # print(net_outs)
|
1304 |
+
# test=test*255.0
|
1305 |
+
# for i in range(0, len(net_outs[0][0]), 2):
|
1306 |
+
# test[int(net_outs[0][0][i+1])] [int(net_outs[0][0][i])] [0] = 255
|
1307 |
+
# test[int(net_outs[0][0][i+1])] [int(net_outs[0][0][i])] [1] = 255
|
1308 |
+
# test[int(net_outs[0][0][i+1])] [int(net_outs[0][0][i])] [2] = 255
|
1309 |
+
# cv2.imwrite('2.jpg', test)
|
1310 |
+
#
|
1311 |
+
# return np.array(result)
|
1312 |
+
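trans_points2d above maps each 2-D point through the affine matrix one point at a time. The same operation can be written as one homogeneous matrix multiply; a small sketch follows (the vectorised helper is hypothetical, not a function from this file).

import numpy as np

def trans_points2d_vectorized(pts, M):
    # pts: N x 2, M: 2 x 3 affine matrix
    ones = np.ones((pts.shape[0], 1), dtype=np.float32)
    homo = np.hstack([pts.astype(np.float32), ones])   # N x 3 homogeneous points
    return homo @ M.T                                   # N x 2 transformed points

M = np.array([[1.0, 0.0, 10.0],
              [0.0, 1.0, -5.0]], dtype=np.float32)      # pure translation
pts = np.array([[0.0, 0.0], [3.0, 4.0]], dtype=np.float32)
print(trans_points2d_vectorized(pts, M))                # [[10. -5.] [13. -1.]]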
def detect_yunet(self, img, max_num, score, use_landmark_detection, landmark_detect_mode, landmark_score, from_points):
|
1313 |
+
if use_landmark_detection:
|
1314 |
+
img_landmark = img.clone()
|
1315 |
+
|
1316 |
+
height = img.size(dim=1)
|
1317 |
+
width = img.size(dim=2)
|
1318 |
+
input_size = (640, 640)
|
1319 |
+
im_ratio = float(height) / width
|
1320 |
+
model_ratio = float(input_size[1]) / input_size[0]
|
1321 |
+
if im_ratio > model_ratio:
|
1322 |
+
new_height = input_size[1]
|
1323 |
+
new_width = int(new_height / im_ratio)
|
1324 |
+
else:
|
1325 |
+
new_width = input_size[0]
|
1326 |
+
new_height = int(new_width * im_ratio)
|
1327 |
+
det_scale = float(new_height) / height
|
1328 |
+
|
1329 |
+
t640 = v2.Resize((new_height, new_width), antialias=False)
|
1330 |
+
img = t640(img)
|
1331 |
+
|
1332 |
+
# Switch to BGR
|
1333 |
+
img = img.permute(1,2,0)
|
1334 |
+
img = img[:, :, [2,1,0]]
|
1335 |
+
|
1336 |
+
image = torch.zeros((input_size[1], input_size[0], 3), dtype=torch.uint8, device='cuda')
|
1337 |
+
image[:new_height, :new_width, :] = img
|
1338 |
+
|
1339 |
+
image = image.permute(2, 0, 1)
|
1340 |
+
image = torch.unsqueeze(image, 0).contiguous()
|
1341 |
+
image = image.to(dtype=torch.float32)
|
1342 |
+
|
1343 |
+
input_name = self.yunet_model.get_inputs()[0].name
|
1344 |
+
outputs = self.yunet_model.get_outputs()
|
1345 |
+
output_names = []
|
1346 |
+
for o in outputs:
|
1347 |
+
output_names.append(o.name)
|
1348 |
+
|
1349 |
+
io_binding = self.yunet_model.io_binding()
|
1350 |
+
io_binding.bind_input(name=input_name, device_type='cuda', device_id=0, element_type=np.float32, shape=image.size(), buffer_ptr=image.data_ptr())
|
1351 |
+
|
1352 |
+
for i in range(len(output_names)):
|
1353 |
+
io_binding.bind_output(output_names[i], 'cuda')
|
1354 |
+
|
1355 |
+
# Sync and run model
|
1356 |
+
syncvec = self.syncvec.cpu()
|
1357 |
+
self.yunet_model.run_with_iobinding(io_binding)
|
1358 |
+
net_outs = io_binding.copy_outputs_to_cpu()
|
1359 |
+
|
1360 |
+
strides = [8, 16, 32]
|
1361 |
+
scores, bboxes, kpss = [], [], []
|
1362 |
+
for idx, stride in enumerate(strides):
|
1363 |
+
cls_pred = net_outs[idx].reshape(-1, 1)
|
1364 |
+
obj_pred = net_outs[idx + len(strides)].reshape(-1, 1)
|
1365 |
+
reg_pred = net_outs[idx + len(strides) * 2].reshape(-1, 4)
|
1366 |
+
kps_pred = net_outs[idx + len(strides) * 3].reshape(
|
1367 |
+
-1, 5 * 2)
|
1368 |
+
|
1369 |
+
anchor_centers = np.stack(
|
1370 |
+
np.mgrid[:(input_size[1] // stride), :(input_size[0] //
|
1371 |
+
stride)][::-1],
|
1372 |
+
axis=-1)
|
1373 |
+
anchor_centers = (anchor_centers * stride).astype(
|
1374 |
+
np.float32).reshape(-1, 2)
|
1375 |
+
|
1376 |
+
bbox_cxy = reg_pred[:, :2] * stride + anchor_centers[:]
|
1377 |
+
bbox_wh = np.exp(reg_pred[:, 2:]) * stride
|
1378 |
+
tl_x = (bbox_cxy[:, 0] - bbox_wh[:, 0] / 2.)
|
1379 |
+
tl_y = (bbox_cxy[:, 1] - bbox_wh[:, 1] / 2.)
|
1380 |
+
br_x = (bbox_cxy[:, 0] + bbox_wh[:, 0] / 2.)
|
1381 |
+
br_y = (bbox_cxy[:, 1] + bbox_wh[:, 1] / 2.)
|
1382 |
+
|
1383 |
+
bboxes.append(np.stack([tl_x, tl_y, br_x, br_y], -1))
|
1384 |
+
# for nk in range(5):
|
1385 |
+
per_kps = np.concatenate(
|
1386 |
+
[((kps_pred[:, [2 * i, 2 * i + 1]] * stride) + anchor_centers)
|
1387 |
+
for i in range(5)],
|
1388 |
+
axis=-1)
|
1389 |
+
|
1390 |
+
kpss.append(per_kps)
|
1391 |
+
scores.append(cls_pred * obj_pred)
|
1392 |
+
|
1393 |
+
scores = np.concatenate(scores, axis=0).reshape(-1)
|
1394 |
+
bboxes = np.concatenate(bboxes, axis=0)
|
1395 |
+
kpss = np.concatenate(kpss, axis=0)
|
1396 |
+
score_mask = (scores > score)
|
1397 |
+
scores = scores[score_mask]
|
1398 |
+
bboxes = bboxes[score_mask]
|
1399 |
+
kpss = kpss[score_mask]
|
1400 |
+
|
1401 |
+
bboxes /= det_scale
|
1402 |
+
kpss /= det_scale
|
1403 |
+
pre_det = np.hstack((bboxes, scores[:, None]))
|
1404 |
+
|
1405 |
+
dets = pre_det
|
1406 |
+
thresh = 0.4
|
1407 |
+
x1 = dets[:, 0]
|
1408 |
+
y1 = dets[:, 1]
|
1409 |
+
x2 = dets[:, 2]
|
1410 |
+
y2 = dets[:, 3]
|
1411 |
+
scoresb = dets[:, -1]
|
1412 |
+
|
1413 |
+
areas = (x2 - x1 + 1) * (y2 - y1 + 1)
|
1414 |
+
order = scoresb.argsort()[::-1]
|
1415 |
+
|
1416 |
+
keep = []
|
1417 |
+
while order.size > 0:
|
1418 |
+
i = order[0]
|
1419 |
+
keep.append(i)
|
1420 |
+
xx1 = np.maximum(x1[i], x1[order[1:]])
|
1421 |
+
yy1 = np.maximum(y1[i], y1[order[1:]])
|
1422 |
+
xx2 = np.minimum(x2[i], x2[order[1:]])
|
1423 |
+
yy2 = np.minimum(y2[i], y2[order[1:]])
|
1424 |
+
|
1425 |
+
w = np.maximum(0.0, xx2 - xx1 + 1)
|
1426 |
+
h = np.maximum(0.0, yy2 - yy1 + 1)
|
1427 |
+
inter = w * h
|
1428 |
+
ovr = inter / (areas[i] + areas[order[1:]] - inter)
|
1429 |
+
|
1430 |
+
inds = np.where(ovr <= thresh)[0]
|
1431 |
+
order = order[inds + 1]
|
1432 |
+
|
1433 |
+
kpss = kpss[keep, :]
|
1434 |
+
bboxes = pre_det[keep, :]
|
1435 |
+
score_values = bboxes[:, 4]
|
1436 |
+
|
1437 |
+
bbox_list = []
|
1438 |
+
kps_list = []
|
1439 |
+
for i in range(bboxes.shape[0]):
|
1440 |
+
if i==max_num:
|
1441 |
+
break
|
1442 |
+
box = np.array((bboxes[i][0], bboxes[i][1], bboxes[i][2], bboxes[i][3]))
|
1443 |
+
bbox_list.append(box)
|
1444 |
+
|
1445 |
+
if kpss is not None:
|
1446 |
+
kps = kpss[i].reshape(-1, 2)
|
1447 |
+
if use_landmark_detection and len(kps) > 0:
|
1448 |
+
landmark_kpss, landmark_scores = self.run_detect_landmark(img_landmark, box, kps, landmark_detect_mode, landmark_score, from_points)
|
1449 |
+
if len(landmark_kpss) > 0:
|
1450 |
+
if len(landmark_scores) > 0:
|
1451 |
+
#print(np.mean(landmark_scores))
|
1452 |
+
#print(np.mean(score_values[i]))
|
1453 |
+
if np.mean(landmark_scores) > np.mean(score_values[i]):
|
1454 |
+
kps = landmark_kpss
|
1455 |
+
else:
|
1456 |
+
kps = landmark_kpss
|
1457 |
+
|
1458 |
+
kps_list.append(kps)
|
1459 |
+
|
1460 |
+
return np.array(bbox_list), np.array(kps_list)
|
1461 |
+
|
1462 |
+
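detect_yunet above decodes each stride's raw head outputs against a grid of anchor centres: the first two regression values are centre offsets in stride units, the last two are log-encoded width and height. Here is a small NumPy sketch of that decoding for a single stride; the array values are toy numbers and the function name is illustrative.

import numpy as np

def decode_yunet_stride(reg_pred, stride, input_size=(640, 640)):
    # reg_pred: N x 4 of [dx, dy, log_w, log_h], one row per anchor centre
    grid = np.stack(np.mgrid[:input_size[1] // stride, :input_size[0] // stride][::-1], axis=-1)
    centers = (grid * stride).astype(np.float32).reshape(-1, 2)
    cxy = reg_pred[:, :2] * stride + centers             # box centre in pixels
    wh = np.exp(reg_pred[:, 2:]) * stride                 # box width/height in pixels
    return np.concatenate([cxy - wh / 2.0, cxy + wh / 2.0], axis=1)   # x1 y1 x2 y2

stride = 32
n = (640 // stride) * (640 // stride)
reg = np.zeros((n, 4), dtype=np.float32)                  # zero offsets: 32px boxes on the grid
boxes = decode_yunet_stride(reg, stride)
print(boxes.shape, boxes[0])                              # (400, 4) [-16. -16.  16.  16.]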
def detect_face_landmark_5(self, img, bbox, det_kpss, from_points=False):
|
1463 |
+
if from_points == False:
|
1464 |
+
w, h = (bbox[2] - bbox[0]), (bbox[3] - bbox[1])
|
1465 |
+
center = (bbox[2] + bbox[0]) / 2, (bbox[3] + bbox[1]) / 2
|
1466 |
+
rotate = 0
|
1467 |
+
_scale = 512.0 / (max(w, h)*1.5)
|
1468 |
+
image, M = faceutil.transform(img, center, 512, _scale, rotate)
|
1469 |
+
else:
|
1470 |
+
image, M = faceutil.warp_face_by_face_landmark_5(img, det_kpss, 512, normalized=True)
|
1471 |
+
|
1472 |
+
image = image.permute(1,2,0)
|
1473 |
+
|
1474 |
+
mean = torch.tensor([104, 117, 123], dtype=torch.float32, device='cuda')
|
1475 |
+
image = torch.sub(image, mean)
|
1476 |
+
|
1477 |
+
image = image.permute(2,0,1)
|
1478 |
+
image = torch.reshape(image, (1, 3, 512, 512))
|
1479 |
+
|
1480 |
+
height, width = (512, 512)
|
1481 |
+
tmp = [width, height, width, height, width, height, width, height, width, height]
|
1482 |
+
scale1 = torch.tensor(tmp, dtype=torch.float32, device='cuda')
|
1483 |
+
|
1484 |
+
conf = torch.empty((1,10752,2), dtype=torch.float32, device='cuda').contiguous()
|
1485 |
+
landmarks = torch.empty((1,10752,10), dtype=torch.float32, device='cuda').contiguous()
|
1486 |
+
|
1487 |
+
io_binding = self.resnet50_model.io_binding()
|
1488 |
+
io_binding.bind_input(name='input', device_type='cuda', device_id=0, element_type=np.float32, shape=(1,3,512,512), buffer_ptr=image.data_ptr())
|
1489 |
+
io_binding.bind_output(name='conf', device_type='cuda', device_id=0, element_type=np.float32, shape=(1,10752,2), buffer_ptr=conf.data_ptr())
|
1490 |
+
io_binding.bind_output(name='landmarks', device_type='cuda', device_id=0, element_type=np.float32, shape=(1,10752,10), buffer_ptr=landmarks.data_ptr())
|
1491 |
+
|
1492 |
+
torch.cuda.synchronize('cuda')
|
1493 |
+
self.resnet50_model.run_with_iobinding(io_binding)
|
1494 |
+
|
1495 |
+
scores = torch.squeeze(conf)[:, 1]
|
1496 |
+
priors = torch.tensor(self.anchors).view(-1, 4)
|
1497 |
+
priors = priors.to('cuda')
|
1498 |
+
|
1499 |
+
pre = torch.squeeze(landmarks, 0)
|
1500 |
+
|
1501 |
+
tmp = (priors[:, :2] + pre[:, :2] * 0.1 * priors[:, 2:], priors[:, :2] + pre[:, 2:4] * 0.1 * priors[:, 2:], priors[:, :2] + pre[:, 4:6] * 0.1 * priors[:, 2:], priors[:, :2] + pre[:, 6:8] * 0.1 * priors[:, 2:], priors[:, :2] + pre[:, 8:10] * 0.1 * priors[:, 2:])
|
1502 |
+
landmarks = torch.cat(tmp, dim=1)
|
1503 |
+
landmarks = torch.mul(landmarks, scale1)
|
1504 |
+
|
1505 |
+
landmarks = landmarks.cpu().numpy()
|
1506 |
+
|
1507 |
+
# ignore low scores
|
1508 |
+
score=.1
|
1509 |
+
inds = torch.where(scores>score)[0]
|
1510 |
+
inds = inds.cpu().numpy()
|
1511 |
+
scores = scores.cpu().numpy()
|
1512 |
+
|
1513 |
+
landmarks, scores = landmarks[inds], scores[inds]
|
1514 |
+
|
1515 |
+
# sort
|
1516 |
+
order = scores.argsort()[::-1]
|
1517 |
+
|
1518 |
+
if len(order) > 0:
|
1519 |
+
landmarks = landmarks[order][0]
|
1520 |
+
scores = scores[order][0]
|
1521 |
+
|
1522 |
+
landmarks = np.array([[landmarks[i], landmarks[i + 1]] for i in range(0,10,2)])
|
1523 |
+
|
1524 |
+
IM = faceutil.invertAffineTransform(M)
|
1525 |
+
landmarks = faceutil.trans_points2d(landmarks, IM)
|
1526 |
+
scores = np.array([scores])
|
1527 |
+
|
1528 |
+
#faceutil.test_bbox_landmarks(img, bbox, landmarks)
|
1529 |
+
#print(scores)
|
1530 |
+
|
1531 |
+
return landmarks, scores
|
1532 |
+
|
1533 |
+
return [], []
|
1534 |
+
|
1535 |
+
def detect_face_landmark_68(self, img, bbox, det_kpss, convert68_5=True, from_points=False):
|
1536 |
+
if from_points == False:
|
1537 |
+
crop_image, affine_matrix = faceutil.warp_face_by_bounding_box_for_landmark_68(img, bbox, (256, 256))
|
1538 |
+
else:
|
1539 |
+
crop_image, affine_matrix = faceutil.warp_face_by_face_landmark_5(img, det_kpss, 256, normalized=True)
|
1540 |
+
'''
|
1541 |
+
cv2.imshow('image', crop_image.permute(1, 2, 0).to('cpu').numpy())
|
1542 |
+
cv2.waitKey(0)
|
1543 |
+
cv2.destroyAllWindows()
|
1544 |
+
'''
|
1545 |
+
crop_image = crop_image.to(dtype=torch.float32)
|
1546 |
+
crop_image = torch.div(crop_image, 255.0)
|
1547 |
+
crop_image = torch.unsqueeze(crop_image, 0).contiguous()
|
1548 |
+
|
1549 |
+
io_binding = self.face_landmark_68_model.io_binding()
|
1550 |
+
io_binding.bind_input(name='input', device_type='cuda', device_id=0, element_type=np.float32, shape=crop_image.size(), buffer_ptr=crop_image.data_ptr())
|
1551 |
+
|
1552 |
+
io_binding.bind_output('landmarks_xyscore', 'cuda')
|
1553 |
+
io_binding.bind_output('heatmaps', 'cuda')
|
1554 |
+
|
1555 |
+
# Sync and run model
|
1556 |
+
syncvec = self.syncvec.cpu()
|
1557 |
+
self.face_landmark_68_model.run_with_iobinding(io_binding)
|
1558 |
+
net_outs = io_binding.copy_outputs_to_cpu()
|
1559 |
+
face_landmark_68 = net_outs[0]
|
1560 |
+
face_heatmap = net_outs[1]
|
1561 |
+
|
1562 |
+
face_landmark_68 = face_landmark_68[:, :, :2][0] / 64.0
|
1563 |
+
face_landmark_68 = face_landmark_68.reshape(1, -1, 2) * 256.0
|
1564 |
+
face_landmark_68 = cv2.transform(face_landmark_68, cv2.invertAffineTransform(affine_matrix))
|
1565 |
+
|
1566 |
+
face_landmark_68 = face_landmark_68.reshape(-1, 2)
|
1567 |
+
face_landmark_68_score = np.amax(face_heatmap, axis = (2, 3))
|
1568 |
+
face_landmark_68_score = face_landmark_68_score.reshape(-1, 1)
|
1569 |
+
|
1570 |
+
if convert68_5:
|
1571 |
+
face_landmark_68, face_landmark_68_score = faceutil.convert_face_landmark_68_to_5(face_landmark_68, face_landmark_68_score)
|
1572 |
+
|
1573 |
+
#faceutil.test_bbox_landmarks(img, bbox, face_landmark_68)
|
1574 |
+
|
1575 |
+
return face_landmark_68, face_landmark_68_score
|
1576 |
+
|
1577 |
+
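detect_face_landmark_68 above rescales the network's landmark grid (coordinates in a 64-unit heatmap space) to the 256-pixel crop and then applies the inverse of the crop's affine matrix to place the points back in the original frame. A minimal OpenCV sketch of that back-projection step, with a toy affine matrix and toy points:

import cv2
import numpy as np

# landmarks predicted in heatmap units (0..64) for a 256x256 crop
heatmap_landmarks = np.array([[32.0, 32.0], [16.0, 48.0]], dtype=np.float32)
crop_landmarks = (heatmap_landmarks / 64.0) * 256.0          # now in crop pixels

# affine matrix that produced the crop from the full frame (toy: crop started at (100, 50))
affine_matrix = np.array([[1.0, 0.0, -100.0],
                          [0.0, 1.0, -50.0]], dtype=np.float32)

# map crop-space points back to frame-space with the inverse transform
inverse_matrix = cv2.invertAffineTransform(affine_matrix)
frame_landmarks = cv2.transform(crop_landmarks.reshape(1, -1, 2), inverse_matrix).reshape(-1, 2)
print(frame_landmarks)                                        # [[228. 178.] [164. 242.]]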
def detect_face_landmark_3d68(self, img, bbox, det_kpss, convert68_5=True, from_points=False):
|
1578 |
+
if from_points == False:
|
1579 |
+
w, h = (bbox[2] - bbox[0]), (bbox[3] - bbox[1])
|
1580 |
+
center = (bbox[2] + bbox[0]) / 2, (bbox[3] + bbox[1]) / 2
|
1581 |
+
rotate = 0
|
1582 |
+
_scale = 192 / (max(w, h)*1.5)
|
1583 |
+
#print('param:', img.size(), bbox, center, (192, 192), _scale, rotate)
|
1584 |
+
aimg, M = faceutil.transform(img, center, 192, _scale, rotate)
|
1585 |
+
else:
|
1586 |
+
aimg, M = faceutil.warp_face_by_face_landmark_5(img, det_kpss, image_size=192, normalized=True)
|
1587 |
+
'''
|
1588 |
+
cv2.imshow('image', aimg.permute(1, 2, 0).to('cpu').numpy())
|
1589 |
+
cv2.waitKey(0)
|
1590 |
+
cv2.destroyAllWindows()
|
1591 |
+
'''
|
1592 |
+
aimg = torch.unsqueeze(aimg, 0).contiguous()
|
1593 |
+
aimg = aimg.to(dtype=torch.float32)
|
1594 |
+
aimg = self.normalize(aimg)
|
1595 |
+
io_binding = self.face_landmark_3d68_model.io_binding()
|
1596 |
+
io_binding.bind_input(name='data', device_type='cuda', device_id=0, element_type=np.float32, shape=aimg.size(), buffer_ptr=aimg.data_ptr())
|
1597 |
+
|
1598 |
+
io_binding.bind_output('fc1', 'cuda')
|
1599 |
+
|
1600 |
+
# Sync and run model
|
1601 |
+
syncvec = self.syncvec.cpu()
|
1602 |
+
self.face_landmark_3d68_model.run_with_iobinding(io_binding)
|
1603 |
+
pred = io_binding.copy_outputs_to_cpu()[0][0]
|
1604 |
+
|
1605 |
+
if pred.shape[0] >= 3000:
|
1606 |
+
pred = pred.reshape((-1, 3))
|
1607 |
+
else:
|
1608 |
+
pred = pred.reshape((-1, 2))
|
1609 |
+
if 68 < pred.shape[0]:
|
1610 |
+
pred = pred[68*-1:,:]
|
1611 |
+
pred[:, 0:2] += 1
|
1612 |
+
pred[:, 0:2] *= (192 // 2)
|
1613 |
+
if pred.shape[1] == 3:
|
1614 |
+
pred[:, 2] *= (192 // 2)
|
1615 |
+
|
1616 |
+
#IM = cv2.invertAffineTransform(M)
|
1617 |
+
IM = faceutil.invertAffineTransform(M)
|
1618 |
+
pred = faceutil.trans_points3d(pred, IM)
|
1619 |
+
|
1620 |
+
# at the moment we don't use 3d points
|
1621 |
+
'''
|
1622 |
+
P = faceutil.estimate_affine_matrix_3d23d(self.mean_lmk, pred)
|
1623 |
+
s, R, t = faceutil.P2sRt(P)
|
1624 |
+
rx, ry, rz = faceutil.matrix2angle(R)
|
1625 |
+
pose = np.array( [rx, ry, rz], dtype=np.float32 ) #pitch, yaw, roll
|
1626 |
+
'''
|
1627 |
+
|
1628 |
+
# convert from 3d68 to 2d68 keypoints
|
1629 |
+
landmark2d68 = np.array(pred[:, [0, 1]])
|
1630 |
+
|
1631 |
+
if convert68_5:
|
1632 |
+
# convert from 68 to 5 keypoints
|
1633 |
+
landmark2d68, _ = faceutil.convert_face_landmark_68_to_5(landmark2d68, [])
|
1634 |
+
|
1635 |
+
#faceutil.test_bbox_landmarks(img, bbox, landmark2d68)
|
1636 |
+
|
1637 |
+
return landmark2d68, []
|
1638 |
+
|
1639 |
+
def detect_face_landmark_98(self, img, bbox, det_kpss, convert98_5=True, from_points=False):
|
1640 |
+
if from_points == False:
|
1641 |
+
crop_image, detail = faceutil.warp_face_by_bounding_box_for_landmark_98(img, bbox, (256, 256))
|
1642 |
+
else:
|
1643 |
+
crop_image, M = faceutil.warp_face_by_face_landmark_5(img, det_kpss, image_size=256, normalized=True)
|
1644 |
+
#crop_image2 = crop_image.clone()
|
1645 |
+
h, w = (crop_image.size(dim=1), crop_image.size(dim=2))
|
1646 |
+
'''
|
1647 |
+
cv2.imshow('image', crop_image.permute(1, 2, 0).to('cpu').numpy())
|
1648 |
+
cv2.waitKey(0)
|
1649 |
+
cv2.destroyAllWindows()
|
1650 |
+
'''
|
1651 |
+
landmark = []
|
1652 |
+
landmark_score = []
|
1653 |
+
if crop_image is not None:
|
1654 |
+
crop_image = crop_image.to(dtype=torch.float32)
|
1655 |
+
crop_image = torch.div(crop_image, 255.0)
|
1656 |
+
crop_image = torch.unsqueeze(crop_image, 0).contiguous()
|
1657 |
+
|
1658 |
+
io_binding = self.face_landmark_98_model.io_binding()
|
1659 |
+
io_binding.bind_input(name='input', device_type='cuda', device_id=0, element_type=np.float32, shape=crop_image.size(), buffer_ptr=crop_image.data_ptr())
|
1660 |
+
|
1661 |
+
io_binding.bind_output('landmarks_xyscore', 'cuda')
|
1662 |
+
|
1663 |
+
# Sync and run model
|
1664 |
+
syncvec = self.syncvec.cpu()
|
1665 |
+
self.face_landmark_98_model.run_with_iobinding(io_binding)
|
1666 |
+
landmarks_xyscore = io_binding.copy_outputs_to_cpu()[0]
|
1667 |
+
|
1668 |
+
if len(landmarks_xyscore) > 0:
|
1669 |
+
for one_face_landmarks in landmarks_xyscore:
|
1670 |
+
landmark_score = one_face_landmarks[:, [2]].reshape(-1)
|
1671 |
+
landmark = one_face_landmarks[:, [0, 1]].reshape(-1,2)
|
1672 |
+
|
1673 |
+
##recover, and grouped as [98,2]
|
1674 |
+
if from_points == False:
|
1675 |
+
landmark[:, 0] = landmark[:, 0] * detail[1] + detail[3] - detail[4]
|
1676 |
+
landmark[:, 1] = landmark[:, 1] * detail[0] + detail[2] - detail[4]
|
1677 |
+
else:
|
1678 |
+
landmark[:, 0] = landmark[:, 0] * w
|
1679 |
+
landmark[:, 1] = landmark[:, 1] * h
|
1680 |
+
#lmk = landmark.copy()
|
1681 |
+
#lmk_score = landmark_score.copy()
|
1682 |
+
|
1683 |
+
#IM = cv2.invertAffineTransform(M)
|
1684 |
+
IM = faceutil.invertAffineTransform(M)
|
1685 |
+
landmark = faceutil.trans_points2d(landmark, IM)
|
1686 |
+
|
1687 |
+
if convert98_5:
|
1688 |
+
landmark, landmark_score = faceutil.convert_face_landmark_98_to_5(landmark, landmark_score)
|
1689 |
+
#lmk, lmk_score = faceutil.convert_face_landmark_98_to_5(lmk, lmk_score)
|
1690 |
+
|
1691 |
+
#faceutil.test_bbox_landmarks(crop_image2, [], lmk)
|
1692 |
+
#faceutil.test_bbox_landmarks(img, bbox, landmark)
|
1693 |
+
#faceutil.test_bbox_landmarks(img, bbox, det_kpss)
|
1694 |
+
|
1695 |
+
return landmark, landmark_score
|
1696 |
+
|
1697 |
+
def detect_face_landmark_106(self, img, bbox, det_kpss, convert106_5=True, from_points=False):
|
1698 |
+
if from_points == False:
|
1699 |
+
w, h = (bbox[2] - bbox[0]), (bbox[3] - bbox[1])
|
1700 |
+
center = (bbox[2] + bbox[0]) / 2, (bbox[3] + bbox[1]) / 2
|
1701 |
+
rotate = 0
|
1702 |
+
_scale = 192 / (max(w, h)*1.5)
|
1703 |
+
#print('param:', img.size(), bbox, center, (192, 192), _scale, rotate)
|
1704 |
+
aimg, M = faceutil.transform(img, center, 192, _scale, rotate)
|
1705 |
+
else:
|
1706 |
+
aimg, M = faceutil.warp_face_by_face_landmark_5(img, det_kpss, image_size=192, normalized=True)
|
1707 |
+
'''
|
1708 |
+
cv2.imshow('image', aimg.permute(1, 2, 0).to('cpu').numpy())
|
1709 |
+
cv2.waitKey(0)
|
1710 |
+
cv2.destroyAllWindows()
|
1711 |
+
'''
|
1712 |
+
aimg = torch.unsqueeze(aimg, 0).contiguous()
|
1713 |
+
aimg = aimg.to(dtype=torch.float32)
|
1714 |
+
aimg = self.normalize(aimg)
|
1715 |
+
io_binding = self.face_landmark_106_model.io_binding()
|
1716 |
+
io_binding.bind_input(name='data', device_type='cuda', device_id=0, element_type=np.float32, shape=aimg.size(), buffer_ptr=aimg.data_ptr())
|
1717 |
+
|
1718 |
+
io_binding.bind_output('fc1', 'cuda')
|
1719 |
+
|
1720 |
+
# Sync and run model
|
1721 |
+
syncvec = self.syncvec.cpu()
|
1722 |
+
self.face_landmark_106_model.run_with_iobinding(io_binding)
|
1723 |
+
pred = io_binding.copy_outputs_to_cpu()[0][0]
|
1724 |
+
|
1725 |
+
if pred.shape[0] >= 3000:
|
1726 |
+
pred = pred.reshape((-1, 3))
|
1727 |
+
else:
|
1728 |
+
pred = pred.reshape((-1, 2))
|
1729 |
+
|
1730 |
+
if 106 < pred.shape[0]:
|
1731 |
+
pred = pred[106*-1:,:]
|
1732 |
+
|
1733 |
+
pred[:, 0:2] += 1
|
1734 |
+
pred[:, 0:2] *= (192 // 2)
|
1735 |
+
if pred.shape[1] == 3:
|
1736 |
+
pred[:, 2] *= (192 // 2)
|
1737 |
+
|
1738 |
+
#IM = cv2.invertAffineTransform(M)
|
1739 |
+
IM = faceutil.invertAffineTransform(M)
|
1740 |
+
pred = faceutil.trans_points(pred, IM)
|
1741 |
+
|
1742 |
+
if pred is not None:
|
1743 |
+
if convert106_5:
|
1744 |
+
# convert from 106 to 5 keypoints
|
1745 |
+
pred = faceutil.convert_face_landmark_106_to_5(pred)
|
1746 |
+
|
1747 |
+
#faceutil.test_bbox_landmarks(img, bbox, pred)
|
1748 |
+
|
1749 |
+
return pred, []
|
1750 |
+
|
1751 |
+
def detect_face_landmark_478(self, img, bbox, det_kpss, convert478_5=True, from_points=False):
|
1752 |
+
if from_points == False:
|
1753 |
+
w, h = (bbox[2] - bbox[0]), (bbox[3] - bbox[1])
|
1754 |
+
center = (bbox[2] + bbox[0]) / 2, (bbox[3] + bbox[1]) / 2
|
1755 |
+
rotate = 0
|
1756 |
+
_scale = 256.0 / (max(w, h)*1.5)
|
1757 |
+
#print('param:', img.size(), bbox, center, (192, 192), _scale, rotate)
|
1758 |
+
aimg, M = faceutil.transform(img, center, 256, _scale, rotate)
|
1759 |
+
else:
|
1760 |
+
aimg, M = faceutil.warp_face_by_face_landmark_5(img, det_kpss, 256, normalized=False)
|
1761 |
+
#aimg2 = aimg.clone()
|
1762 |
+
'''
|
1763 |
+
cv2.imshow('image', aimg.permute(1,2,0).to('cpu').numpy())
|
1764 |
+
cv2.waitKey(0)
|
1765 |
+
cv2.destroyAllWindows()
|
1766 |
+
'''
|
1767 |
+
aimg = torch.unsqueeze(aimg, 0).contiguous()
|
1768 |
+
aimg = aimg.to(dtype=torch.float32)
|
1769 |
+
aimg = torch.div(aimg, 255.0)
|
1770 |
+
io_binding = self.face_landmark_478_model.io_binding()
|
1771 |
+
io_binding.bind_input(name='input_12', device_type='cuda', device_id=0, element_type=np.float32, shape=aimg.size(), buffer_ptr=aimg.data_ptr())
|
1772 |
+
|
1773 |
+
io_binding.bind_output('Identity', 'cuda')
|
1774 |
+
io_binding.bind_output('Identity_1', 'cuda')
|
1775 |
+
io_binding.bind_output('Identity_2', 'cuda')
|
1776 |
+
|
1777 |
+
# Sync and run model
|
1778 |
+
syncvec = self.syncvec.cpu()
|
1779 |
+
self.face_landmark_478_model.run_with_iobinding(io_binding)
|
1780 |
+
landmarks, faceflag, blendshapes = io_binding.copy_outputs_to_cpu()
|
1781 |
+
landmarks = landmarks.reshape( (1,478,3))
|
1782 |
+
|
1783 |
+
landmark = []
|
1784 |
+
landmark_score = []
|
1785 |
+
if len(landmarks) > 0:
|
1786 |
+
for one_face_landmarks in landmarks:
|
1787 |
+
#lmk = one_face_landmarks.copy()
|
1788 |
+
landmark = one_face_landmarks
|
1789 |
+
#IM = cv2.invertAffineTransform(M)
|
1790 |
+
IM = faceutil.invertAffineTransform(M)
|
1791 |
+
landmark = faceutil.trans_points3d(landmark, IM)
|
1792 |
+
'''
|
1793 |
+
P = faceutil.estimate_affine_matrix_3d23d(self.mean_lmk, landmark)
|
1794 |
+
s, R, t = faceutil.P2sRt(P)
|
1795 |
+
rx, ry, rz = faceutil.matrix2angle(R)
|
1796 |
+
pose = np.array( [rx, ry, rz], dtype=np.float32 ) #pitch, yaw, roll
|
1797 |
+
'''
|
1798 |
+
landmark = landmark[:, [0, 1]].reshape(-1,2)
|
1799 |
+
#lmk = lmk[:, [0, 1]].reshape(-1,2)
|
1800 |
+
|
1801 |
+
#get scores
|
1802 |
+
landmark_for_score = landmark[self.LandmarksSubsetIdxs]
|
1803 |
+
landmark_for_score = landmark_for_score[:, :2]
|
1804 |
+
landmark_for_score = np.expand_dims(landmark_for_score, axis=0)
|
1805 |
+
landmark_for_score = landmark_for_score.astype(np.float32)
|
1806 |
+
landmark_for_score = torch.from_numpy(landmark_for_score).to('cuda')
|
1807 |
+
|
1808 |
+
io_binding_bs = self.face_blendshapes_model.io_binding()
|
1809 |
+
io_binding_bs.bind_input(name='input_points', device_type='cuda', device_id=0, element_type=np.float32, shape=tuple(landmark_for_score.shape), buffer_ptr=landmark_for_score.data_ptr())
|
1810 |
+
io_binding_bs.bind_output('output', 'cuda')
|
1811 |
+
|
1812 |
+
# Sync and run model
|
1813 |
+
syncvec = self.syncvec.cpu()
|
1814 |
+
self.face_blendshapes_model.run_with_iobinding(io_binding_bs)
|
1815 |
+
landmark_score = io_binding_bs.copy_outputs_to_cpu()[0]
|
1816 |
+
|
1817 |
+
if convert478_5:
|
1818 |
+
# convert from 478 to 5 keypoints
|
1819 |
+
landmark = faceutil.convert_face_landmark_478_to_5(landmark)
|
1820 |
+
#lmk = faceutil.convert_face_landmark_478_to_5(lmk)
|
1821 |
+
|
1822 |
+
#faceutil.test_bbox_landmarks(aimg2, [], lmk)
|
1823 |
+
#faceutil.test_bbox_landmarks(img, bbox, landmark)
|
1824 |
+
#faceutil.test_bbox_landmarks(img, bbox, det_kpss)
|
1825 |
+
|
1826 |
+
#return landmark, landmark_score
|
1827 |
+
return landmark, []
|
1828 |
+
|
1829 |
+
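Every detector and landmark model in this file runs through the same ONNX Runtime IO-binding pattern: a contiguous CUDA tensor is bound as the input buffer by pointer, outputs are bound on the GPU, the session runs with run_with_iobinding, and results come back with copy_outputs_to_cpu. A stripped-down sketch of that pattern, assuming a CUDA build of onnxruntime and some ONNX file at model_path (the path, shapes, and input/output names here are placeholders):

import numpy as np
import onnxruntime
import torch

model_path = 'some_model.onnx'                      # placeholder path
session = onnxruntime.InferenceSession(
    model_path, providers=['CUDAExecutionProvider'])

# contiguous CUDA tensor whose memory is handed to onnxruntime by pointer
image = torch.rand((1, 3, 640, 640), dtype=torch.float32, device='cuda').contiguous()

io_binding = session.io_binding()
io_binding.bind_input(name=session.get_inputs()[0].name, device_type='cuda',
                      device_id=0, element_type=np.float32,
                      shape=tuple(image.size()), buffer_ptr=image.data_ptr())
for out in session.get_outputs():
    io_binding.bind_output(out.name, 'cuda')        # keep outputs on the GPU

torch.cuda.synchronize()                            # make sure the input tensor is ready
session.run_with_iobinding(io_binding)
outputs = io_binding.copy_outputs_to_cpu()          # list of numpy arrays
print([o.shape for o in outputs])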
def recognize(self, img, face_kps):
|
1830 |
+
'''
|
1831 |
+
# Find transform
|
1832 |
+
dst = self.arcface_dst.copy()
|
1833 |
+
dst[:, 0] += 8.0
|
1834 |
+
|
1835 |
+
tform = trans.SimilarityTransform()
|
1836 |
+
tform.estimate(face_kps, dst)
|
1837 |
+
|
1838 |
+
# Transform
|
1839 |
+
img = v2.functional.affine(img, tform.rotation*57.2958, (tform.translation[0], tform.translation[1]) , tform.scale, 0, center = (0,0) )
|
1840 |
+
img = v2.functional.crop(img, 0,0, 128, 128)
|
1841 |
+
img = v2.Resize((112, 112), interpolation=v2.InterpolationMode.BILINEAR, antialias=False)(img)
|
1842 |
+
'''
|
1843 |
+
# Find transform
|
1844 |
+
tform = trans.SimilarityTransform()
|
1845 |
+
tform.estimate(face_kps, self.arcface_dst)
|
1846 |
+
|
1847 |
+
# Transform
|
1848 |
+
img = v2.functional.affine(img, tform.rotation*57.2958, (tform.translation[0], tform.translation[1]) , tform.scale, 0, center = (0,0) )
|
1849 |
+
img = v2.functional.crop(img, 0,0, 112, 112)
|
1850 |
+
|
1851 |
+
cropped_image = img
|
1852 |
+
# Switch to BGR and normalize
|
1853 |
+
img = img.permute(1,2,0) #112,112,3
|
1854 |
+
img = img[:, :, [2,1,0]]
|
1855 |
+
img = torch.sub(img, 127.5)
|
1856 |
+
img = torch.div(img, 127.5)
|
1857 |
+
img = img.permute(2, 0, 1) #3,112,112
|
1858 |
+
|
1859 |
+
# Prepare data and find model parameters
|
1860 |
+
img = torch.unsqueeze(img, 0).contiguous()
|
1861 |
+
input_name = self.recognition_model.get_inputs()[0].name
|
1862 |
+
|
1863 |
+
outputs = self.recognition_model.get_outputs()
|
1864 |
+
output_names = []
|
1865 |
+
for o in outputs:
|
1866 |
+
output_names.append(o.name)
|
1867 |
+
|
1868 |
+
io_binding = self.recognition_model.io_binding()
|
1869 |
+
io_binding.bind_input(name=input_name, device_type='cuda', device_id=0, element_type=np.float32, shape=img.size(), buffer_ptr=img.data_ptr())
|
1870 |
+
|
1871 |
+
for i in range(len(output_names)):
|
1872 |
+
io_binding.bind_output(output_names[i], 'cuda')
|
1873 |
+
|
1874 |
+
# Sync and run model
|
1875 |
+
self.syncvec.cpu()
|
1876 |
+
self.recognition_model.run_with_iobinding(io_binding)
|
1877 |
+
|
1878 |
+
# Return embedding
|
1879 |
+
return np.array(io_binding.copy_outputs_to_cpu()).flatten(), cropped_image
|
1880 |
+
|
1881 |
+
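recognize above aligns the face by estimating a similarity transform from the detected 5-point landmarks to the canonical ArcFace template (arcface_dst), crops a 112x112 patch, switches to BGR and normalises to [-1, 1] before running the recognition model. Below is a minimal OpenCV/skimage sketch of that alignment and normalisation; the keypoints are made up, and the sketch works on a NumPy frame rather than the torchvision pipeline used above.

import cv2
import numpy as np
from skimage import transform as trans

# canonical ArcFace 5-point template for a 112x112 crop
arcface_dst = np.array([[38.2946, 51.6963], [73.5318, 51.5014], [56.0252, 71.7366],
                        [41.5493, 92.3655], [70.7299, 92.2041]], dtype=np.float32)

def align_for_arcface(frame, face_kps):
    tform = trans.SimilarityTransform()
    tform.estimate(face_kps, arcface_dst)                 # detected kps -> template
    M = tform.params[0:2]
    crop = cv2.warpAffine(frame, M, (112, 112), borderValue=0.0)
    # BGR channel order and [-1, 1] normalisation, as in the method above
    blob = crop[:, :, ::-1].astype(np.float32)
    blob = (blob - 127.5) / 127.5
    return np.transpose(blob, (2, 0, 1))[None]            # 1 x 3 x 112 x 112

frame = np.zeros((480, 640, 3), dtype=np.uint8)           # toy RGB frame
kps = np.array([[200, 200], [260, 200], [230, 240], [210, 270], [255, 270]], dtype=np.float32)
print(align_for_arcface(frame, kps).shape)                # (1, 3, 112, 112)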
def resnet50(self, image, score=.5):
|
1882 |
+
if not self.resnet50_model:
|
1883 |
+
self.resnet50_model = onnxruntime.InferenceSession("./models/res50.onnx", providers=self.providers)
|
1884 |
+
|
1885 |
+
feature_maps = [[64, 64], [32, 32], [16, 16]]
|
1886 |
+
min_sizes = [[16, 32], [64, 128], [256, 512]]
|
1887 |
+
steps = [8, 16, 32]
|
1888 |
+
image_size = 512
|
1889 |
+
|
1890 |
+
for k, f in enumerate(feature_maps):
|
1891 |
+
min_size_array = min_sizes[k]
|
1892 |
+
for i, j in product(range(f[0]), range(f[1])):
|
1893 |
+
for min_size in min_size_array:
|
1894 |
+
s_kx = min_size / image_size
|
1895 |
+
s_ky = min_size / image_size
|
1896 |
+
dense_cx = [x * steps[k] / image_size for x in [j + 0.5]]
|
1897 |
+
dense_cy = [y * steps[k] / image_size for y in [i + 0.5]]
|
1898 |
+
for cy, cx in product(dense_cy, dense_cx):
|
1899 |
+
self.anchors += [cx, cy, s_kx, s_ky]
|
1900 |
+
|
1901 |
+
# image = cv2.cvtColor(np.asarray(image), cv2.COLOR_RGB2BGR)
|
1902 |
+
image = image.permute(1,2,0)
|
1903 |
+
|
1904 |
+
# image = image - [104, 117, 123]
|
1905 |
+
mean = torch.tensor([104, 117, 123], dtype=torch.float32, device='cuda')
|
1906 |
+
image = torch.sub(image, mean)
|
1907 |
+
|
1908 |
+
# image = image.transpose(2, 0, 1)
|
1909 |
+
# image = np.float32(image[np.newaxis,:,:,:])
|
1910 |
+
image = image.permute(2,0,1)
|
1911 |
+
image = torch.reshape(image, (1, 3, 512, 512))
|
1912 |
+
|
1913 |
+
height, width = (512, 512)
|
1914 |
+
tmp = [width, height, width, height, width, height, width, height, width, height]
|
1915 |
+
scale1 = torch.tensor(tmp, dtype=torch.float32, device='cuda')
|
1916 |
+
|
1917 |
+
# ort_inputs = {"input": image}
|
1918 |
+
conf = torch.empty((1,10752,2), dtype=torch.float32, device='cuda').contiguous()
|
1919 |
+
landmarks = torch.empty((1,10752,10), dtype=torch.float32, device='cuda').contiguous()
|
1920 |
+
|
1921 |
+
io_binding = self.resnet50_model.io_binding()
|
1922 |
+
io_binding.bind_input(name='input', device_type='cuda', device_id=0, element_type=np.float32, shape=(1,3,512,512), buffer_ptr=image.data_ptr())
|
1923 |
+
io_binding.bind_output(name='conf', device_type='cuda', device_id=0, element_type=np.float32, shape=(1,10752,2), buffer_ptr=conf.data_ptr())
|
1924 |
+
io_binding.bind_output(name='landmarks', device_type='cuda', device_id=0, element_type=np.float32, shape=(1,10752,10), buffer_ptr=landmarks.data_ptr())
|
1925 |
+
|
1926 |
+
# _, conf, landmarks = self.resnet_model.run(None, ort_inputs)
|
1927 |
+
torch.cuda.synchronize('cuda')
|
1928 |
+
self.resnet50_model.run_with_iobinding(io_binding)
|
1929 |
+
|
1930 |
+
# conf = torch.from_numpy(conf)
|
1931 |
+
# scores = conf.squeeze(0).numpy()[:, 1]
|
1932 |
+
scores = torch.squeeze(conf)[:, 1]
|
1933 |
+
|
1934 |
+
# landmarks = torch.from_numpy(landmarks)
|
1935 |
+
# landmarks = landmarks.to('cuda')
|
1936 |
+
|
1937 |
+
priors = torch.tensor(self.anchors).view(-1, 4)
|
1938 |
+
priors = priors.to('cuda')
|
1939 |
+
|
1940 |
+
# pre = landmarks.squeeze(0)
|
1941 |
+
pre = torch.squeeze(landmarks, 0)
|
1942 |
+
|
1943 |
+
tmp = (priors[:, :2] + pre[:, :2] * 0.1 * priors[:, 2:], priors[:, :2] + pre[:, 2:4] * 0.1 * priors[:, 2:], priors[:, :2] + pre[:, 4:6] * 0.1 * priors[:, 2:], priors[:, :2] + pre[:, 6:8] * 0.1 * priors[:, 2:], priors[:, :2] + pre[:, 8:10] * 0.1 * priors[:, 2:])
|
1944 |
+
landmarks = torch.cat(tmp, dim=1)
|
1945 |
+
# landmarks = landmarks * scale1
|
1946 |
+
landmarks = torch.mul(landmarks, scale1)
|
1947 |
+
|
1948 |
+
landmarks = landmarks.cpu().numpy()
|
1949 |
+
|
1950 |
+
# ignore low scores
|
1951 |
+
inds = torch.where(scores>score)[0]
|
1952 |
+
inds = inds.cpu().numpy()
|
1953 |
+
scores = scores.cpu().numpy()
|
1954 |
+
|
1955 |
+
landmarks, scores = landmarks[inds], scores[inds]
|
1956 |
+
|
1957 |
+
# sort
|
1958 |
+
order = scores.argsort()[::-1]
|
1959 |
+
landmarks = landmarks[order][0]
|
1960 |
+
|
1961 |
+
return np.array([[landmarks[i], landmarks[i + 1]] for i in range(0,10,2)])
|
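resnet50 above generates RetinaFace-style priors for a 512x512 input: for each feature-map cell and each min_size it appends a normalised [cx, cy, w, h] anchor, and the landmark head's raw offsets are later decoded as prior_centre + offset * 0.1 * prior_size. A compact standalone sketch of the anchor generation with the same feature maps, min_sizes, and steps as the code above (the function name and the decoding example are illustrative):

from itertools import product
import numpy as np

def generate_priors(image_size=512):
    feature_maps = [[64, 64], [32, 32], [16, 16]]
    min_sizes = [[16, 32], [64, 128], [256, 512]]
    steps = [8, 16, 32]
    anchors = []
    for k, f in enumerate(feature_maps):
        for i, j in product(range(f[0]), range(f[1])):
            for min_size in min_sizes[k]:
                s_k = min_size / image_size                  # normalised anchor size
                cx = (j + 0.5) * steps[k] / image_size       # normalised centre
                cy = (i + 0.5) * steps[k] / image_size
                anchors.append([cx, cy, s_k, s_k])
    return np.array(anchors, dtype=np.float32)

priors = generate_priors()
print(priors.shape)        # (10752, 4): 64*64*2 + 32*32*2 + 16*16*2 anchors

# decoding one raw landmark offset against a prior, as in the methods above
offset = np.array([0.3, -0.2], dtype=np.float32)
point = priors[0, :2] + offset * 0.1 * priors[0, 2:]
print(point)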
rope/Styles.py
ADDED
@@ -0,0 +1,293 @@
1 |
+
bg = 'black'
|
2 |
+
main = '#1A1A1A' #Not as Dark Grey '#1A1A1A'
|
3 |
+
main2 = '#151515' #Dark Grey '#151515'
|
4 |
+
main3 = '#28282E' #Light Grey '#28282E'
|
5 |
+
|
6 |
+
|
7 |
+
|
8 |
+
canvas_frame_label_1 = {
|
9 |
+
'bg': main2,
|
10 |
+
'bd': '0',
|
11 |
+
'relief': 'flat',
|
12 |
+
'highlightthickness': '0'
|
13 |
+
}
|
14 |
+
|
15 |
+
canvas_frame_label_2 = {
|
16 |
+
'bg': main2,
|
17 |
+
'bd': '0',
|
18 |
+
'relief': 'flat',
|
19 |
+
'highlightthickness': '0'
|
20 |
+
}
|
21 |
+
|
22 |
+
canvas_frame_label_3 = {
|
23 |
+
'bg': main,
|
24 |
+
'bd': '0',
|
25 |
+
'relief': 'flat',
|
26 |
+
'highlightthickness': '0'
|
27 |
+
}
|
28 |
+
|
29 |
+
info_label = {
|
30 |
+
'bg': main2,
|
31 |
+
'fg': '#BCBCBC',
|
32 |
+
'bd': '5',
|
33 |
+
'relief': 'flat',
|
34 |
+
'highlightthickness': '0',
|
35 |
+
'font': ("Segoe UI", 9),
|
36 |
+
'anchor': 'nw',
|
37 |
+
'justify': 'left',
|
38 |
+
}
|
39 |
+
|
40 |
+
text_1 = {
|
41 |
+
'bg': main2,
|
42 |
+
'fg': 'white',
|
43 |
+
'activebackground': main2,
|
44 |
+
'activeforeground': 'white',
|
45 |
+
'relief': 'flat',
|
46 |
+
'border': '0',
|
47 |
+
'font': ("Segoe UI", 9)
|
48 |
+
}
|
49 |
+
text_2 = {
|
50 |
+
'bg': main2,
|
51 |
+
'fg': '#D0D0D0',
|
52 |
+
'activebackground': main2,
|
53 |
+
'activeforeground': 'white',
|
54 |
+
'relief': 'flat',
|
55 |
+
'border': '0',
|
56 |
+
'font': ("Segoe UI", 9)
|
57 |
+
}
|
58 |
+
text_3 = {
|
59 |
+
'bg': main,
|
60 |
+
'fg': '#979797',
|
61 |
+
'activebackground': main,
|
62 |
+
'activeforeground': 'white',
|
63 |
+
'relief': 'flat',
|
64 |
+
'border': '0',
|
65 |
+
'font': ("Segoe UI", 9)
|
66 |
+
}
|
67 |
+
|
68 |
+
|
69 |
+
|
70 |
+
option_slider_style = {
|
71 |
+
'bg': main,
|
72 |
+
'activebackground': main,
|
73 |
+
'highlightcolor': 'white',
|
74 |
+
'highlightthickness': '0',
|
75 |
+
'relief': 'flat',
|
76 |
+
'sliderrelief': 'flat',
|
77 |
+
'border': '0',
|
78 |
+
'width': '3',
|
79 |
+
'troughcolor': '#1F1F1F',
|
80 |
+
}
|
81 |
+
|
82 |
+
|
83 |
+
entry_3 = {
|
84 |
+
'bg': '#1F1F1F',
|
85 |
+
'fg': '#FFFFFF',
|
86 |
+
'relief': 'flat',
|
87 |
+
'border': '0',
|
88 |
+
'width': '5',
|
89 |
+
'justify': 'c',
|
90 |
+
'font': ("Segoe UI", 9),
|
91 |
+
'highlightthickness': '1',
|
92 |
+
'highlightbackground': '#17181A',
|
93 |
+
}
|
94 |
+
|
95 |
+
entry_2 = {
|
96 |
+
'bg': '#1F1F1F',
|
97 |
+
'fg': '#FFFFFF',
|
98 |
+
'relief': 'flat',
|
99 |
+
'border': '0',
|
100 |
+
'highlightthickness': '1',
|
101 |
+
'highlightbackground': '#17181A',
|
102 |
+
'width': '5',
|
103 |
+
'justify': 'l',
|
104 |
+
'font': ("Segoe UI", 9)
|
105 |
+
}
|
106 |
+
|
107 |
+
text_selection_off_3 = {
|
108 |
+
'bg': main,
|
109 |
+
'fg': '#7A7A7A',
|
110 |
+
'activebackground': main,
|
111 |
+
'activeforeground': 'white',
|
112 |
+
'relief': 'flat',
|
113 |
+
'border': '0',
|
114 |
+
'font': ("Segoe UI", 10)
|
115 |
+
}
|
116 |
+
text_selection_on_3 = {
|
117 |
+
'bg': main,
|
118 |
+
'fg': '#FFFFFF',
|
119 |
+
'activebackground': main,
|
120 |
+
'activeforeground': 'white',
|
121 |
+
'relief': 'flat',
|
122 |
+
'border': '0',
|
123 |
+
'font': ("Segoe UI", 10)
|
124 |
+
}
|
125 |
+
text_selection_off_2 = {
|
126 |
+
'bg': main2,
|
127 |
+
'fg': '#7A7A7A',
|
128 |
+
'activebackground': main2,
|
129 |
+
'activeforeground': 'white',
|
130 |
+
'relief': 'flat',
|
131 |
+
'border': '0',
|
132 |
+
'font': ("Segoe UI", 10)
|
133 |
+
}
|
134 |
+
text_selection_on_2 = {
|
135 |
+
'bg': main2,
|
136 |
+
'fg': '#FFFFFF',
|
137 |
+
'activebackground': main2,
|
138 |
+
'activeforeground': 'white',
|
139 |
+
'relief': 'flat',
|
140 |
+
'border': '0',
|
141 |
+
'font': ("Segoe UI", 10)
|
142 |
+
}
|
143 |
+
|
144 |
+
|
145 |
+
parameter_switch_3 = {
|
146 |
+
'bg': main,
|
147 |
+
'fg': '#FFFFFF',
|
148 |
+
'activebackground': main,
|
149 |
+
'activeforeground': 'white',
|
150 |
+
'relief': 'flat',
|
151 |
+
'border': '0',
|
152 |
+
'font': ("Segoe UI", 10)
|
153 |
+
}
|
154 |
+
|
155 |
+
|
156 |
+
|
157 |
+
|
158 |
+
canvas_bg = {
|
159 |
+
'bg': bg,
|
160 |
+
'relief': 'flat',
|
161 |
+
'bd': '0',
|
162 |
+
'highlightthickness': '0'
|
163 |
+
}
|
164 |
+
|
165 |
+
icon = {
|
166 |
+
'IconOn': './rope/media/OnState.png',
|
167 |
+
'IconOff': './rope/media/OffState.png',
|
168 |
+
}
|
169 |
+
|
170 |
+
|
171 |
+
frame_style_bg = {
|
172 |
+
'bg': bg,
|
173 |
+
'relief': 'flat',
|
174 |
+
'bd': '0'
|
175 |
+
}
|
176 |
+
|
177 |
+
button_3 = {
|
178 |
+
'bg': main2,
|
179 |
+
'fg': '#FFFFFF',
|
180 |
+
'activebackground': main2,
|
181 |
+
'activeforeground': 'white',
|
182 |
+
'relief': 'flat',
|
183 |
+
'border': '0',
|
184 |
+
'font': ("Segoe UI", 10)
|
185 |
+
}
|
186 |
+
button_2 = {
|
187 |
+
'bg': main2,
|
188 |
+
'fg': '#FFFFFF',
|
189 |
+
'activebackground': main2,
|
190 |
+
'activeforeground': 'white',
|
191 |
+
'relief': 'flat',
|
192 |
+
'border': '0',
|
193 |
+
'font': ("Segoe UI", 10)
|
194 |
+
}
|
195 |
+
button_1 = {
|
196 |
+
'bg': main2,
|
197 |
+
'fg': '#FFFFFF',
|
198 |
+
'activebackground': main2,
|
199 |
+
'activeforeground': 'white',
|
200 |
+
'relief': 'flat',
|
201 |
+
'border': '0',
|
202 |
+
'font': ("Segoe UI", 10)
|
203 |
+
}
|
204 |
+
|
205 |
+
button_inactive = {
|
206 |
+
'bg': main2,
|
207 |
+
'fg': '#FFFFFF',
|
208 |
+
'activebackground': main2,
|
209 |
+
'activeforeground': 'white',
|
210 |
+
'relief': 'flat',
|
211 |
+
'border': '0',
|
212 |
+
'font': ("Segoe UI", 10)
|
213 |
+
}
|
214 |
+
|
215 |
+
button_active = {
|
216 |
+
'bg': main2,
|
217 |
+
'fg': '#FFFFFF',
|
218 |
+
'activebackground': main2,
|
219 |
+
'activeforeground': 'white',
|
220 |
+
'relief': 'flat',
|
221 |
+
'border': '0',
|
222 |
+
'font': ("Segoe UI", 10)
|
223 |
+
}
|
224 |
+
|
225 |
+
|
226 |
+
media_button_off_3= {
|
227 |
+
'bg': main2,
|
228 |
+
'fg': '#7A7A7A',
|
229 |
+
'activebackground': main2,
|
230 |
+
'activeforeground': 'white',
|
231 |
+
'relief': 'flat',
|
232 |
+
'border': '0',
|
233 |
+
'font': ("Segoe UI", 8)
|
234 |
+
}
|
235 |
+
|
236 |
+
media_button_on_3= {
|
237 |
+
'bg': '#4a57ee',
|
238 |
+
'fg': '#FFFFFF',
|
239 |
+
'activebackground': '#4a57ee',
|
240 |
+
'activeforeground': 'white',
|
241 |
+
'relief': 'flat',
|
242 |
+
'border': '0',
|
243 |
+
'font': ("Segoe UI", 8)
|
244 |
+
}
|
245 |
+
|
246 |
+
ui_text_na_2 = {
|
247 |
+
'bg': main,
|
248 |
+
'fg': '#7A7A7A',
|
249 |
+
'activebackground': main,
|
250 |
+
'activeforeground': 'white',
|
251 |
+
'relief': 'flat',
|
252 |
+
'border': '0',
|
253 |
+
'font': ("Segoe UI", 9)
|
254 |
+
}
|
255 |
+
|
256 |
+
timeline_canvas = {
|
257 |
+
'bg': main,
|
258 |
+
'relief': 'flat',
|
259 |
+
'bd': '0',
|
260 |
+
'highlightthickness': '0'
|
261 |
+
}
|
262 |
+
|
263 |
+
donate_1 = {
|
264 |
+
'bg': main,
|
265 |
+
'fg': '#7562ee',
|
266 |
+
'relief': 'flat',
|
267 |
+
'border': '0',
|
268 |
+
'font': ("Segoe UI Semibold", 10),
|
269 |
+
'cursor': "hand2",
|
270 |
+
}
|
271 |
+
|
272 |
+
# Panes
|
273 |
+
# 3:#28282E
|
274 |
+
# 2:#212126
|
275 |
+
# 1:#17181A
|
276 |
+
|
277 |
+
# preview background: #1A1A1A
|
278 |
+
|
279 |
+
# Num Fields, slider bg: #1F1F1F
|
280 |
+
# slider ball: #919191
|
281 |
+
# Borders:#090909
|
282 |
+
# Text
|
283 |
+
# On/off:#FFFFFF
|
284 |
+
# labels: #D0D0D0
|
285 |
+
# notActive: #7A7A7A
|
286 |
+
# active:#FFFFFF
|
287 |
+
|
288 |
+
# highlighted button: #B1B1B2
|
289 |
+
# Button off: #828282
|
290 |
+
|
291 |
+
# on: #FFFFFF
|
292 |
+
# hover: #b1b1b2
|
293 |
+
# off: #828282
|
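Styles.py is a set of plain dictionaries of Tk widget options (colours, fonts, relief) keyed by UI role. They are presumably consumed elsewhere in the GUI by unpacking a dict into a widget constructor or into configure(); the window and button below are an illustrative sketch of that usage pattern, not code from the repository.

import tkinter as tk
from rope import Styles

root = tk.Tk()
root.configure(bg=Styles.bg)

# unpack a style dict straight into the widget constructor
play_button = tk.Button(root, text='Play', **Styles.button_1)
play_button.pack(padx=8, pady=8)

# or restyle an existing widget later with configure()
play_button.configure(**Styles.media_button_on_3)

root.mainloop()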
rope/VideoManager.py
ADDED
@@ -0,0 +1,1242 @@
1 |
+
import os
|
2 |
+
import cv2
|
3 |
+
import tkinter as tk
|
4 |
+
from PIL import Image, ImageTk
|
5 |
+
import threading
|
6 |
+
import time
|
7 |
+
import numpy as np
|
8 |
+
from skimage import transform as trans
|
9 |
+
import subprocess
|
10 |
+
from math import floor, ceil
|
11 |
+
import bisect
|
12 |
+
import onnxruntime
|
13 |
+
import torchvision
|
14 |
+
from torchvision.transforms.functional import normalize #update to v2
|
15 |
+
import torch
|
16 |
+
from torchvision import transforms
|
17 |
+
torchvision.disable_beta_transforms_warning()
|
18 |
+
from torchvision.transforms import v2
|
19 |
+
torch.set_grad_enabled(False)
|
20 |
+
onnxruntime.set_default_logger_severity(4)
|
21 |
+
|
22 |
+
import inspect #print(inspect.currentframe().f_back.f_code.co_name, 'resize_image')
|
23 |
+
|
24 |
+
device = 'cuda'
|
25 |
+
|
26 |
+
lock=threading.Lock()
|
27 |
+
|
28 |
+
class VideoManager():
|
29 |
+
def __init__(self, models ):
|
30 |
+
self.models = models
|
31 |
+
# Model related
|
32 |
+
self.swapper_model = [] # insightface swapper model
|
33 |
+
# self.faceapp_model = [] # insight faceapp model
|
34 |
+
self.input_names = [] # names of the inswapper.onnx inputs
|
35 |
+
self.input_size = [] # size of the inswapper.onnx inputs
|
36 |
+
|
37 |
+
self.output_names = [] # names of the inswapper.onnx outputs
|
38 |
+
self.arcface_dst = np.array( [[38.2946, 51.6963], [73.5318, 51.5014], [56.0252, 71.7366], [41.5493, 92.3655], [70.7299, 92.2041]], dtype=np.float32)
|
39 |
+
|
40 |
+
self.video_file = []
|
41 |
+
|
42 |
+
self.FFHQ_kps = np.array([[ 192.98138, 239.94708 ], [ 318.90277, 240.1936 ], [ 256.63416, 314.01935 ], [ 201.26117, 371.41043 ], [ 313.08905, 371.15118 ] ])
|
43 |
+
|
44 |
+
|
45 |
+
|
46 |
+
#Video related
|
47 |
+
self.capture = [] # cv2 video
|
48 |
+
self.is_video_loaded = False # flag for video loaded state
|
49 |
+
self.video_frame_total = None # length of currently loaded video
|
50 |
+
self.play = False # flag for the play button toggle
|
51 |
+
self.current_frame = 0 # the current frame of the video
|
52 |
+
self.create_video = False
|
53 |
+
self.output_video = []
|
54 |
+
self.file_name = []
|
55 |
+
|
56 |
+
|
57 |
+
# Play related
|
58 |
+
# self.set_read_threads = [] # Name of threaded function
|
59 |
+
self.frame_timer = 0.0 # used to set the framerate during playing
|
60 |
+
|
61 |
+
# Queues
|
62 |
+
self.action_q = [] # queue for sending to the coordinator
|
63 |
+
self.frame_q = [] # queue for frames that are ready for coordinator
|
64 |
+
|
65 |
+
self.r_frame_q = [] # queue for frames that are requested by the GUI
|
66 |
+
self.read_video_frame_q = []
|
67 |
+
|
68 |
+
# swapping related
|
69 |
+
# self.source_embedding = [] # array with indexed source embeddings
|
70 |
+
|
71 |
+
self.found_faces = [] # array that maps the found faces to source faces
|
72 |
+
|
73 |
+
self.parameters = []
|
74 |
+
|
75 |
+
|
76 |
+
self.target_video = []
|
77 |
+
|
78 |
+
self.fps = 1.0
|
79 |
+
self.temp_file = []
|
80 |
+
|
81 |
+
|
82 |
+
self.clip_session = []
|
83 |
+
|
84 |
+
self.start_time = []
|
85 |
+
self.record = False
|
86 |
+
self.output = []
|
87 |
+
self.image = []
|
88 |
+
|
89 |
+
self.saved_video_path = []
|
90 |
+
self.sp = []
|
91 |
+
self.timer = []
|
92 |
+
self.fps_average = []
|
93 |
+
self.total_thread_time = 0.0
|
94 |
+
|
95 |
+
self.start_play_time = []
|
96 |
+
self.start_play_frame = []
|
97 |
+
|
98 |
+
self.rec_thread = []
|
99 |
+
self.markers = []
|
100 |
+
self.is_image_loaded = False
|
101 |
+
self.stop_marker = -1
|
102 |
+
self.perf_test = False
|
103 |
+
|
104 |
+
self.control = []
|
105 |
+
|
106 |
+
|
107 |
+
|
108 |
+
|
109 |
+
|
110 |
+
self.process_q = {
|
111 |
+
"Thread": [],
|
112 |
+
"FrameNumber": [],
|
113 |
+
"ProcessedFrame": [],
|
114 |
+
"Status": 'clear',
|
115 |
+
"ThreadTime": []
|
116 |
+
}
|
117 |
+
self.process_qs = []
|
118 |
+
self.rec_q = {
|
119 |
+
"Thread": [],
|
120 |
+
"FrameNumber": [],
|
121 |
+
"Status": 'clear'
|
122 |
+
}
|
123 |
+
self.rec_qs = []
|
124 |
+
|
125 |
+
def assign_found_faces(self, found_faces):
|
126 |
+
self.found_faces = found_faces
|
127 |
+
|
128 |
+
|
129 |
+
def load_target_video( self, file ):
|
130 |
+
# If we already have a video loaded, release it
|
131 |
+
if self.capture:
|
132 |
+
self.capture.release()
|
133 |
+
|
134 |
+
# Open file
|
135 |
+
self.video_file = file
|
136 |
+
self.capture = cv2.VideoCapture(file)
|
137 |
+
self.fps = self.capture.get(cv2.CAP_PROP_FPS)
|
138 |
+
|
139 |
+
if not self.capture.isOpened():
|
140 |
+
print("Cannot open file: ", file)
|
141 |
+
|
142 |
+
else:
|
143 |
+
self.target_video = file
|
144 |
+
self.is_video_loaded = True
|
145 |
+
self.is_image_loaded = False
|
146 |
+
self.video_frame_total = int(self.capture.get(cv2.CAP_PROP_FRAME_COUNT))
|
147 |
+
self.play = False
|
148 |
+
self.current_frame = 0
|
149 |
+
self.frame_timer = time.time()
|
150 |
+
self.frame_q = []
|
151 |
+
self.r_frame_q = []
|
152 |
+
self.found_faces = []
|
153 |
+
self.add_action("set_slider_length",self.video_frame_total-1)
|
154 |
+
|
155 |
+
self.capture.set(cv2.CAP_PROP_POS_FRAMES, self.current_frame)
|
156 |
+
success, image = self.capture.read()
|
157 |
+
|
158 |
+
if success:
|
159 |
+
crop = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) # RGB
|
160 |
+
temp = [crop, False]
|
161 |
+
self.r_frame_q.append(temp)
|
162 |
+
self.capture.set(cv2.CAP_PROP_POS_FRAMES, self.current_frame)
|
163 |
+
|
164 |
+
def load_target_image(self, file):
|
165 |
+
if self.capture:
|
166 |
+
self.capture.release()
|
167 |
+
self.is_video_loaded = False
|
168 |
+
self.play = False
|
169 |
+
self.frame_q = []
|
170 |
+
self.r_frame_q = []
|
171 |
+
self.found_faces = []
|
172 |
+
self.image = cv2.imread(file) # BGR
|
173 |
+
self.image = cv2.cvtColor(self.image, cv2.COLOR_BGR2RGB) # RGB
|
174 |
+
temp = [self.image, False]
|
175 |
+
self.frame_q.append(temp)
|
176 |
+
|
177 |
+
self.is_image_loaded = True
|
178 |
+
|
179 |
+
|
180 |
+
## Action queue
|
181 |
+
def add_action(self, action, param):
|
182 |
+
# print(inspect.currentframe().f_back.f_code.co_name, '->add_action: '+action)
|
183 |
+
temp = [action, param]
|
184 |
+
self.action_q.append(temp)
|
185 |
+
|
186 |
+
def get_action_length(self):
|
187 |
+
return len(self.action_q)
|
188 |
+
|
189 |
+
def get_action(self):
|
190 |
+
action = self.action_q[0]
|
191 |
+
self.action_q.pop(0)
|
192 |
+
return action
|
193 |
+
|
194 |
+
## Queues for the Coordinator
|
195 |
+
def get_frame(self):
|
196 |
+
frame = self.frame_q[0]
|
197 |
+
self.frame_q.pop(0)
|
198 |
+
return frame
|
199 |
+
|
200 |
+
def get_frame_length(self):
|
201 |
+
return len(self.frame_q)
|
202 |
+
|
203 |
+
def get_requested_frame(self):
|
204 |
+
frame = self.r_frame_q[0]
|
205 |
+
self.r_frame_q.pop(0)
|
206 |
+
return frame
|
207 |
+
|
208 |
+
def get_requested_frame_length(self):
|
209 |
+
return len(self.r_frame_q)
|
210 |
+
|
211 |
+
|
212 |
+
def get_requested_video_frame(self, frame, marker=True):
|
213 |
+
temp = []
|
214 |
+
if self.is_video_loaded:
|
215 |
+
|
216 |
+
if self.play == True:
|
217 |
+
self.play_video("stop")
|
218 |
+
self.process_qs = []
|
219 |
+
|
220 |
+
self.current_frame = int(frame)
|
221 |
+
|
222 |
+
self.capture.set(cv2.CAP_PROP_POS_FRAMES, self.current_frame)
|
223 |
+
success, target_image = self.capture.read() #BGR
|
224 |
+
|
225 |
+
if success:
|
226 |
+
target_image = cv2.cvtColor(target_image, cv2.COLOR_BGR2RGB) #RGB
|
227 |
+
if not self.control['SwapFacesButton']:
|
228 |
+
temp = [target_image, self.current_frame] #temp = RGB
|
229 |
+
else:
|
230 |
+
temp = [self.swap_video(target_image, self.current_frame, marker), self.current_frame] # temp = RGB
|
231 |
+
|
232 |
+
self.r_frame_q.append(temp)
|
233 |
+
|
234 |
+
elif self.is_image_loaded:
|
235 |
+
if not self.control['SwapFacesButton']:
|
236 |
+
temp = [self.image, self.current_frame] # image = RGB
|
237 |
+
|
238 |
+
else:
|
239 |
+
temp = [self.swap_video(self.image, self.current_frame, False), self.current_frame] # image = RGB
|
240 |
+
|
241 |
+
self.r_frame_q.append(temp)
|
242 |
+
|
243 |
+
|
244 |
+
def find_lowest_frame(self, queues):
|
245 |
+
min_frame=999999999
|
246 |
+
index=-1
|
247 |
+
|
248 |
+
for idx, thread in enumerate(queues):
|
249 |
+
frame = thread['FrameNumber']
|
250 |
+
if frame != []:
|
251 |
+
if frame < min_frame:
|
252 |
+
min_frame = frame
|
253 |
+
index=idx
|
254 |
+
return index, min_frame
|
255 |
+
|
256 |
+
|
257 |
+
def play_video(self, command):
|
258 |
+
# print(inspect.currentframe().f_back.f_code.co_name, '->play_video: ')
|
259 |
+
if command == "play":
|
260 |
+
# Initialization
|
261 |
+
self.play = True
|
262 |
+
self.fps_average = []
|
263 |
+
self.process_qs = []
|
264 |
+
self.capture.set(cv2.CAP_PROP_POS_FRAMES, self.current_frame)
|
265 |
+
self.frame_timer = time.time()
|
266 |
+
|
267 |
+
# Create reusable queue based on number of threads
|
268 |
+
for i in range(self.parameters['ThreadsSlider']):
|
269 |
+
new_process_q = self.process_q.copy()
|
270 |
+
self.process_qs.append(new_process_q)
|
271 |
+
|
272 |
+
|
273 |
+
# Start up audio if requested
|
274 |
+
if self.control['AudioButton']:
|
275 |
+
seek_time = (self.current_frame)/self.fps
|
276 |
+
args = ["ffplay",
|
277 |
+
'-vn',
|
278 |
+
'-ss', str(seek_time),
|
279 |
+
'-nodisp',
|
280 |
+
'-stats',
|
281 |
+
'-loglevel', 'quiet',
|
282 |
+
'-sync', 'audio',
|
283 |
+
self.video_file]
|
284 |
+
|
285 |
+
|
286 |
+
self.audio_sp = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
|
287 |
+
|
288 |
+
# Parse the console to find where the audio started
|
289 |
+
while True:
|
290 |
+
temp = self.audio_sp.stdout.read(69)
|
291 |
+
if temp[:7] != b' nan':
|
292 |
+
sought_time = float(temp[:7])
|
293 |
+
self.current_frame = int(self.fps*sought_time)
|
294 |
+
|
295 |
+
self.capture.set(cv2.CAP_PROP_POS_FRAMES, self.current_frame)
|
296 |
+
|
297 |
+
break
|
298 |
+
|
299 |
+
|
300 |
+
#' nan : 0.000
|
301 |
+
#' 1.25 M-A: 0.000 fd= 0 aq= 12KB vq= 0KB sq= 0B f=0/0'
|
302 |
+
|
303 |
+
|
304 |
+
elif command == "stop":
|
305 |
+
self.play = False
|
306 |
+
self.add_action("stop_play", True)
|
307 |
+
|
308 |
+
index, min_frame = self.find_lowest_frame(self.process_qs)
|
309 |
+
|
310 |
+
if index != -1:
|
311 |
+
self.current_frame = min_frame-1
|
312 |
+
|
313 |
+
if self.control['AudioButton']:
|
314 |
+
self.audio_sp.terminate()
|
315 |
+
|
316 |
+
torch.cuda.empty_cache()
|
317 |
+
|
318 |
+
elif command=='stop_from_gui':
|
319 |
+
self.play = False
|
320 |
+
|
321 |
+
# Find the lowest frame in the current render queue and set the current frame to the one before it
|
322 |
+
index, min_frame = self.find_lowest_frame(self.process_qs)
|
323 |
+
if index != -1:
|
324 |
+
self.current_frame = min_frame-1
|
325 |
+
|
326 |
+
if self.control['AudioButton']:
|
327 |
+
self.audio_sp.terminate()
|
328 |
+
|
329 |
+
torch.cuda.empty_cache()
|
330 |
+
|
331 |
+
elif command == "record":
|
332 |
+
self.record = True
|
333 |
+
self.play = True
|
334 |
+
self.total_thread_time = 0.0
|
335 |
+
self.process_qs = []
|
336 |
+
self.capture.set(cv2.CAP_PROP_POS_FRAMES, self.current_frame)
|
337 |
+
|
338 |
+
for i in range(self.parameters['ThreadsSlider']):
|
339 |
+
new_process_q = self.process_q.copy()
|
340 |
+
self.process_qs.append(new_process_q)
|
341 |
+
|
342 |
+
# Initialize
|
343 |
+
self.timer = time.time()
|
344 |
+
frame_width = int(self.capture.get(3))
|
346 |
+
frame_height = int(self.capture.get(4))
|
347 |
+
|
348 |
+
self.start_time = float(self.capture.get(cv2.CAP_PROP_POS_FRAMES) / float(self.fps))
|
349 |
+
|
350 |
+
self.file_name = os.path.splitext(os.path.basename(self.target_video))
|
351 |
+
base_filename = self.file_name[0]+"_"+str(time.time())[:10]
|
352 |
+
self.output = os.path.join(self.saved_video_path, base_filename)
|
353 |
+
self.temp_file = self.output+"_temp"+self.file_name[1]
|
354 |
+
|
355 |
+
if self.parameters['RecordTypeTextSel']=='FFMPEG':
|
356 |
+
args = ["ffmpeg",
|
357 |
+
'-hide_banner',
|
358 |
+
'-loglevel', 'error',
|
359 |
+
"-an",
|
360 |
+
"-r", str(self.fps),
|
361 |
+
"-i", "pipe:",
|
362 |
+
# '-g', '25',
|
363 |
+
"-vf", "format=yuvj420p",
|
364 |
+
"-c:v", "libx264",
|
365 |
+
"-crf", str(self.parameters['VideoQualSlider']),
|
366 |
+
"-r", str(self.fps),
|
367 |
+
"-s", str(frame_width)+"x"+str(frame_height),
|
368 |
+
self.temp_file]
|
369 |
+
|
370 |
+
self.sp = subprocess.Popen(args, stdin=subprocess.PIPE)
|
371 |
+
|
372 |
+
elif self.parameters['RecordTypeTextSel']=='OPENCV':
|
373 |
+
size = (frame_width, frame_height)
|
374 |
+
self.sp = cv2.VideoWriter(self.temp_file, cv2.VideoWriter_fourcc(*'mp4v') , self.fps, size)
|
375 |
+
|
376 |
+
# @profile
|
377 |
+
def process(self):
|
378 |
+
process_qs_len = range(len(self.process_qs))
|
379 |
+
|
380 |
+
# Add threads to Queue
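# Each entry in process_qs is a reusable slot: a 'clear' slot is handed the next frame
# number plus a worker thread here, and finished frames are drained further below in
# frame order, paced to the video fps via frame_timer.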
|
381 |
+
if self.play == True and self.is_video_loaded == True:
|
382 |
+
for item in self.process_qs:
|
383 |
+
if item['Status'] == 'clear' and self.current_frame < self.video_frame_total:
|
384 |
+
item['Thread'] = threading.Thread(target=self.thread_video_read, args = [self.current_frame])
item['Thread'].start()
|
385 |
+
item['FrameNumber'] = self.current_frame
|
386 |
+
item['Status'] = 'started'
|
387 |
+
item['ThreadTime'] = time.time()
|
388 |
+
|
389 |
+
self.current_frame += 1
|
390 |
+
break
|
391 |
+
|
392 |
+
else:
|
393 |
+
self.play = False
|
394 |
+
|
395 |
+
# Always be emptying the queues
|
396 |
+
time_diff = time.time() - self.frame_timer
|
397 |
+
|
398 |
+
if not self.record and time_diff >= 1.0/float(self.fps) and self.play:
|
399 |
+
|
400 |
+
index, min_frame = self.find_lowest_frame(self.process_qs)
|
401 |
+
|
402 |
+
if index != -1:
|
403 |
+
if self.process_qs[index]['Status'] == 'finished':
|
404 |
+
temp = [self.process_qs[index]['ProcessedFrame'], self.process_qs[index]['FrameNumber']]
|
405 |
+
self.frame_q.append(temp)
|
406 |
+
|
407 |
+
# Report fps, other data
|
408 |
+
self.fps_average.append(1.0/time_diff)
|
409 |
+
if len(self.fps_average) >= floor(self.fps):
|
410 |
+
fps = round(np.average(self.fps_average), 2)
|
411 |
+
msg = "%s fps, %s process time" % (fps, round(self.process_qs[index]['ThreadTime'], 4))
|
412 |
+
self.fps_average = []
|
413 |
+
|
414 |
+
if self.process_qs[index]['FrameNumber'] >= self.video_frame_total-1 or self.process_qs[index]['FrameNumber'] == self.stop_marker:
|
415 |
+
self.play_video('stop')
|
416 |
+
|
417 |
+
self.process_qs[index]['Status'] = 'clear'
|
418 |
+
self.process_qs[index]['Thread'] = []
|
419 |
+
self.process_qs[index]['FrameNumber'] = []
|
420 |
+
self.process_qs[index]['ThreadTime'] = []
|
421 |
+
self.frame_timer += 1.0/self.fps
|
422 |
+
|
423 |
+
elif self.record:
|
424 |
+
|
425 |
+
index, min_frame = self.find_lowest_frame(self.process_qs)
|
426 |
+
|
427 |
+
if index != -1:
|
428 |
+
|
429 |
+
# If the swapper thread has finished generating a frame
|
430 |
+
if self.process_qs[index]['Status'] == 'finished':
|
431 |
+
image = self.process_qs[index]['ProcessedFrame']
|
432 |
+
|
433 |
+
if self.parameters['RecordTypeTextSel']=='FFMPEG':
|
434 |
+
pil_image = Image.fromarray(image)
|
435 |
+
pil_image.save(self.sp.stdin, 'BMP')
|
436 |
+
|
437 |
+
elif self.parameters['RecordTypeTextSel']=='OPENCV':
|
438 |
+
self.sp.write(cv2.cvtColor(image, cv2.COLOR_RGB2BGR)) # frame is RGB; cv2.VideoWriter expects BGR
|
439 |
+
|
440 |
+
temp = [image, self.process_qs[index]['FrameNumber']]
|
441 |
+
self.frame_q.append(temp)
|
442 |
+
|
443 |
+
# Close video and process
|
444 |
+
if self.process_qs[index]['FrameNumber'] >= self.video_frame_total-1 or self.process_qs[index]['FrameNumber'] == self.stop_marker or self.play == False:
|
445 |
+
self.play_video("stop")
|
446 |
+
stop_time = float(self.capture.get(cv2.CAP_PROP_POS_FRAMES) / float(self.fps))
|
447 |
+
if stop_time == 0:
|
448 |
+
stop_time = float(self.video_frame_total) / float(self.fps)
|
449 |
+
|
450 |
+
if self.parameters['RecordTypeTextSel']=='FFMPEG':
|
451 |
+
self.sp.stdin.close()
|
452 |
+
self.sp.wait()
|
453 |
+
elif self.parameters['RecordTypeTextSel']=='OPENCV':
|
454 |
+
self.sp.release()
|
455 |
+
|
456 |
+
orig_file = self.target_video
|
457 |
+
final_file = self.output+self.file_name[1]
|
458 |
+
print("adding audio...")
|
459 |
+
args = ["ffmpeg",
|
460 |
+
'-hide_banner',
|
461 |
+
'-loglevel', 'error',
|
462 |
+
"-i", self.temp_file,
|
463 |
+
"-ss", str(self.start_time), "-to", str(stop_time), "-i", orig_file,
|
464 |
+
"-c", "copy", # may be c:v
|
465 |
+
"-map", "0:v:0", "-map", "1:a:0?",
|
466 |
+
"-shortest",
|
467 |
+
final_file]
|
468 |
+
|
469 |
+
four = subprocess.run(args)
|
470 |
+
os.remove(self.temp_file)
|
471 |
+
|
472 |
+
timef= time.time() - self.timer
|
473 |
+
self.record = False
|
474 |
+
print('Video saved as:', final_file)
|
475 |
+
msg = "Total time: %s s." % (round(timef,1))
|
476 |
+
print(msg)
|
477 |
+
|
478 |
+
|
479 |
+
self.total_thread_time = []
|
480 |
+
self.process_qs[index]['Status'] = 'clear'
|
481 |
+
self.process_qs[index]['FrameNumber'] = []
|
482 |
+
self.process_qs[index]['Thread'] = []
|
483 |
+
self.frame_timer = time.time()
|
484 |
+
# @profile
|
485 |
+
def thread_video_read(self, frame_number):
|
486 |
+
with lock:
|
487 |
+
success, target_image = self.capture.read()
|
488 |
+
|
489 |
+
if success:
|
490 |
+
target_image = cv2.cvtColor(target_image, cv2.COLOR_BGR2RGB)
|
491 |
+
if not self.control['SwapFacesButton']:
|
492 |
+
temp = [target_image, frame_number]
|
493 |
+
|
494 |
+
else:
|
495 |
+
temp = [self.swap_video(target_image, frame_number, True), frame_number]
|
496 |
+
|
497 |
+
for item in self.process_qs:
|
498 |
+
if item['FrameNumber'] == frame_number:
|
499 |
+
item['ProcessedFrame'] = temp[0]
|
500 |
+
item['Status'] = 'finished'
|
501 |
+
item['ThreadTime'] = time.time() - item['ThreadTime']
|
502 |
+
break
|
503 |
+
|
504 |
+
|
505 |
+
|
506 |
+
|
507 |
+
# @profile
|
508 |
+
def swap_video(self, target_image, frame_number, use_markers):
|
509 |
+
# Grab a local copy of the parameters to prevent threading issues
|
510 |
+
parameters = self.parameters.copy()
|
511 |
+
control = self.control.copy()
|
512 |
+
|
513 |
+
# Find out if the frame is in a marker zone and copy the parameters if true
|
514 |
+
if self.markers and use_markers:
|
515 |
+
temp=[]
|
516 |
+
for i in range(len(self.markers)):
|
517 |
+
temp.append(self.markers[i]['frame'])
|
518 |
+
idx = bisect.bisect(temp, frame_number)
|
519 |
+
|
520 |
+
parameters = self.markers[idx-1]['parameters'].copy()
|
521 |
+
|
522 |
+
# Load frame into VRAM
|
523 |
+
img = torch.from_numpy(target_image.astype('uint8')).to('cuda') #HxWxc
|
524 |
+
img = img.permute(2,0,1)#cxHxW
|
525 |
+
|
526 |
+
#Scale up frame if it is smaller than 512
|
527 |
+
img_x = img.size()[2]
|
528 |
+
img_y = img.size()[1]
|
529 |
+
|
530 |
+
if img_x<512 and img_y<512:
|
531 |
+
# if x is smaller, set x to 512
|
532 |
+
if img_x <= img_y:
|
533 |
+
tscale = v2.Resize((int(512*img_y/img_x), 512), antialias=True)
|
534 |
+
else:
|
535 |
+
tscale = v2.Resize((512, int(512*img_x/img_y)), antialias=True)
|
536 |
+
|
537 |
+
img = tscale(img)
|
538 |
+
|
539 |
+
elif img_x<512:
|
540 |
+
tscale = v2.Resize((int(512*img_y/img_x), 512), antialias=True)
|
541 |
+
img = tscale(img)
|
542 |
+
|
543 |
+
elif img_y<512:
|
544 |
+
tscale = v2.Resize((512, int(512*img_x/img_y)), antialias=True)
|
545 |
+
img = tscale(img)
|
546 |
+
|
547 |
+
# Rotate the frame
|
548 |
+
if parameters['OrientSwitch']:
|
549 |
+
img = v2.functional.rotate(img, angle=parameters['OrientSlider'], interpolation=v2.InterpolationMode.BILINEAR, expand=True)
|
550 |
+
|
551 |
+
# Find all faces in frame and return a list of 5-pt kpss
|
552 |
+
bboxes, kpss = self.func_w_test("detect", self.models.run_detect, img, parameters['DetectTypeTextSel'], max_num=20, score=parameters['DetectScoreSlider']/100.0, use_landmark_detection=parameters['LandmarksDetectionAdjSwitch'], landmark_detect_mode=parameters["LandmarksDetectTypeTextSel"], landmark_score=parameters["LandmarksDetectScoreSlider"]/100.0, from_points=parameters["LandmarksAlignModeFromPointsSwitch"])
|
553 |
+
|
554 |
+
# Get embeddings for all faces found in the frame
|
555 |
+
ret = []
|
556 |
+
for face_kps in kpss:
|
557 |
+
face_emb, _ = self.func_w_test('recognize', self.models.run_recognize, img, face_kps)
|
558 |
+
ret.append([face_kps, face_emb])
|
559 |
+
|
560 |
+
if ret:
|
561 |
+
# Loop through target faces to see if they match our found face embeddings
|
562 |
+
for fface in ret:
|
563 |
+
for found_face in self.found_faces:
|
564 |
+
# sim between face in video and already found face
|
565 |
+
sim = self.findCosineDistance(fface[1], found_face["Embedding"])
|
566 |
+
# if the face[i] in the frame matches a found face[j] AND the found face is active (not [])
|
567 |
+
if sim>=float(parameters["ThresholdSlider"]) and found_face["SourceFaceAssignments"]:
|
568 |
+
s_e = found_face["AssignedEmbedding"]
|
569 |
+
# s_e = found_face['ptrdata']
|
570 |
+
img = self.func_w_test("swap_video", self.swap_core, img, fface[0], s_e, parameters, control)
|
571 |
+
# img = img.permute(2,0,1)
|
572 |
+
|
573 |
+
img = img.permute(1,2,0)
|
574 |
+
if not control['MaskViewButton'] and parameters['OrientSwitch']:
|
575 |
+
img = img.permute(2,0,1)
|
576 |
+
img = transforms.functional.rotate(img, angle=-parameters['OrientSlider'], expand=True)
|
577 |
+
img = img.permute(1,2,0)
|
578 |
+
|
579 |
+
else:
|
580 |
+
img = img.permute(1,2,0)
|
581 |
+
if parameters['OrientSwitch']:
|
582 |
+
img = img.permute(2,0,1)
|
583 |
+
img = v2.functional.rotate(img, angle=-parameters['OrientSlider'], interpolation=v2.InterpolationMode.BILINEAR, expand=True)
|
584 |
+
img = img.permute(1,2,0)
|
585 |
+
|
586 |
+
if self.perf_test:
|
587 |
+
print('------------------------')
|
588 |
+
|
589 |
+
# Unscale small videos
|
590 |
+
if img_x <512 or img_y < 512:
|
591 |
+
tscale = v2.Resize((img_y, img_x), antialias=True)
|
592 |
+
img = img.permute(2,0,1)
|
593 |
+
img = tscale(img)
|
594 |
+
img = img.permute(1,2,0)
|
595 |
+
|
596 |
+
|
597 |
+
img = img.cpu().numpy()
|
598 |
+
|
599 |
+
if parameters["ShowLandmarksSwitch"]:
|
600 |
+
if ret:
|
601 |
+
if img_y <= 720:
|
602 |
+
p = 1
|
603 |
+
else:
|
604 |
+
p = 2
|
605 |
+
|
606 |
+
for face in ret:
|
607 |
+
for kpoint in face[0]:
|
608 |
+
for i in range(-1, p):
|
609 |
+
for j in range(-1, p):
|
610 |
+
try:
|
611 |
+
img[int(kpoint[1])+i][int(kpoint[0])+j][0] = 0
|
612 |
+
img[int(kpoint[1])+i][int(kpoint[0])+j][1] = 255
|
613 |
+
img[int(kpoint[1])+i][int(kpoint[0])+j][2] = 255
|
614 |
+
except:
|
615 |
+
print("Key-points value {} exceed the image size {}.".format(kpoint, (img_x, img_y)))
|
616 |
+
continue
|
617 |
+
|
618 |
+
return img.astype(np.uint8)
|
619 |
+
|
620 |
+
def findCosineDistance(self, vector1, vector2):
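# Note: despite the name, this returns a similarity score rather than a distance.
# cos_dist = 1 - cos(theta) lies in [0, 2]; 100 - 50*cos_dist maps it onto [0, 100],
# so identical embeddings score 100 and opposite embeddings score 0 (e.g. a cosine
# similarity of 0.8 gives cos_dist 0.2 and a score of 90).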
|
621 |
+
vector1 = vector1.ravel()
|
622 |
+
vector2 = vector2.ravel()
|
623 |
+
cos_dist = 1.0 - np.dot(vector1, vector2)/(np.linalg.norm(vector1)*np.linalg.norm(vector2)) # 2..0
|
624 |
+
|
625 |
+
return 100.0-cos_dist*50.0
|
626 |
+
'''
|
627 |
+
vector1 = vector1.ravel()
|
628 |
+
vector2 = vector2.ravel()
|
629 |
+
|
630 |
+
return 1 - np.dot(vector1, vector2)/(np.linalg.norm(vector1)*np.linalg.norm(vector2))
|
631 |
+
'''
|
632 |
+
|
633 |
+
def func_w_test(self, name, func, *args, **argsv):
|
634 |
+
timing = time.time()
|
635 |
+
result = func(*args, **argsv)
|
636 |
+
if self.perf_test:
|
637 |
+
print(name, round(time.time()-timing, 5), 's')
|
638 |
+
return result
|
639 |
+
|
640 |
+
# @profile
|
641 |
+
def swap_core(self, img, kps, s_e, parameters, control): # img = RGB
|
642 |
+
# 512 transforms
|
643 |
+
dst = self.arcface_dst * 4.0
|
644 |
+
dst[:,0] += 32.0
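# arcface_dst is the standard 5-point template for a 112x112 aligned face; scaling it
# by 4 gives a 448x448 layout and the +32 x-offset centers it horizontally in the
# 512x512 crop used below.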
|
645 |
+
|
646 |
+
# Change the ref points
|
647 |
+
if parameters['FaceAdjSwitch']:
|
648 |
+
dst[:,0] += parameters['KPSXSlider']
|
649 |
+
dst[:,1] += parameters['KPSYSlider']
|
650 |
+
dst[:,0] -= 255
|
651 |
+
dst[:,0] *= (1+parameters['KPSScaleSlider']/100)
|
652 |
+
dst[:,0] += 255
|
653 |
+
dst[:,1] -= 255
|
654 |
+
dst[:,1] *= (1+parameters['KPSScaleSlider']/100)
|
655 |
+
dst[:,1] += 255
|
656 |
+
|
657 |
+
tform = trans.SimilarityTransform()
|
658 |
+
tform.estimate(kps, dst)
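# Estimate the similarity transform (rotation, uniform scale, translation) that maps
# the detected 5-point landmarks onto the scaled template, so the face can be warped
# into a canonical 512x512 crop.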
|
659 |
+
|
660 |
+
# Scaling Transforms
|
661 |
+
t512 = v2.Resize((512, 512), interpolation=v2.InterpolationMode.BILINEAR, antialias=False)
|
662 |
+
t256 = v2.Resize((256, 256), interpolation=v2.InterpolationMode.BILINEAR, antialias=False)
|
663 |
+
t128 = v2.Resize((128, 128), interpolation=v2.InterpolationMode.BILINEAR, antialias=False)
|
664 |
+
|
665 |
+
# Grab the 512 face from the image and create 256 and 128 copies
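# torchvision's affine() expects the angle in degrees, so the transform's rotation
# (radians) is multiplied by 57.2958 (about 180/pi) below.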
|
666 |
+
original_face_512 = v2.functional.affine(img, tform.rotation*57.2958, (tform.translation[0], tform.translation[1]) , tform.scale, 0, center = (0,0), interpolation=v2.InterpolationMode.BILINEAR )
|
667 |
+
original_face_512 = v2.functional.crop(original_face_512, 0,0, 512, 512)# 3, 512, 512
|
668 |
+
original_face_256 = t256(original_face_512)
|
669 |
+
original_face_128 = t128(original_face_256)
|
670 |
+
|
671 |
+
latent = torch.from_numpy(self.models.calc_swapper_latent(s_e)).float().to('cuda')
|
672 |
+
|
673 |
+
dim = 1
|
674 |
+
if parameters['SwapperTypeTextSel'] == '128':
|
675 |
+
dim = 1
|
676 |
+
input_face_affined = original_face_128
|
677 |
+
elif parameters['SwapperTypeTextSel'] == '256':
|
678 |
+
dim = 2
|
679 |
+
input_face_affined = original_face_256
|
680 |
+
elif parameters['SwapperTypeTextSel'] == '512':
|
681 |
+
dim = 4
|
682 |
+
input_face_affined = original_face_512
|
683 |
+
|
684 |
+
# Optional Scaling # change the transform matrix
|
685 |
+
if parameters['FaceAdjSwitch']:
|
686 |
+
input_face_affined = v2.functional.affine(input_face_affined, 0, (0, 0), 1 + parameters['FaceScaleSlider'] / 100, 0, center=(dim*128-1, dim*128-1), interpolation=v2.InterpolationMode.BILINEAR)
|
687 |
+
|
688 |
+
itex = 1
|
689 |
+
if parameters['StrengthSwitch']:
|
690 |
+
itex = ceil(parameters['StrengthSlider'] / 100.)
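# StrengthSlider values above 100 run the swapper multiple times (one pass per full
# 100%); the remainder mod 100 is used later as the blend alpha between the final
# pass and the previous one.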
|
691 |
+
|
692 |
+
output_size = int(128 * dim)
|
693 |
+
output = torch.zeros((output_size, output_size, 3), dtype=torch.float32, device='cuda')
|
694 |
+
input_face_affined = input_face_affined.permute(1, 2, 0)
|
695 |
+
input_face_affined = torch.div(input_face_affined, 255.0)
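# For the 256/512 swapper modes the face is split into dim x dim interleaved
# sub-grids ([j::dim, i::dim]); each sub-grid is a 128x128 image that is swapped
# independently and written back to its interleaved positions, building a
# higher-resolution result out of the 128x128 swapper model.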
|
696 |
+
|
697 |
+
for k in range(itex):
|
698 |
+
for j in range(dim):
|
699 |
+
for i in range(dim):
|
700 |
+
input_face_disc = input_face_affined[j::dim,i::dim]
|
701 |
+
input_face_disc = input_face_disc.permute(2, 0, 1)
|
702 |
+
input_face_disc = torch.unsqueeze(input_face_disc, 0).contiguous()
|
703 |
+
|
704 |
+
swapper_output = torch.empty((1,3,128,128), dtype=torch.float32, device='cuda').contiguous()
|
705 |
+
self.models.run_swapper(input_face_disc, latent, swapper_output)
|
706 |
+
|
707 |
+
swapper_output = torch.squeeze(swapper_output)
|
708 |
+
swapper_output = swapper_output.permute(1, 2, 0)
|
709 |
+
|
710 |
+
|
711 |
+
output[j::dim, i::dim] = swapper_output.clone()
|
712 |
+
prev_face = input_face_affined.clone()
|
713 |
+
input_face_affined = output.clone()
|
714 |
+
output = torch.mul(output, 255)
|
715 |
+
output = torch.clamp(output, 0, 255)
|
716 |
+
|
717 |
+
|
718 |
+
output = output.permute(2, 0, 1)
|
719 |
+
|
720 |
+
|
721 |
+
swap = t512(output)
|
722 |
+
|
723 |
+
if parameters['StrengthSwitch']:
|
724 |
+
if itex == 0:
|
725 |
+
swap = original_face_512.clone()
|
726 |
+
else:
|
727 |
+
alpha = np.mod(parameters['StrengthSlider'], 100)*0.01
|
728 |
+
if alpha==0:
|
729 |
+
alpha=1
|
730 |
+
|
731 |
+
# Blend the images
|
732 |
+
prev_face = torch.mul(prev_face, 255)
|
733 |
+
prev_face = torch.clamp(prev_face, 0, 255)
|
734 |
+
prev_face = prev_face.permute(2, 0, 1)
|
735 |
+
prev_face = t512(prev_face)
|
736 |
+
swap = torch.mul(swap, alpha)
|
737 |
+
prev_face = torch.mul(prev_face, 1-alpha)
|
738 |
+
swap = torch.add(swap, prev_face)
|
739 |
+
|
740 |
+
|
741 |
+
|
742 |
+
|
743 |
+
# swap = torch.squeeze(swap)
|
744 |
+
# swap = torch.mul(swap, 255)
|
745 |
+
# swap = torch.clamp(swap, 0, 255)
|
746 |
+
# # swap_128 = swap
|
747 |
+
# swap = t256(swap)
|
748 |
+
# swap = t512(swap)
|
749 |
+
|
750 |
+
|
751 |
+
# Apply color corrections
|
752 |
+
if parameters['ColorSwitch']:
|
753 |
+
# print(parameters['ColorGammaSlider'])
|
754 |
+
swap = torch.unsqueeze(swap,0)
|
755 |
+
swap = v2.functional.adjust_gamma(swap, parameters['ColorGammaSlider'], 1.0)
|
756 |
+
swap = torch.squeeze(swap)
|
757 |
+
swap = swap.permute(1, 2, 0).type(torch.float32)
|
758 |
+
|
759 |
+
del_color = torch.tensor([parameters['ColorRedSlider'], parameters['ColorGreenSlider'], parameters['ColorBlueSlider']], device=device)
|
760 |
+
swap += del_color
|
761 |
+
swap = torch.clamp(swap, min=0., max=255.)
|
762 |
+
swap = swap.permute(2, 0, 1).type(torch.uint8)
|
763 |
+
|
764 |
+
# Create border mask
|
765 |
+
border_mask = torch.ones((128, 128), dtype=torch.float32, device=device)
|
766 |
+
border_mask = torch.unsqueeze(border_mask,0)
|
767 |
+
|
768 |
+
# if parameters['BorderState']:
|
769 |
+
top = parameters['BorderTopSlider']
|
770 |
+
left = parameters['BorderSidesSlider']
|
771 |
+
right = 128-parameters['BorderSidesSlider']
|
772 |
+
bottom = 128-parameters['BorderBottomSlider']
|
773 |
+
|
774 |
+
border_mask[:, :top, :] = 0
|
775 |
+
border_mask[:, bottom:, :] = 0
|
776 |
+
border_mask[:, :, :left] = 0
|
777 |
+
border_mask[:, :, right:] = 0
|
778 |
+
|
779 |
+
gauss = transforms.GaussianBlur(parameters['BorderBlurSlider']*2+1, (parameters['BorderBlurSlider']+1)*0.2)
|
780 |
+
border_mask = gauss(border_mask)
|
781 |
+
|
782 |
+
# Create image mask
|
783 |
+
swap_mask = torch.ones((128, 128), dtype=torch.float32, device=device)
|
784 |
+
swap_mask = torch.unsqueeze(swap_mask,0)
|
785 |
+
|
786 |
+
# Face Diffing
|
787 |
+
if parameters["DiffSwitch"]:
|
788 |
+
mask = self.apply_fake_diff(swap, original_face_512, parameters["DiffSlider"])
|
789 |
+
# mask = t128(mask)
|
790 |
+
gauss = transforms.GaussianBlur(parameters['BlendSlider']*2+1, (parameters['BlendSlider']+1)*0.2)
|
791 |
+
mask = gauss(mask.type(torch.float32))
|
792 |
+
swap = swap*mask + original_face_512*(1-mask)
|
793 |
+
|
794 |
+
# Restorer
|
795 |
+
if parameters["RestorerSwitch"]:
|
796 |
+
swap = self.func_w_test('Restorer', self.apply_restorer, swap, parameters)
|
797 |
+
|
798 |
+
|
799 |
+
# Occluder
|
800 |
+
if parameters["OccluderSwitch"]:
|
801 |
+
mask = self.func_w_test('occluder', self.apply_occlusion , original_face_256, parameters["OccluderSlider"])
|
802 |
+
mask = t128(mask)
|
803 |
+
swap_mask = torch.mul(swap_mask, mask)
|
804 |
+
|
805 |
+
|
806 |
+
if parameters["FaceParserSwitch"]:
|
807 |
+
mask = self.apply_face_parser(swap, parameters["FaceParserSlider"], parameters['MouthParserSlider'])
|
808 |
+
mask = t128(mask)
|
809 |
+
swap_mask = torch.mul(swap_mask, mask)
|
810 |
+
|
811 |
+
# CLIPs
|
812 |
+
if parameters["CLIPSwitch"]:
|
813 |
+
with lock:
|
814 |
+
mask = self.func_w_test('CLIP', self.apply_CLIPs, original_face_512, parameters["CLIPTextEntry"], parameters["CLIPSlider"])
|
815 |
+
mask = cv2.resize(mask, (128,128))
|
816 |
+
mask = torch.from_numpy(mask).to('cuda')
|
817 |
+
swap_mask *= mask
|
818 |
+
|
819 |
+
|
820 |
+
# Add blur to swap_mask results
|
821 |
+
gauss = transforms.GaussianBlur(parameters['BlendSlider']*2+1, (parameters['BlendSlider']+1)*0.2)
|
822 |
+
swap_mask = gauss(swap_mask)
|
823 |
+
|
824 |
+
|
825 |
+
# Combine border and swap mask, scale, and apply to swap
|
826 |
+
swap_mask = torch.mul(swap_mask, border_mask)
|
827 |
+
swap_mask = t512(swap_mask)
|
828 |
+
swap = torch.mul(swap, swap_mask)
|
829 |
+
|
830 |
+
if not control['MaskViewButton']:
|
831 |
+
# Calculate the area to be merged back into the original frame
|
832 |
+
IM512 = tform.inverse.params[0:2, :]
|
833 |
+
corners = np.array([[0,0], [0,511], [511, 0], [511, 511]])
|
834 |
+
|
835 |
+
x = (IM512[0][0]*corners[:,0] + IM512[0][1]*corners[:,1] + IM512[0][2])
|
836 |
+
y = (IM512[1][0]*corners[:,0] + IM512[1][1]*corners[:,1] + IM512[1][2])
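# The four corners of the 512x512 crop are mapped through the inverse similarity
# transform to find the axis-aligned region of the original frame touched by the
# swap; the bounds below are clamped to the frame dimensions.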
|
837 |
+
|
838 |
+
left = floor(np.min(x))
|
839 |
+
if left<0:
|
840 |
+
left=0
|
841 |
+
top = floor(np.min(y))
|
842 |
+
if top<0:
|
843 |
+
top=0
|
844 |
+
right = ceil(np.max(x))
|
845 |
+
if right>img.shape[2]:
|
846 |
+
right=img.shape[2]
|
847 |
+
bottom = ceil(np.max(y))
|
848 |
+
if bottom>img.shape[1]:
|
849 |
+
bottom=img.shape[1]
|
850 |
+
|
851 |
+
# Untransform the swap
|
852 |
+
swap = v2.functional.pad(swap, (0,0,img.shape[2]-512, img.shape[1]-512))
|
853 |
+
swap = v2.functional.affine(swap, tform.inverse.rotation*57.2958, (tform.inverse.translation[0], tform.inverse.translation[1]), tform.inverse.scale, 0,interpolation=v2.InterpolationMode.BILINEAR, center = (0,0) )
|
854 |
+
swap = swap[0:3, top:bottom, left:right]
|
855 |
+
swap = swap.permute(1, 2, 0)
|
856 |
+
|
857 |
+
# Untransform the swap mask
|
858 |
+
swap_mask = v2.functional.pad(swap_mask, (0,0,img.shape[2]-512, img.shape[1]-512))
|
859 |
+
swap_mask = v2.functional.affine(swap_mask, tform.inverse.rotation*57.2958, (tform.inverse.translation[0], tform.inverse.translation[1]), tform.inverse.scale, 0, interpolation=v2.InterpolationMode.BILINEAR, center = (0,0) )
|
860 |
+
swap_mask = swap_mask[0:1, top:bottom, left:right]
|
861 |
+
swap_mask = swap_mask.permute(1, 2, 0)
|
862 |
+
swap_mask = torch.sub(1, swap_mask)
|
863 |
+
|
864 |
+
# Apply the mask to the original image areas
|
865 |
+
img_crop = img[0:3, top:bottom, left:right]
|
866 |
+
img_crop = img_crop.permute(1,2,0)
|
867 |
+
img_crop = torch.mul(swap_mask,img_crop)
|
868 |
+
|
869 |
+
#Add the cropped areas and place them back into the original image
|
870 |
+
swap = torch.add(swap, img_crop)
|
871 |
+
swap = swap.type(torch.uint8)
|
872 |
+
swap = swap.permute(2,0,1)
|
873 |
+
img[0:3, top:bottom, left:right] = swap
|
874 |
+
|
875 |
+
else:
|
876 |
+
# Invert swap mask
|
877 |
+
swap_mask = torch.sub(1, swap_mask)
|
878 |
+
|
879 |
+
# Combine preswapped face with swap
|
880 |
+
original_face_512 = torch.mul(swap_mask, original_face_512)
|
881 |
+
original_face_512 = torch.add(swap, original_face_512)
|
882 |
+
original_face_512 = original_face_512.type(torch.uint8)
|
883 |
+
original_face_512 = original_face_512.permute(1, 2, 0)
|
884 |
+
|
885 |
+
# Uninvert and create image from swap mask
|
886 |
+
swap_mask = torch.sub(1, swap_mask)
|
887 |
+
swap_mask = torch.cat((swap_mask,swap_mask,swap_mask),0)
|
888 |
+
swap_mask = swap_mask.permute(1, 2, 0)
|
889 |
+
|
890 |
+
# Place them side by side
|
891 |
+
img = torch.hstack([original_face_512, swap_mask*255])
|
892 |
+
img = img.permute(2,0,1)
|
893 |
+
|
894 |
+
return img
|
895 |
+
|
896 |
+
# @profile
|
897 |
+
def apply_occlusion(self, img, amount):
|
898 |
+
img = torch.div(img, 255)
|
899 |
+
img = torch.unsqueeze(img, 0)
|
900 |
+
outpred = torch.ones((256,256), dtype=torch.float32, device=device).contiguous()
|
901 |
+
|
902 |
+
self.models.run_occluder(img, outpred)
|
903 |
+
|
904 |
+
outpred = torch.squeeze(outpred)
|
905 |
+
outpred = (outpred > 0)
|
906 |
+
outpred = torch.unsqueeze(outpred, 0).type(torch.float32)
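# A positive amount grows (dilates) the occlusion mask by repeatedly convolving it
# with a 3x3 ones kernel and clamping to [0, 1]; a negative amount shrinks (erodes)
# it by applying the same dilation to the inverted mask and inverting back.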
|
907 |
+
|
908 |
+
if amount >0:
|
909 |
+
kernel = torch.ones((1,1,3,3), dtype=torch.float32, device=device)
|
910 |
+
|
911 |
+
for i in range(int(amount)):
|
912 |
+
outpred = torch.nn.functional.conv2d(outpred, kernel, padding=(1, 1))
|
913 |
+
outpred = torch.clamp(outpred, 0, 1)
|
914 |
+
|
915 |
+
outpred = torch.squeeze(outpred)
|
916 |
+
|
917 |
+
if amount <0:
|
918 |
+
outpred = torch.neg(outpred)
|
919 |
+
outpred = torch.add(outpred, 1)
|
920 |
+
kernel = torch.ones((1,1,3,3), dtype=torch.float32, device=device)
|
921 |
+
|
922 |
+
for i in range(int(-amount)):
|
923 |
+
outpred = torch.nn.functional.conv2d(outpred, kernel, padding=(1, 1))
|
924 |
+
outpred = torch.clamp(outpred, 0, 1)
|
925 |
+
|
926 |
+
outpred = torch.squeeze(outpred)
|
927 |
+
outpred = torch.neg(outpred)
|
928 |
+
outpred = torch.add(outpred, 1)
|
929 |
+
|
930 |
+
outpred = torch.reshape(outpred, (1, 256, 256))
|
931 |
+
return outpred
|
932 |
+
|
933 |
+
|
934 |
+
def apply_CLIPs(self, img, CLIPText, CLIPAmount):
|
935 |
+
clip_mask = np.ones((352, 352))
|
936 |
+
img = img.permute(1,2,0)
|
937 |
+
img = img.cpu().numpy()
|
938 |
+
# img = img.to(torch.float)
|
939 |
+
# img = img.permute(1,2,0)
|
940 |
+
transform = transforms.Compose([transforms.ToTensor(),
|
941 |
+
transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
|
942 |
+
transforms.Resize((352, 352))])
|
943 |
+
CLIPimg = transform(img).unsqueeze(0)
|
944 |
+
|
945 |
+
if CLIPText != "":
|
946 |
+
prompts = CLIPText.split(',')
|
947 |
+
|
948 |
+
with torch.no_grad():
|
949 |
+
preds = self.clip_session(CLIPimg.repeat(len(prompts),1,1,1), prompts)[0]
|
950 |
+
# preds = self.clip_session(CLIPimg, maskimg, True)[0]
|
951 |
+
|
952 |
+
clip_mask = 1 - torch.sigmoid(preds[0][0])
|
953 |
+
for i in range(len(prompts)-1):
|
954 |
+
clip_mask *= 1-torch.sigmoid(preds[i+1][0])
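# Each comma-separated prompt yields a sigmoid probability map; multiplying
# (1 - sigmoid) across prompts keeps only the regions that match none of the prompts,
# and the result is thresholded below into a binary keep/remove mask.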
|
955 |
+
clip_mask = clip_mask.data.cpu().numpy()
|
956 |
+
|
957 |
+
thresh = CLIPAmount/100.0
|
958 |
+
clip_mask[clip_mask>thresh] = 1.0
|
959 |
+
clip_mask[clip_mask<=thresh] = 0.0
|
960 |
+
return clip_mask
|
961 |
+
|
962 |
+
# @profile
|
963 |
+
def apply_face_parser(self, img, FaceAmount, MouthAmount):
|
964 |
+
|
965 |
+
# atts = [1 'skin', 2 'l_brow', 3 'r_brow', 4 'l_eye', 5 'r_eye', 6 'eye_g', 7 'l_ear', 8 'r_ear', 9 'ear_r', 10 'nose', 11 'mouth', 12 'u_lip', 13 'l_lip', 14 'neck', 15 'neck_l', 16 'cloth', 17 'hair', 18 'hat']
|
966 |
+
|
967 |
+
outpred = torch.ones((512,512), dtype=torch.float32, device='cuda').contiguous()
|
968 |
+
|
969 |
+
|
970 |
+
img = torch.div(img, 255)
|
971 |
+
img = v2.functional.normalize(img, (0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
|
972 |
+
img = torch.reshape(img, (1, 3, 512, 512))
|
973 |
+
outpred = torch.empty((1,19,512,512), dtype=torch.float32, device='cuda').contiguous()
|
974 |
+
|
975 |
+
self.models.run_faceparser(img, outpred)
|
976 |
+
|
977 |
+
outpred = torch.squeeze(outpred)
|
978 |
+
outpred = torch.argmax(outpred, 0)
|
979 |
+
|
980 |
+
# Mouth Parse
|
981 |
+
if MouthAmount <0:
|
982 |
+
mouth_idxs = torch.tensor([11], device='cuda')
|
983 |
+
iters = int(-MouthAmount)
|
984 |
+
|
985 |
+
mouth_parse = torch.isin(outpred, mouth_idxs)
|
986 |
+
mouth_parse = torch.clamp(~mouth_parse, 0, 1).type(torch.float32)
|
987 |
+
mouth_parse = torch.reshape(mouth_parse, (1, 1, 512, 512))
|
988 |
+
mouth_parse = torch.neg(mouth_parse)
|
989 |
+
mouth_parse = torch.add(mouth_parse, 1)
|
990 |
+
|
991 |
+
kernel = torch.ones((1, 1, 3, 3), dtype=torch.float32,
|
992 |
+
device='cuda')
|
993 |
+
|
994 |
+
for i in range(iters):
|
995 |
+
mouth_parse = torch.nn.functional.conv2d(mouth_parse, kernel,
|
996 |
+
padding=(1, 1))
|
997 |
+
mouth_parse = torch.clamp(mouth_parse, 0, 1)
|
998 |
+
|
999 |
+
mouth_parse = torch.squeeze(mouth_parse)
|
1000 |
+
mouth_parse = torch.neg(mouth_parse)
|
1001 |
+
mouth_parse = torch.add(mouth_parse, 1)
|
1002 |
+
mouth_parse = torch.reshape(mouth_parse, (1, 512, 512))
|
1003 |
+
|
1004 |
+
elif MouthAmount >0:
|
1005 |
+
mouth_idxs = torch.tensor([11,12,13], device='cuda')
|
1006 |
+
iters = int(MouthAmount)
|
1007 |
+
|
1008 |
+
mouth_parse = torch.isin(outpred, mouth_idxs)
|
1009 |
+
mouth_parse = torch.clamp(~mouth_parse, 0, 1).type(torch.float32)
|
1010 |
+
mouth_parse = torch.reshape(mouth_parse, (1,1,512,512))
|
1011 |
+
mouth_parse = torch.neg(mouth_parse)
|
1012 |
+
mouth_parse = torch.add(mouth_parse, 1)
|
1013 |
+
|
1014 |
+
kernel = torch.ones((1,1,3,3), dtype=torch.float32, device='cuda')
|
1015 |
+
|
1016 |
+
for i in range(iters):
|
1017 |
+
mouth_parse = torch.nn.functional.conv2d(mouth_parse, kernel, padding=(1, 1))
|
1018 |
+
mouth_parse = torch.clamp(mouth_parse, 0, 1)
|
1019 |
+
|
1020 |
+
mouth_parse = torch.squeeze(mouth_parse)
|
1021 |
+
mouth_parse = torch.neg(mouth_parse)
|
1022 |
+
mouth_parse = torch.add(mouth_parse, 1)
|
1023 |
+
mouth_parse = torch.reshape(mouth_parse, (1, 512, 512))
|
1024 |
+
|
1025 |
+
else:
|
1026 |
+
mouth_parse = torch.ones((1, 512, 512), dtype=torch.float32, device='cuda')
|
1027 |
+
|
1028 |
+
# BG Parse
|
1029 |
+
bg_idxs = torch.tensor([0, 14, 15, 16, 17, 18], device=device)
|
1030 |
+
bg_parse = torch.isin(outpred, bg_idxs)
|
1031 |
+
bg_parse = torch.clamp(~bg_parse, 0, 1).type(torch.float32)
|
1032 |
+
bg_parse = torch.reshape(bg_parse, (1, 1, 512, 512))
|
1033 |
+
|
1034 |
+
if FaceAmount > 0:
|
1035 |
+
kernel = torch.ones((1, 1, 3, 3), dtype=torch.float32, device=device)
|
1036 |
+
|
1037 |
+
for i in range(int(FaceAmount)):
|
1038 |
+
bg_parse = torch.nn.functional.conv2d(bg_parse, kernel, padding=(1, 1))
|
1039 |
+
bg_parse = torch.clamp(bg_parse, 0, 1)
|
1040 |
+
|
1041 |
+
bg_parse = torch.squeeze(bg_parse)
|
1042 |
+
|
1043 |
+
elif FaceAmount < 0:
|
1044 |
+
bg_parse = torch.neg(bg_parse)
|
1045 |
+
bg_parse = torch.add(bg_parse, 1)
|
1046 |
+
|
1047 |
+
kernel = torch.ones((1, 1, 3, 3), dtype=torch.float32, device=device)
|
1048 |
+
|
1049 |
+
for i in range(int(-FaceAmount)):
|
1050 |
+
bg_parse = torch.nn.functional.conv2d(bg_parse, kernel, padding=(1, 1))
|
1051 |
+
bg_parse = torch.clamp(bg_parse, 0, 1)
|
1052 |
+
|
1053 |
+
bg_parse = torch.squeeze(bg_parse)
|
1054 |
+
bg_parse = torch.neg(bg_parse)
|
1055 |
+
bg_parse = torch.add(bg_parse, 1)
|
1056 |
+
bg_parse = torch.reshape(bg_parse, (1, 512, 512))
|
1057 |
+
else:
|
1058 |
+
bg_parse = torch.ones((1,512,512), dtype=torch.float32, device='cuda')
|
1059 |
+
|
1060 |
+
out_parse = torch.mul(bg_parse, mouth_parse)
|
1061 |
+
|
1062 |
+
return out_parse
|
1063 |
+
|
1064 |
+
def apply_bg_face_parser(self, img, FaceParserAmount):
|
1065 |
+
|
1066 |
+
# atts = [1 'skin', 2 'l_brow', 3 'r_brow', 4 'l_eye', 5 'r_eye', 6 'eye_g', 7 'l_ear', 8 'r_ear', 9 'ear_r', 10 'nose', 11 'mouth', 12 'u_lip', 13 'l_lip', 14 'neck', 15 'neck_l', 16 'cloth', 17 'hair', 18 'hat']
|
1067 |
+
# out = np.ones((512, 512), dtype=np.float32)
|
1068 |
+
|
1069 |
+
outpred = torch.ones((512,512), dtype=torch.float32, device='cuda').contiguous()
|
1070 |
+
|
1071 |
+
# turn mouth parser off at 0 so someone can just use the mouth parser
|
1072 |
+
if FaceParserAmount != 0:
|
1073 |
+
img = torch.div(img, 255)
|
1074 |
+
img = v2.functional.normalize(img, (0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
|
1075 |
+
img = torch.reshape(img, (1, 3, 512, 512))
|
1076 |
+
outpred = torch.empty((1,19,512,512), dtype=torch.float32, device=device).contiguous()
|
1077 |
+
|
1078 |
+
self.models.run_faceparser(img, outpred)
|
1079 |
+
|
1080 |
+
outpred = torch.squeeze(outpred)
|
1081 |
+
outpred = torch.argmax(outpred, 0)
|
1082 |
+
|
1083 |
+
test = torch.tensor([ 0, 14, 15, 16, 17, 18], device=device)
|
1084 |
+
outpred = torch.isin(outpred, test)
|
1085 |
+
outpred = torch.clamp(~outpred, 0, 1).type(torch.float32)
|
1086 |
+
outpred = torch.reshape(outpred, (1,1,512,512))
|
1087 |
+
|
1088 |
+
if FaceParserAmount >0:
|
1089 |
+
kernel = torch.ones((1,1,3,3), dtype=torch.float32, device=device)
|
1090 |
+
|
1091 |
+
for i in range(int(FaceParserAmount)):
|
1092 |
+
outpred = torch.nn.functional.conv2d(outpred, kernel, padding=(1, 1))
|
1093 |
+
outpred = torch.clamp(outpred, 0, 1)
|
1094 |
+
|
1095 |
+
outpred = torch.squeeze(outpred)
|
1096 |
+
|
1097 |
+
if FaceParserAmount <0:
|
1098 |
+
outpred = torch.neg(outpred)
|
1099 |
+
outpred = torch.add(outpred, 1)
|
1100 |
+
|
1101 |
+
kernel = torch.ones((1,1,3,3), dtype=torch.float32, device=device)
|
1102 |
+
|
1103 |
+
for i in range(int(-FaceParserAmount)):
|
1104 |
+
outpred = torch.nn.functional.conv2d(outpred, kernel, padding=(1, 1))
|
1105 |
+
outpred = torch.clamp(outpred, 0, 1)
|
1106 |
+
|
1107 |
+
outpred = torch.squeeze(outpred)
|
1108 |
+
outpred = torch.neg(outpred)
|
1109 |
+
outpred = torch.add(outpred, 1)
|
1110 |
+
|
1111 |
+
outpred = torch.reshape(outpred, (1, 512, 512))
|
1112 |
+
|
1113 |
+
return outpred
|
1114 |
+
|
1115 |
+
|
1116 |
+
|
1117 |
+
def apply_restorer(self, swapped_face_upscaled, parameters):
|
1118 |
+
temp = swapped_face_upscaled
|
1119 |
+
t512 = v2.Resize((512, 512), antialias=False)
|
1120 |
+
t256 = v2.Resize((256, 256), antialias=False)
|
1121 |
+
t1024 = v2.Resize((1024, 1024), antialias=False)
|
1122 |
+
|
1123 |
+
# If using a separate detection mode
|
1124 |
+
if parameters['RestorerDetTypeTextSel'] == 'Blend' or parameters['RestorerDetTypeTextSel'] == 'Reference':
|
1125 |
+
if parameters['RestorerDetTypeTextSel'] == 'Blend':
|
1126 |
+
# Set up Transformation
|
1127 |
+
dst = self.arcface_dst * 4.0
|
1128 |
+
dst[:,0] += 32.0
|
1129 |
+
|
1130 |
+
elif parameters['RestorerDetTypeTextSel'] == 'Reference':
|
1131 |
+
try:
|
1132 |
+
dst = self.models.resnet50(swapped_face_upscaled, score=parameters['DetectScoreSlider']/100.0)
|
1133 |
+
except:
|
1134 |
+
return swapped_face_upscaled
|
1135 |
+
|
1136 |
+
tform = trans.SimilarityTransform()
|
1137 |
+
tform.estimate(dst, self.FFHQ_kps)
|
1138 |
+
|
1139 |
+
# Transform, scale, and normalize
|
1140 |
+
temp = v2.functional.affine(swapped_face_upscaled, tform.rotation*57.2958, (tform.translation[0], tform.translation[1]) , tform.scale, 0, center = (0,0) )
|
1141 |
+
temp = v2.functional.crop(temp, 0,0, 512, 512)
|
1142 |
+
|
1143 |
+
temp = torch.div(temp, 255)
|
1144 |
+
temp = v2.functional.normalize(temp, (0.5, 0.5, 0.5), (0.5, 0.5, 0.5), inplace=False)
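# Normalizing with mean 0.5 / std 0.5 maps the [0, 1] face into [-1, 1], the input
# range these restorer models appear to expect; the clamp/add/div/mul block after
# inference maps the output back to [0, 255].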
|
1145 |
+
if parameters['RestorerTypeTextSel'] == 'GPEN256':
|
1146 |
+
temp = t256(temp)
|
1147 |
+
temp = torch.unsqueeze(temp, 0).contiguous()
|
1148 |
+
|
1149 |
+
# Bindings
|
1150 |
+
outpred = torch.empty((1,3,512,512), dtype=torch.float32, device=device).contiguous()
|
1151 |
+
|
1152 |
+
if parameters['RestorerTypeTextSel'] == 'GFPGAN':
|
1153 |
+
self.models.run_GFPGAN(temp, outpred)
|
1154 |
+
|
1155 |
+
elif parameters['RestorerTypeTextSel'] == 'CF':
|
1156 |
+
self.models.run_codeformer(temp, outpred)
|
1157 |
+
|
1158 |
+
elif parameters['RestorerTypeTextSel'] == 'GPEN256':
|
1159 |
+
outpred = torch.empty((1,3,256,256), dtype=torch.float32, device=device).contiguous()
|
1160 |
+
self.models.run_GPEN_256(temp, outpred)
|
1161 |
+
|
1162 |
+
elif parameters['RestorerTypeTextSel'] == 'GPEN512':
|
1163 |
+
self.models.run_GPEN_512(temp, outpred)
|
1164 |
+
|
1165 |
+
elif parameters['RestorerTypeTextSel'] == 'GPEN1024':
|
1166 |
+
temp = t1024(temp)
|
1167 |
+
outpred = torch.empty((1, 3, 1024, 1024), dtype=torch.float32, device=device).contiguous()
|
1168 |
+
self.models.run_GPEN_1024(temp, outpred)
|
1169 |
+
|
1170 |
+
# Format back to cxHxW @ 255
|
1171 |
+
outpred = torch.squeeze(outpred)
|
1172 |
+
outpred = torch.clamp(outpred, -1, 1)
|
1173 |
+
outpred = torch.add(outpred, 1)
|
1174 |
+
outpred = torch.div(outpred, 2)
|
1175 |
+
outpred = torch.mul(outpred, 255)
|
1176 |
+
if parameters['RestorerTypeTextSel'] == 'GPEN256':
|
1177 |
+
outpred = t512(outpred)
|
1178 |
+
elif parameters['RestorerTypeTextSel'] == 'GPEN1024':
|
1179 |
+
outpred = t512(outpred)
|
1180 |
+
# Invert Transform
|
1181 |
+
if parameters['RestorerDetTypeTextSel'] == 'Blend' or parameters['RestorerDetTypeTextSel'] == 'Reference':
|
1182 |
+
outpred = v2.functional.affine(outpred, tform.inverse.rotation*57.2958, (tform.inverse.translation[0], tform.inverse.translation[1]), tform.inverse.scale, 0, interpolation=v2.InterpolationMode.BILINEAR, center = (0,0) )
|
1183 |
+
|
1184 |
+
# Blend
|
1185 |
+
alpha = float(parameters["RestorerSlider"])/100.0
|
1186 |
+
outpred = torch.add(torch.mul(outpred, alpha), torch.mul(swapped_face_upscaled, 1-alpha))
|
1187 |
+
|
1188 |
+
return outpred
|
1189 |
+
|
1190 |
+
def apply_fake_diff(self, swapped_face, original_face, DiffAmount):
|
1191 |
+
swapped_face = swapped_face.permute(1,2,0)
|
1192 |
+
original_face = original_face.permute(1,2,0)
|
1193 |
+
|
1194 |
+
diff = swapped_face-original_face
|
1195 |
+
diff = torch.abs(diff)
|
1196 |
+
|
1197 |
+
# Find the difference between the swap and original, per channel
|
1198 |
+
fthresh = DiffAmount*2.55
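# DiffAmount is a 0-100 slider, so multiplying by 2.55 converts it into a 0-255
# per-channel pixel threshold (e.g. a slider value of 40 gives a threshold of 102).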
|
1199 |
+
|
1200 |
+
# Bimodal
|
1201 |
+
diff[diff<fthresh] = 0
|
1202 |
+
diff[diff>=fthresh] = 1
|
1203 |
+
|
1204 |
+
# If any of the channels exceeded the threshold, then add them to the mask
|
1205 |
+
diff = torch.sum(diff, dim=2)
|
1206 |
+
diff = torch.unsqueeze(diff, 2)
|
1207 |
+
diff[diff>0] = 1
|
1208 |
+
|
1209 |
+
diff = diff.permute(2,0,1)
|
1210 |
+
|
1211 |
+
return diff
|
1212 |
+
|
1213 |
+
|
1214 |
+
|
1215 |
+
def clear_mem(self):
|
1216 |
+
del self.swapper_model
|
1217 |
+
del self.GFPGAN_model
|
1218 |
+
del self.occluder_model
|
1219 |
+
del self.face_parsing_model
|
1220 |
+
del self.codeformer_model
|
1221 |
+
del self.GPEN_256_model
|
1222 |
+
del self.GPEN_512_model
|
1223 |
+
del self.GPEN_1024_model
|
1224 |
+
del self.resnet_model
|
1225 |
+
del self.detection_model
|
1226 |
+
del self.recognition_model
|
1227 |
+
|
1228 |
+
self.swapper_model = []
|
1229 |
+
self.GFPGAN_model = []
|
1230 |
+
self.occluder_model = []
|
1231 |
+
self.face_parsing_model = []
|
1232 |
+
self.codeformer_model = []
|
1233 |
+
self.GPEN_256_model = []
|
1234 |
+
self.GPEN_512_model = []
|
1235 |
+
self.GPEN_1024_model = []
|
1236 |
+
self.resnet_model = []
|
1237 |
+
self.detection_model = []
|
1238 |
+
self.recognition_model = []
|
1239 |
+
|
1240 |
+
# test = swap.permute(1, 2, 0)
|
1241 |
+
# test = test.cpu().numpy()
|
1242 |
+
# cv2.imwrite('2.jpg', test)
|
rope/media/tl_beg_off.png
ADDED
rope/media/tl_beg_on.png
ADDED
rope/media/tl_left_hover.png
ADDED
rope/media/tl_left_off.png
ADDED
rope/media/tl_left_on.png
ADDED
rope/media/tl_right_hover.png
ADDED
rope/media/tl_right_off.png
ADDED
rope/media/tl_right_on.png
ADDED