update.
- CodeFormer/.gitignore → .gitignore +5 -4
- CodeFormer/basicsr/utils/misc.py +25 -2
- CodeFormer/basicsr/version.py +2 -2
- CodeFormer/facelib/utils/face_restoration_helper.py +77 -12
- CodeFormer/facelib/utils/misc.py +32 -4
- CodeFormer/inference_codeformer.py +126 -41
- README.md +1 -1
- app.py +12 -10

CodeFormer/.gitignore → .gitignore
RENAMED

@@ -5,9 +5,9 @@ version.py
 
 # ignored files with suffix
 *.html
-
-
-
+*.png
+*.jpeg
+*.jpg
 *.pt
 *.gif
 *.pth
@@ -122,7 +122,8 @@ venv.bak/
 .mypy_cache/
 
 # project
-results/
+CodeFormer/results/
+output/
 dlib/
 *.pth
 *_old*

CodeFormer/basicsr/utils/misc.py
CHANGED

@@ -1,13 +1,36 @@
-import numpy as np
 import os
+import re
 import random
 import time
 import torch
+import numpy as np
 from os import path as osp
 
 from .dist_util import master_only
 from .logger import get_root_logger
 
+IS_HIGH_VERSION = [int(m) for m in list(re.findall(r"^([0-9]+)\.([0-9]+)\.([0-9]+)([^0-9][a-zA-Z0-9]*)?(\+git.*)?$",\
+    torch.__version__)[0][:3])] >= [1, 12, 0]
+
+def gpu_is_available():
+    if IS_HIGH_VERSION:
+        if torch.backends.mps.is_available():
+            return True
+    return True if torch.cuda.is_available() and torch.backends.cudnn.is_available() else False
+
+def get_device(gpu_id=None):
+    if gpu_id is None:
+        gpu_str = ''
+    elif isinstance(gpu_id, int):
+        gpu_str = f':{gpu_id}'
+    else:
+        raise TypeError('Input should be int value.')
+
+    if IS_HIGH_VERSION:
+        if torch.backends.mps.is_available():
+            return torch.device('mps'+gpu_str)
+    return torch.device('cuda'+gpu_str if torch.cuda.is_available() and torch.backends.cudnn.is_available() else 'cpu')
+
 
 def set_random_seed(seed):
     """Set random seeds."""
@@ -131,4 +154,4 @@ def sizeof_fmt(size, suffix='B'):
         if abs(size) < 1024.0:
             return f'{size:3.1f} {unit}{suffix}'
         size /= 1024.0
-    return f'{size:3.1f} Y{suffix}'
+    return f'{size:3.1f} Y{suffix}'

(The last hunk is a whitespace-only change; the line text is unchanged.)
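
The new gpu_is_available()/get_device() helpers centralize backend selection: on torch >= 1.12 (detected by the version regex above) the MPS backend is preferred when present, then CUDA with cuDNN, then CPU. A minimal usage sketch, assuming the CodeFormer repo is on PYTHONPATH:

import torch
from basicsr.utils.misc import gpu_is_available, get_device

device = get_device()  # torch.device('mps'), 'cuda' (or 'cuda:N' via get_device(N)), or 'cpu'
print(f'accelerator available: {gpu_is_available()}, selected: {device}')

x = torch.zeros(1, 3, 512, 512, device=device)  # models and tensors simply target `device`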

CodeFormer/basicsr/version.py
CHANGED

@@ -1,5 +1,5 @@
 # GENERATED VERSION FILE
-# TIME:
+# TIME: Sat Sep 21 15:31:46 2024
 __version__ = '1.3.2'
-__gitsha__ = '
+__gitsha__ = '1.3.2'
 version_info = (1, 3, 2)

CodeFormer/facelib/utils/face_restoration_helper.py
CHANGED

@@ -6,8 +6,14 @@ from torchvision.transforms.functional import normalize
 
 from facelib.detection import init_detection_model
 from facelib.parsing import init_parsing_model
-from facelib.utils.misc import img2tensor, imwrite, is_gray, bgr2gray
+from facelib.utils.misc import img2tensor, imwrite, is_gray, bgr2gray, adain_npy
+from basicsr.utils.download_util import load_file_from_url
+from basicsr.utils.misc import get_device
 
+dlib_model_url = {
+    'face_detector': 'https://github.com/sczhou/CodeFormer/releases/download/v0.1.0/mmod_human_face_detector-4cb19393.dat',
+    'shape_predictor_5': 'https://github.com/sczhou/CodeFormer/releases/download/v0.1.0/shape_predictor_5_face_landmarks-c4b1e980.dat'
+}
 
 def get_largest_face(det_faces, h, w):
 
@@ -64,8 +70,15 @@ class FaceRestoreHelper(object):
         self.crop_ratio = crop_ratio  # (h, w)
         assert (self.crop_ratio[0] >= 1 and self.crop_ratio[1] >= 1), 'crop ration only supports >=1'
         self.face_size = (int(face_size * self.crop_ratio[1]), int(face_size * self.crop_ratio[0]))
-
-        if self.template_3points:
+        self.det_model = det_model
+
+        if self.det_model == 'dlib':
+            # standard 5 landmarks for FFHQ faces with 1024 x 1024
+            self.face_template = np.array([[686.77227723, 488.62376238], [586.77227723, 493.59405941],
+                                           [337.91089109, 488.38613861], [437.95049505, 493.51485149],
+                                           [513.58415842, 678.5049505]])
+            self.face_template = self.face_template / (1024 // face_size)
+        elif self.template_3points:
             self.face_template = np.array([[192, 240], [319, 240], [257, 371]])
         else:
             # standard 5 landmarks for FFHQ faces with 512 x 512
@@ -77,7 +90,6 @@ class FaceRestoreHelper(object):
         # self.face_template = np.array([[193.65928, 242.98541], [318.32558, 243.06108], [255.67984, 328.82894],
         #                                [198.22603, 372.82502], [313.91018, 372.75659]])
 
-
         self.face_template = self.face_template * (face_size / 512.0)
         if self.crop_ratio[0] > 1:
             self.face_template[:, 1] += face_size * (self.crop_ratio[0] - 1) / 2
@@ -97,12 +109,16 @@ class FaceRestoreHelper(object):
         self.pad_input_imgs = []
 
         if device is None:
-            self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+            # self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+            self.device = get_device()
         else:
             self.device = device
 
         # init face detection model
-        self.face_detector = init_detection_model(det_model, half=False, device=self.device)
+        if self.det_model == 'dlib':
+            self.face_detector, self.shape_predictor_5 = self.init_dlib(dlib_model_url['face_detector'], dlib_model_url['shape_predictor_5'])
+        else:
+            self.face_detector = init_detection_model(det_model, half=False, device=self.device)
 
         # init face parsing model
         self.use_parse = use_parse
@@ -125,7 +141,7 @@ class FaceRestoreHelper(object):
         img = img[:, :, 0:3]
 
         self.input_img = img
-        self.is_gray = is_gray(img, threshold=
+        self.is_gray = is_gray(img, threshold=10)
         if self.is_gray:
             print('Grayscale input: True')
 
@@ -133,25 +149,72 @@
             f = 512.0/min(self.input_img.shape[:2])
             self.input_img = cv2.resize(self.input_img, (0,0), fx=f, fy=f, interpolation=cv2.INTER_LINEAR)
 
+    def init_dlib(self, detection_path, landmark5_path):
+        """Initialize the dlib detectors and predictors."""
+        try:
+            import dlib
+        except ImportError:
+            print('Please install dlib by running:' 'conda install -c conda-forge dlib')
+        detection_path = load_file_from_url(url=detection_path, model_dir='weights/dlib', progress=True, file_name=None)
+        landmark5_path = load_file_from_url(url=landmark5_path, model_dir='weights/dlib', progress=True, file_name=None)
+        face_detector = dlib.cnn_face_detection_model_v1(detection_path)
+        shape_predictor_5 = dlib.shape_predictor(landmark5_path)
+        return face_detector, shape_predictor_5
+
+    def get_face_landmarks_5_dlib(self,
+                                  only_keep_largest=False,
+                                  scale=1):
+        det_faces = self.face_detector(self.input_img, scale)
+
+        if len(det_faces) == 0:
+            print('No face detected. Try to increase upsample_num_times.')
+            return 0
+        else:
+            if only_keep_largest:
+                print('Detect several faces and only keep the largest.')
+                face_areas = []
+                for i in range(len(det_faces)):
+                    face_area = (det_faces[i].rect.right() - det_faces[i].rect.left()) * (
+                        det_faces[i].rect.bottom() - det_faces[i].rect.top())
+                    face_areas.append(face_area)
+                largest_idx = face_areas.index(max(face_areas))
+                self.det_faces = [det_faces[largest_idx]]
+            else:
+                self.det_faces = det_faces
+
+        if len(self.det_faces) == 0:
+            return 0
+
+        for face in self.det_faces:
+            shape = self.shape_predictor_5(self.input_img, face.rect)
+            landmark = np.array([[part.x, part.y] for part in shape.parts()])
+            self.all_landmarks_5.append(landmark)
+
+        return len(self.all_landmarks_5)
+
+
     def get_face_landmarks_5(self,
                              only_keep_largest=False,
                              only_center_face=False,
                              resize=None,
                              blur_ratio=0.01,
                              eye_dist_threshold=None):
+        if self.det_model == 'dlib':
+            return self.get_face_landmarks_5_dlib(only_keep_largest)
+
         if resize is None:
             scale = 1
             input_img = self.input_img
         else:
             h, w = self.input_img.shape[0:2]
             scale = resize / min(h, w)
-            scale = max(1, scale)  # always scale up
+            # scale = max(1, scale)  # always scale up; comment this out for HD images, e.g., AIGC faces.
             h, w = int(h * scale), int(w * scale)
             interp = cv2.INTER_AREA if scale < 1 else cv2.INTER_LINEAR
             input_img = cv2.resize(self.input_img, (w, h), interpolation=interp)
 
         with torch.no_grad():
-            bboxes = self.
+            bboxes = self.face_detector.detect_faces(input_img)
 
         if bboxes is None or bboxes.shape[0] == 0:
             return 0
@@ -298,10 +361,12 @@
             torch.save(inverse_affine, save_path)
 
 
-    def add_restored_face(self, restored_face):
+    def add_restored_face(self, restored_face, input_face=None):
         if self.is_gray:
-            restored_face = bgr2gray(restored_face)
-        self.restored_faces.append(restored_face)
+            restored_face = bgr2gray(restored_face) # convert img into grayscale
+            if input_face is not None:
+                restored_face = adain_npy(restored_face, input_face) # transfer the color
+        self.restored_faces.append(restored_face)
 
 
     def paste_faces_to_input_image(self, save_path=None, upsample_img=None, draw_box=False, face_upsampler=None):
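
Taken together, these changes let the helper run detection fully on CPU via dlib (the weights are fetched on first use from the dlib_model_url entries) and keep the grayscale bookkeeping needed for the new color transfer in add_restored_face. A rough usage sketch, assuming the constructor keywords of the existing FaceRestoreHelper signature and an illustrative input path:

import cv2
from facelib.utils.face_restoration_helper import FaceRestoreHelper

helper = FaceRestoreHelper(upscale_factor=2, face_size=512, det_model='dlib', use_parse=True)
helper.read_image(cv2.imread('inputs/whole_imgs/00.jpg'))        # illustrative path
num_faces = helper.get_face_landmarks_5(only_keep_largest=True)  # dispatches to the dlib branch
helper.align_warp_face()  # crops/aligns each face using the 1024-based landmark template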

CodeFormer/facelib/utils/misc.py
CHANGED

@@ -7,13 +7,13 @@ import torch
 from torch.hub import download_url_to_file, get_dir
 from urllib.parse import urlparse
 # from basicsr.utils.download_util import download_file_from_google_drive
-# import gdown
-
 
 ROOT_DIR = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
 
 def download_pretrained_models(file_ids, save_path_root):
+    import gdown
+
     os.makedirs(save_path_root, exist_ok=True)
 
     for file_name, file_id in file_ids.items():
@@ -23,7 +23,7 @@ def download_pretrained_models(file_ids, save_path_root):
             user_response = input(f'{file_name} already exist. Do you want to cover it? Y/N\n')
             if user_response.lower() == 'y':
                 print(f'Covering {file_name} to {save_path}')
-
+                gdown.download(file_url, save_path, quiet=False)
                 # download_file_from_google_drive(file_id, save_path)
             elif user_response.lower() == 'n':
                 print(f'Skipping {file_name}')
@@ -31,7 +31,7 @@ def download_pretrained_models(file_ids, save_path_root):
             raise ValueError('Wrong input. Only accepts Y/N.')
         else:
             print(f'Downloading {file_name} to {save_path}')
-
+            gdown.download(file_url, save_path, quiet=False)
             # download_file_from_google_drive(file_id, save_path)
 
 
@@ -172,3 +172,31 @@ def bgr2gray(img, out_channel=3):
     if out_channel == 3:
         gray = gray[:,:,np.newaxis].repeat(3, axis=2)
     return gray
+
+
+def calc_mean_std(feat, eps=1e-5):
+    """
+    Args:
+        feat (numpy): 3D [w h c]s
+    """
+    size = feat.shape
+    assert len(size) == 3, 'The input feature should be 3D tensor.'
+    c = size[2]
+    feat_var = feat.reshape(-1, c).var(axis=0) + eps
+    feat_std = np.sqrt(feat_var).reshape(1, 1, c)
+    feat_mean = feat.reshape(-1, c).mean(axis=0).reshape(1, 1, c)
+    return feat_mean, feat_std
+
+
+def adain_npy(content_feat, style_feat):
+    """Adaptive instance normalization for numpy.
+
+    Args:
+        content_feat (numpy): The input feature.
+        style_feat (numpy): The reference feature.
+    """
+    size = content_feat.shape
+    style_mean, style_std = calc_mean_std(style_feat)
+    content_mean, content_std = calc_mean_std(content_feat)
+    normalized_feat = (content_feat - np.broadcast_to(content_mean, size)) / np.broadcast_to(content_std, size)
+    return normalized_feat * np.broadcast_to(style_std, size) + np.broadcast_to(style_mean, size)
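
calc_mean_std/adain_npy implement plain AdaIN over HWC numpy arrays: each channel of the content image is z-scored and then re-scaled/shifted to the style image's per-channel statistics, which is how add_restored_face transfers the original crop's color onto a restored grayscale face. A small self-check of that property (illustrative shapes):

import numpy as np
from facelib.utils.misc import adain_npy, calc_mean_std

content = np.random.rand(64, 64, 3).astype(np.float32)  # e.g. a restored face, HWC
style = np.random.rand(64, 64, 3).astype(np.float32)    # e.g. the original input face

out = adain_npy(content, style)
out_mean, out_std = calc_mean_std(out)
style_mean, style_std = calc_mean_std(style)
assert np.allclose(out_mean, style_mean, atol=1e-4)  # channel means now match the style
assert np.allclose(out_std, style_std, atol=1e-4)    # channel stds match too (up to eps)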

CodeFormer/inference_codeformer.py
CHANGED

@@ -1,4 +1,3 @@
-# Modified by Shangchen Zhou from: https://github.com/TencentARC/GFPGAN/blob/master/inference_gfpgan.py
 import os
 import cv2
 import argparse
@@ -7,8 +6,9 @@ import torch
 from torchvision.transforms.functional import normalize
 from basicsr.utils import imwrite, img2tensor, tensor2img
 from basicsr.utils.download_util import load_file_from_url
+from basicsr.utils.misc import gpu_is_available, get_device
 from facelib.utils.face_restoration_helper import FaceRestoreHelper
-
+from facelib.utils.misc import is_gray
 
 from basicsr.utils.registry import ARCH_REGISTRY
 
@@ -17,51 +17,104 @@ pretrain_model_url = {
 }
 
 def set_realesrgan():
-    if not torch.cuda.is_available():  # CPU
+    from basicsr.archs.rrdbnet_arch import RRDBNet
+    from basicsr.utils.realesrgan_utils import RealESRGANer
+
+    use_half = False
+    if torch.cuda.is_available(): # set False in CPU/MPS mode
+        no_half_gpu_list = ['1650', '1660'] # set False for GPUs that don't support f16
+        if not True in [gpu in torch.cuda.get_device_name(0) for gpu in no_half_gpu_list]:
+            use_half = True
+
+    model = RRDBNet(
+        num_in_ch=3,
+        num_out_ch=3,
+        num_feat=64,
+        num_block=23,
+        num_grow_ch=32,
+        scale=2,
+    )
+    upsampler = RealESRGANer(
+        scale=2,
+        model_path="https://github.com/sczhou/CodeFormer/releases/download/v0.1.0/RealESRGAN_x2plus.pth",
+        model=model,
+        tile=args.bg_tile,
+        tile_pad=40,
+        pre_pad=0,
+        half=use_half
+    )
+
+    if not gpu_is_available():  # CPU
         import warnings
-        warnings.warn('
-            '
+        warnings.warn('Running on CPU now! Make sure your PyTorch version matches your CUDA.'
+                      'The unoptimized RealESRGAN is slow on CPU. '
+                      'If you want to disable it, please remove `--bg_upsampler` and `--face_upsample` in command.',
             category=RuntimeWarning)
-
-    else:
-        from basicsr.archs.rrdbnet_arch import RRDBNet
-        from basicsr.utils.realesrgan_utils import RealESRGANer
-        model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=2)
-        bg_upsampler = RealESRGANer(
-            scale=2,
-            model_path='https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.1/RealESRGAN_x2plus.pth',
-            model=model,
-            tile=args.bg_tile,
-            tile_pad=40,
-            pre_pad=0,
-            half=True)  # need to set False in CPU mode
-        return bg_upsampler
+    return upsampler
 
 if __name__ == '__main__':
-    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+    # device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+    device = get_device()
     parser = argparse.ArgumentParser()
 
-    parser.add_argument('--
-
-    parser.add_argument('--
-
-    parser.add_argument('
+    parser.add_argument('-i', '--input_path', type=str, default='./inputs/whole_imgs',
+            help='Input image, video or folder. Default: inputs/whole_imgs')
+    parser.add_argument('-o', '--output_path', type=str, default=None,
+            help='Output folder. Default: results/<input_name>_<w>')
+    parser.add_argument('-w', '--fidelity_weight', type=float, default=0.5,
+            help='Balance the quality and fidelity. Default: 0.5')
+    parser.add_argument('-s', '--upscale', type=int, default=2,
+            help='The final upsampling scale of the image. Default: 2')
+    parser.add_argument('--has_aligned', action='store_true', help='Input are cropped and aligned faces. Default: False')
+    parser.add_argument('--only_center_face', action='store_true', help='Only restore the center face. Default: False')
+    parser.add_argument('--draw_box', action='store_true', help='Draw the bounding box for the detected faces. Default: False')
     # large det_model: 'YOLOv5l', 'retinaface_resnet50'
     # small det_model: 'YOLOv5n', 'retinaface_mobile0.25'
-    parser.add_argument('--detection_model', type=str, default='retinaface_resnet50'
-
-
-    parser.add_argument('--
+    parser.add_argument('--detection_model', type=str, default='retinaface_resnet50',
+            help='Face detector. Optional: retinaface_resnet50, retinaface_mobile0.25, YOLOv5l, YOLOv5n, dlib. \
+                Default: retinaface_resnet50')
+    parser.add_argument('--bg_upsampler', type=str, default='None', help='Background upsampler. Optional: realesrgan')
+    parser.add_argument('--face_upsample', action='store_true', help='Face upsampler after enhancement. Default: False')
     parser.add_argument('--bg_tile', type=int, default=400, help='Tile size for background sampler. Default: 400')
+    parser.add_argument('--suffix', type=str, default=None, help='Suffix of the restored faces. Default: None')
+    parser.add_argument('--save_video_fps', type=float, default=None, help='Frame rate for saving video. Default: None')
 
     args = parser.parse_args()
 
     # ------------------------ input & output ------------------------
-
-
+    w = args.fidelity_weight
+    input_video = False
+    if args.input_path.endswith(('jpg', 'jpeg', 'png', 'JPG', 'JPEG', 'PNG')): # input single img path
+        input_img_list = [args.input_path]
+        result_root = f'results/test_img_{w}'
+    elif args.input_path.endswith(('mp4', 'mov', 'avi', 'MP4', 'MOV', 'AVI')): # input video path
+        from basicsr.utils.video_util import VideoReader, VideoWriter
+        input_img_list = []
+        vidreader = VideoReader(args.input_path)
+        image = vidreader.get_frame()
+        while image is not None:
+            input_img_list.append(image)
+            image = vidreader.get_frame()
+        audio = vidreader.get_audio()
+        fps = vidreader.get_fps() if args.save_video_fps is None else args.save_video_fps
+        video_name = os.path.basename(args.input_path)[:-4]
+        result_root = f'results/{video_name}_{w}'
+        input_video = True
+        vidreader.close()
+    else: # input img folder
+        if args.input_path.endswith('/'): # solve when path ends with /
+            args.input_path = args.input_path[:-1]
+        # scan all the jpg and png images
+        input_img_list = sorted(glob.glob(os.path.join(args.input_path, '*.[jpJP][pnPN]*[gG]')))
+        result_root = f'results/{os.path.basename(args.input_path)}_{w}'
+
+    if not args.output_path is None: # set output path
+        result_root = args.output_path
 
-
-
+    test_img_num = len(input_img_list)
+    if test_img_num == 0:
+        raise FileNotFoundError('No input image/video is found...\n'
+            '\tNote that --input_path for video should end with .mp4|.mov|.avi')
 
     # ------------------ set up background upsampler ------------------
     if args.bg_upsampler == 'realesrgan':
@@ -109,19 +162,27 @@
             device=device)
 
     # -------------------- start to processing ---------------------
-
-    for img_path in sorted(glob.glob(os.path.join(args.test_path, '*.[jp][pn]g'))):
+    for i, img_path in enumerate(input_img_list):
         # clean all the intermediate results to process the next image
         face_helper.clean_all()
 
-
-
-
-
+        if isinstance(img_path, str):
+            img_name = os.path.basename(img_path)
+            basename, ext = os.path.splitext(img_name)
+            print(f'[{i+1}/{test_img_num}] Processing: {img_name}')
+            img = cv2.imread(img_path, cv2.IMREAD_COLOR)
+        else: # for video processing
+            basename = str(i).zfill(6)
+            img_name = f'{video_name}_{basename}' if input_video else basename
+            print(f'[{i+1}/{test_img_num}] Processing: {img_name}')
+            img = img_path
 
         if args.has_aligned:
            # the input faces are already cropped and aligned
            img = cv2.resize(img, (512, 512), interpolation=cv2.INTER_LINEAR)
+           face_helper.is_gray = is_gray(img, threshold=10)
+           if face_helper.is_gray:
+               print('Grayscale input: True')
            face_helper.cropped_faces = [img]
        else:
            face_helper.read_image(img)
@@ -150,7 +211,7 @@
             restored_face = tensor2img(cropped_face_t, rgb2bgr=True, min_max=(-1, 1))
 
             restored_face = restored_face.astype('uint8')
-            face_helper.add_restored_face(restored_face)
+            face_helper.add_restored_face(restored_face, cropped_face)
 
             # paste_back
             if not args.has_aligned:
@@ -178,12 +239,36 @@
                 save_face_name = f'{basename}.png'
             else:
                 save_face_name = f'{basename}_{idx:02d}.png'
+            if args.suffix is not None:
+                save_face_name = f'{save_face_name[:-4]}_{args.suffix}.png'
             save_restore_path = os.path.join(result_root, 'restored_faces', save_face_name)
             imwrite(restored_face, save_restore_path)
 
         # save restored img
         if not args.has_aligned and restored_img is not None:
+            if args.suffix is not None:
+                basename = f'{basename}_{args.suffix}'
             save_restore_path = os.path.join(result_root, 'final_results', f'{basename}.png')
             imwrite(restored_img, save_restore_path)
 
-
+    # save enhanced video
+    if input_video:
+        print('Video Saving...')
+        # load images
+        video_frames = []
+        img_list = sorted(glob.glob(os.path.join(result_root, 'final_results', '*.[jp][pn]g')))
+        for img_path in img_list:
+            img = cv2.imread(img_path)
+            video_frames.append(img)
+        # write images to video
+        height, width = video_frames[0].shape[:2]
+        if args.suffix is not None:
+            video_name = f'{video_name}_{args.suffix}.png'
+        save_restore_path = os.path.join(result_root, f'{video_name}.mp4')
+        vidwriter = VideoWriter(save_restore_path, height, width, fps, audio)
+
+        for f in video_frames:
+            vidwriter.write_frame(f)
+        vidwriter.close()
+
+    print(f'\nAll results are saved in {result_root}')
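
For reference, the reworked CLI can be exercised along these lines (paths are illustrative; the flags are the ones defined in the diff above):

# single image, fidelity weight 0.7, dlib-based detection
python inference_codeformer.py -i inputs/face.jpg -w 0.7 --detection_model dlib

# a folder of images, with Real-ESRGAN background upsampling and face upsampling
python inference_codeformer.py -i inputs/whole_imgs -w 0.5 --bg_upsampler realesrgan --face_upsample

# a video: frames are restored one by one, then re-encoded with the source audio/fps
python inference_codeformer.py -i inputs/clip.mp4 -o results/clip_restored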

README.md
CHANGED

@@ -9,4 +9,4 @@ app_file: app.py
 pinned: false
 ---
 
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

(Whitespace-only change; the line text is unchanged.)

app.py
CHANGED

@@ -16,9 +16,9 @@ from torchvision.transforms.functional import normalize
 from basicsr.utils import imwrite, img2tensor, tensor2img
 from basicsr.utils.download_util import load_file_from_url
 from facelib.utils.face_restoration_helper import FaceRestoreHelper
-from facelib.utils.misc import is_gray
 from basicsr.archs.rrdbnet_arch import RRDBNet
 from basicsr.utils.realesrgan_utils import RealESRGANer
+from facelib.utils.misc import is_gray
 
 from basicsr.utils.registry import ARCH_REGISTRY
 
@@ -166,9 +166,7 @@ def inference(image, face_align, background_enhance, face_upsample, upscale, cod
     # face restoration for each cropped face
     for idx, cropped_face in enumerate(face_helper.cropped_faces):
         # prepare data
-        cropped_face_t = img2tensor(
-            cropped_face / 255.0, bgr2rgb=True, float32=True
-        )
+        cropped_face_t = img2tensor(cropped_face / 255., bgr2rgb=True, float32=True)
         normalize(cropped_face_t, (0.5, 0.5, 0.5), (0.5, 0.5, 0.5), inplace=True)
         cropped_face_t = cropped_face_t.unsqueeze(0).to(device)
 
@@ -182,12 +180,10 @@ def inference(image, face_align, background_enhance, face_upsample, upscale, cod
             torch.cuda.empty_cache()
         except RuntimeError as error:
             print(f"Failed inference for CodeFormer: {error}")
-            restored_face = tensor2img(
-                cropped_face_t, rgb2bgr=True, min_max=(-1, 1)
-            )
+            restored_face = tensor2img(cropped_face_t, rgb2bgr=True, min_max=(-1, 1))
 
         restored_face = restored_face.astype("uint8")
-        face_helper.add_restored_face(restored_face)
+        face_helper.add_restored_face(restored_face, cropped_face)
 
         # paste_back
         if not has_aligned:
@@ -264,6 +260,12 @@ If you have any questions, please feel free to reach me out at <b>shangchenzhou@
 td {
     padding-right: 0px !important;
 }
+
+.gradio-container-4-37-2 .prose table, .gradio-container-4-37-2 .prose tr, .gradio-container-4-37-2 .prose td, .gradio-container-4-37-2 .prose th {
+    border: 0px solid #ffffff;
+    border-bottom: 0px solid #ffffff;
+}
+
 </style>
 
 <table>
@@ -302,5 +304,5 @@ demo = gr.Interface(
 )
 
 DEBUG = os.getenv('DEBUG') == '1'
-demo.launch(debug=DEBUG)
-
+# demo.launch(debug=DEBUG)
+demo.launch(debug=DEBUG, share=True)
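
One behavioral note on the last hunk: share=True makes Gradio open a temporary public *.gradio.live tunnel in addition to the local server. If that should stay optional, the flag can be gated through the environment the same way DEBUG already is; a sketch against the existing demo object (the SHARE variable is hypothetical, not part of this commit):

import os

DEBUG = os.getenv('DEBUG') == '1'
SHARE = os.getenv('SHARE', '1') == '1'  # hypothetical toggle; default keeps the commit's behavior
demo.launch(debug=DEBUG, share=SHARE)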