import gradio as gr
import cv2
import numpy as np
import tensorflow as tf
import tensorflow_addons  # noqa: F401 -- kept so any TFA custom objects in the saved model can be deserialized

from facenet_pytorch import MTCNN
from PIL import Image
import moviepy.editor as mp

# MTCNN face detector (CPU); keep_all=True returns every face found in a frame,
# and margin adds context pixels around each detected box.
mtcnn = MTCNN(margin=14, keep_all=True, factor=0.7, device='cpu')


class DetectionPipeline:
    """Pipeline class for detecting faces in the frames of a video file."""

    def __init__(self, detector, n_frames=None, batch_size=60, resize=None):
        """Constructor for DetectionPipeline class.

        Keyword Arguments:
            detector -- Face detector exposing a detect(frames) method (e.g., MTCNN).
            n_frames {int} -- Total number of frames to load. These will be evenly
                spaced throughout the video. If not specified (i.e., None), all
                frames will be loaded. (default: {None})
            batch_size {int} -- Batch size to use with the MTCNN face detector.
                (default: {60})
            resize {float} -- Fraction by which to resize frames from the original
                prior to face detection. A value less than 1 results in downsampling
                and a value greater than 1 results in upsampling. (default: {None})
        """
        self.detector = detector
        self.n_frames = n_frames
        self.batch_size = batch_size
        self.resize = resize

    def __call__(self, filename):
        """Load frames from an MP4 video and detect faces.

        Arguments:
            filename {str} -- Path to video.
        """
        v_cap = cv2.VideoCapture(filename)
        v_len = int(v_cap.get(cv2.CAP_PROP_FRAME_COUNT))

        # Pick the frame indices to decode: every frame, or n_frames evenly spaced ones.
        if self.n_frames is None:
            sample = np.arange(0, v_len)
        else:
            sample = np.linspace(0, v_len - 1, self.n_frames).astype(int)

        faces = []
        frames = []
        for j in range(v_len):
            v_cap.grab()
            if j in sample:
                success, frame = v_cap.retrieve()
                if not success:
                    continue
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

                if self.resize is not None:
                    frame = cv2.resize(
                        frame,
                        (int(frame.shape[1] * self.resize),
                         int(frame.shape[0] * self.resize)),
                    )
                frames.append(frame)

                # Run the detector once per full batch, and on the last sampled frame.
                if len(frames) % self.batch_size == 0 or j == sample[-1]:
                    boxes, _ = self.detector.detect(frames)

                    for i in range(len(frames)):
                        # Skip frames in which no face was detected.
                        if boxes[i] is None:
                            continue

                        # Crop the first detected face; clip the box to the image
                        # bounds, since MTCNN can return slightly negative coordinates.
                        box = np.clip(boxes[i][0], 0, None).astype(int)
                        face = frames[i][box[1]:box[3], box[0]:box[2]]
                        if not face.any():
                            continue

                        # Resize to the classifier's expected input size.
                        faces.append(cv2.resize(face, (224, 224)))

                    frames = []

        v_cap.release()

        return faces


detection_pipeline = DetectionPipeline(detector=mtcnn, n_frames=20, batch_size=60)
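
# Usage sketch (hypothetical clip path): the pipeline returns a list of 224x224
# RGB face crops, one per sampled frame in which a face was found, e.g.
#   faces = detection_pipeline("some_clip.mp4")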

# Load the trained EfficientNetV2 classifier (a TensorFlow SavedModel directory).
model = tf.keras.models.load_model("./EfficientNetV2_Deepfakes_Video_Detector/p1")
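
# Assumption: the classifier takes (1, 224, 224, 3) inputs scaled to [0, 1] and
# outputs two softmax probabilities ordered [real, fake]; check with model.summary().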


def deepfakespredict(input_video):

    faces = detection_pipeline(input_video)

    # Guard against clips in which no face could be detected.
    if not faces:
        return "No face was detected in the video.", "Deepfakes Confidence: N/A", None

    total = 0
    real = 0
    fake = 0

    for face in faces:
        # Scale pixels to [0, 1] and add a batch dimension before predicting.
        face2 = face / 255
        pred = model.predict(np.expand_dims(face2, axis=0))[0]
        total += 1

        # pred is assumed to be a two-class softmax with index 1 = "fake".
        pred2 = pred[1]
        if pred2 > 0.5:
            fake += 1
        else:
            real += 1

    fake_ratio = fake / total

    text2 = f"Deepfakes Confidence: {fake_ratio * 100:.2f}%"
    if fake_ratio >= 0.5:
        text = "The video is FAKE."
    else:
        text = "The video is REAL."

    # Assemble the detected faces into a GIF, then convert it to MP4 for display.
    face_frames = []
    for face in faces:
        face_frames.append(Image.fromarray(face.astype('uint8'), 'RGB'))

    face_frames[0].save('results.gif', save_all=True,
                        append_images=face_frames[1:], duration=250, loop=100)
    clip = mp.VideoFileClip("results.gif")
    clip.write_videofile("video.mp4")

    return text, text2, "video.mp4"
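
# Hypothetical local smoke test (the path is illustrative only):
#   label, confidence, clip_path = deepfakespredict("sample.mp4")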


title = "EfficientNetV2 Deepfakes Video Detector"
description = "This is a demo implementation of the EfficientNetV2 Deepfakes Image Detector, applied to a video frame by frame. \
To use it, simply upload your video, or click one of the examples to load it. \
This demo and model represent the Final Year Project titled \"Achieving Face Swapped Deepfakes Detection Using EfficientNetV2\" by CS undergraduate Lee Sheng Yeh. \
The examples were extracted from Celeb-DF (V2) (Li et al., 2020) and FaceForensics++ (Rossler et al., 2019); full reference details are available in \"references.txt\". \
The examples are used under fair use solely to demonstrate the model. If any copyright is infringed, please contact the researcher via this email: [email protected]."

examples = [
    ['./EfficientNetV2_Deepfakes_Video_Detector/Video1-fake-1-ff.mp4'],
    ['./EfficientNetV2_Deepfakes_Video_Detector/Video6-real-1-ff.mp4'],
    ['./EfficientNetV2_Deepfakes_Video_Detector/Video3-fake-3-ff.mp4'],
    ['./EfficientNetV2_Deepfakes_Video_Detector/Video8-real-3-ff.mp4'],
    ['./EfficientNetV2_Deepfakes_Video_Detector/real-1.mp4'],
    ['./EfficientNetV2_Deepfakes_Video_Detector/fake-1.mp4'],
]

gr.Interface(deepfakespredict,
             inputs=["video"],
             outputs=["text", "text", gr.outputs.Video(label="Detected face sequence")],
             title=title,
             description=description,
             examples=examples
             ).launch()