Spaces:

ssaad5678
/

deepfake_detect

Runtime error

App Files Files Community

ssaad5678 committed on Mar 30

Commit

539512b

•

1 Parent(s): 448c116

Update app.py

Browse files

Files changed (1) hide show

app.py +56 -63

app.py CHANGED Viewed

@@ -19,7 +19,7 @@ mtcnn = MTCNN(
     select_largest=False,
     post_process=False,
     device=DEVICE
-).eval()
 model = InceptionResnetV1(
     pretrained="vggface2",
     classify=True,
@@ -38,88 +38,81 @@ def predict_frame(frame):
     frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
     frame_pil = Image.fromarray(frame)
-    # Detect faces
-    boxes, probs = mtcnn.detect(frame_pil)
-    faces = []
-    confidences = []
-    if boxes is not None:
-        for box, prob in zip(boxes, probs):
-            # Extract face
-            x1, y1, x2, y2 = box.astype(int)
-            face = frame[y1:y2, x1:x2]
-            # Preprocess the face
-            face = cv2.resize(face, (160, 160))  # Resize to match InceptionResnetV1 input size
-            face = torch.tensor(face).permute(2, 0, 1).unsqueeze(0).float().to(DEVICE) / 255.0
-            # Predict
-            with torch.no_grad():
-                output = torch.sigmoid(model(face).squeeze())
-                prediction = "real" if output.item() < 0.5 else "fake"
-                # Confidence scores
-                real_prediction = 1 - output.item()
-                fake_prediction = output.item()
-                confidences.append({
-                    'prediction': prediction,
-                    'confidence': fake_prediction if prediction == 'fake' else real_prediction
-                })
-                # Visualize
-                target_layers = [model.block8.branch1[-1]]
-                use_cuda = True if torch.cuda.is_available() else False
-                cam = GradCAM(model=model, target_layers=target_layers, use_cuda=use_cuda)
-                targets = [ClassifierOutputTarget(0)]
-                grayscale_cam = cam(input_tensor=face, targets=targets, eigen_smooth=True)
-                grayscale_cam = grayscale_cam[0, :]
-                visualization = show_cam_on_image(face.squeeze().permute(1, 2, 0).cpu().numpy(), grayscale_cam, use_rgb=True)
-                face_with_mask = cv2.addWeighted((face.squeeze().permute(1, 2, 0).cpu().numpy() * 255).astype(np.uint8), 1, (visualization * 255).astype(np.uint8), 0.5, 0)
-                faces.append(face_with_mask)
-    return faces, confidences
 def predict_video(input_video):
     cap = cv2.VideoCapture(input_video)
     frames = []
-    all_confidences = []
     while True:
         ret, frame = cap.read()
         if not ret:
             break
-        faces, confidences = predict_frame(frame)
-        if faces:
-            frames.extend(faces)
-        all_confidences.extend(confidences)
     cap.release()
     # Determine the final prediction based on the maximum occurrence of predictions
-    final_prediction = 'fake' if sum(1 for conf in all_confidences if conf['prediction'] == 'fake') > sum(1 for conf in all_confidences if conf['prediction'] == 'real') else 'real'
-    return final_prediction, frames, all_confidences
 # Gradio Interface
-def show_detected_faces(video):
-    prediction, frames, confidences = predict_video(video.name)
-    return prediction, frames, confidences
-gr.Interface(
-    fn=show_detected_faces,
     inputs=[
-        gr.inputs.Video(label="Input Video", type="file")
     ],
     outputs=[
-        gr.outputs.Label(label="Class"),
-        gr.outputs.Image(label="Detected Faces", type="numpy", multiple=True),
-        gr.outputs.Label(label="Confidences", type="json")
     ],
-    title="Deep Fake Video Detection",
-    description="Detect whether the Video is fake or real and visualize the detected faces with confidence scores."
-).launch()

     select_largest=False,
     post_process=False,
     device=DEVICE
+).to(DEVICE).eval()
 model = InceptionResnetV1(
     pretrained="vggface2",
     classify=True,
     frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
     frame_pil = Image.fromarray(frame)
+    face = mtcnn(frame_pil)
+    if face is None:
+        return None, None  # No face detected
+    # Preprocess the face
+    face = F.interpolate(face.unsqueeze(0), size=(256, 256), mode='bilinear', align_corners=False)
+    face = face.to(DEVICE, dtype=torch.float32) / 255.0
+    # Predict
+    with torch.no_grad():
+        output = torch.sigmoid(model(face).squeeze(0))
+        prediction = "real" if output.item() < 0.5 else "fake"
+        # Confidence scores
+        real_prediction = 1 - output.item()
+        fake_prediction = output.item()
+        confidences = {
+            'real': real_prediction,
+            'fake': fake_prediction
+        }
+    # Visualize
+    target_layers = [model.block8.branch1[-1]]
+    use_cuda = True if torch.cuda.is_available() else False
+    cam = GradCAM(model=model, target_layers=target_layers, use_cuda=use_cuda)
+    targets = [ClassifierOutputTarget(0)]
+    grayscale_cam = cam(input_tensor=face, targets=targets, eigen_smooth=True)
+    grayscale_cam = grayscale_cam[0, :]
+    face_np = face.squeeze(0).permute(1, 2, 0).cpu().numpy()
+    visualization = show_cam_on_image(face_np, grayscale_cam, use_rgb=True)
+    face_with_mask = cv2.addWeighted((face_np * 255).astype(np.uint8), 1, (visualization * 255).astype(np.uint8), 0.5, 0)
+    return prediction, face_with_mask
 def predict_video(input_video):
     """Classify a video as 'fake' or 'real' by majority vote over sampled frames.

     Only every ``skip_frames``-th frame is passed to ``predict_frame``.
     Frames for which ``predict_frame`` reports no face (a ``None``
     prediction) cast no vote.

     Args:
         input_video: Value handed to ``cv2.VideoCapture`` (the video to read).

     Returns:
         'fake' when strictly more sampled frames were predicted 'fake' than
         'real'; otherwise 'real'. Ties, videos that cannot be read, and
         videos with no detected face therefore all yield 'real'.
     """
     skip_frames = 20  # analyse one frame out of every 20
     votes = []
     cap = cv2.VideoCapture(input_video)
     try:
         frame_count = 0
         while True:
             ret, frame = cap.read()
             if not ret:
                 break
             frame_count += 1
             if frame_count % skip_frames != 0:
                 continue
             # The Grad-CAM overlay is not part of the result, so it is
             # dropped here instead of being accumulated in memory.
             prediction, _ = predict_frame(frame)
             if prediction is None:
                 # No face detected in this frame: nothing to vote with.
                 continue
             votes.append(prediction)
     finally:
         # Release the capture even if predict_frame raises mid-video.
         cap.release()
     # Determine the final prediction based on the maximum occurrence of predictions
     return 'fake' if votes.count('fake') > votes.count('real') else 'real'
 # Gradio Interface
 # One uploaded video goes in, one class label comes out; predict_video
 # is the callback that produces the label.
 interface = gr.Interface(
     fn=predict_video,
     inputs=[gr.Video(label="Input Video")],
     outputs=[gr.Label(label="Class")],
     title="Deep fake video Detection",
     description="Detect whether the  Video is fake or real",
 )
 interface.launch()