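# app.py: a small Gradio demo that checks an uploaded audio clip for an
# AudioSeal watermark and reports whether it looks AI-generated or genuine.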
import gradio as gr
from audioseal import AudioSeal
import torch
import numpy as np
import traceback

def detect_watermark(audio_data):
    try:
        # Gradio's Audio component with type="numpy" passes a (sample_rate, audio_array) tuple
        if not (isinstance(audio_data, tuple) and len(audio_data) == 2):
            return f"Invalid input: expected a (sample_rate, audio_array) tuple, got {type(audio_data)}"

        sample_rate, audio_array = audio_data

        # Ensure the first element of the tuple is an integer (sample rate)
        if not isinstance(sample_rate, int):
            return f"Invalid input: expected the first element of the tuple to be an int (sample rate), got {type(sample_rate)}"

        # Ensure the second element of the tuple is a NumPy array
        if not isinstance(audio_array, np.ndarray):
            return f"Invalid input: expected the second element of the tuple to be a np.ndarray, got {type(audio_array)}"

        # Now we can proceed with the assurance that audio_array is an np.ndarray and sample_rate is an int
        # Gradio returns mono audio as (samples,) and multi-channel audio as (samples, channels);
        # AudioSeal expects (channels, samples), so add a channel axis or transpose accordingly
        if audio_array.ndim == 1:
            audio_array = np.expand_dims(audio_array, axis=0)
        else:
            audio_array = audio_array.T

        # Convert to a float32 tensor; Gradio typically delivers 16-bit integer samples,
        # so scale integer audio into the [-1, 1] range the model expects
        waveform = torch.tensor(audio_array, dtype=torch.float32)
        if np.issubdtype(audio_array.dtype, np.integer):
            waveform = waveform / np.iinfo(audio_array.dtype).max

        # Ensure waveform is 3D (batch, channels, samples) for AudioSeal
        if waveform.ndim == 2:
            waveform = waveform.unsqueeze(0)

        # Load the AudioSeal detector (the model is downloaded on first use) and run detection;
        # the detector is trained on 16 kHz audio, so the sample rate is passed along
        detector = AudioSeal.load_detector("audioseal_detector_16bits")
        result, _ = detector.detect_watermark(waveform, sample_rate=sample_rate, message_threshold=0.5)

        # result is the probability (0..1) that an AudioSeal watermark is present
        detection_result = "AI-generated" if result > 0.5 else "genuine"
        return f"This audio is likely {detection_result} based on watermark detection."

    except Exception as e:
        # Capture the full traceback in case of an error and return it as a string
        error_traceback = traceback.format_exc()
        return f"Error occurred: {str(e)}\n\n{error_traceback}"

# Define Gradio interface
interface = gr.Interface(
    fn=detect_watermark,
    inputs=gr.Audio(label="Upload your audio", type="numpy"),
    outputs="text",
    title="Deep Fake Defender: AI Voice Cloning Detection",
    description="Upload an audio file to check if it's AI-generated or genuine."
)

if __name__ == "__main__":
    interface.launch()