File size: 2,587 Bytes
f4f5a40
488d50e
f03ec98
e7c7540
1111e0a
811d3ce
1111e0a
 
 
 
dff69a4
1111e0a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d7a0eb1
e4b1e14
1111e0a
 
 
09457f4
1111e0a
 
09457f4
e7c7540
a4ace8a
 
270455b
d7a0eb1
1111e0a
270455b
1111e0a
 
105e8bf
1111e0a
 
 
 
 
 
d7a0eb1
811d3ce
1111e0a
811d3ce
1111e0a
270455b
 
1111e0a
 
270455b
1111e0a
270455b
f03ec98
 
faee536
1111e0a
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
import gradio as gr
from audioseal import AudioSeal
import torch
import torchaudio
import torchaudio.transforms as T
import traceback
import matplotlib.pyplot as plt
import numpy as np
import io
from PIL import Image

def plot_spectrogram(waveform, sample_rate):
    """Render a dB-scaled spectrogram of an audio signal as a PIL image.

    Args:
        waveform: Audio tensor. Accepts a 1-D ``(time,)`` signal, a 2-D
            ``(channels, time)`` signal, or a tensor with extra leading
            dimensions (these are flattened into the channel axis). Only the
            first channel is drawn.
        sample_rate: Sampling rate of ``waveform``. Not used by the plot
            (axes are labelled in frames/bins); kept so existing callers
            continue to work.

    Returns:
        A ``PIL.Image.Image`` holding the rendered PNG.
    """
    # Normalise the input to (channels, time). Without this a 1-D mono signal
    # yields a 2-D spectrogram, and indexing channel 0 would hand imshow a
    # 1-D row, which it rejects.
    if waveform.ndim == 1:
        waveform = waveform.unsqueeze(0)
    elif waveform.ndim > 2:
        waveform = waveform.reshape(-1, waveform.shape[-1])

    spectrogram = T.Spectrogram()(waveform)
    spectrogram_db = T.AmplitudeToDB()(spectrogram)
    first_channel = spectrogram_db[0].detach().cpu().numpy()

    buf = io.BytesIO()
    fig, ax = plt.subplots(figsize=(10, 4))
    try:
        heatmap = ax.imshow(first_channel, cmap='hot', aspect='auto', origin='lower')
        fig.colorbar(heatmap, ax=ax, format='%+2.0f dB')
        ax.set_title('Spectrogram')
        ax.set_xlabel('Time Frame')
        ax.set_ylabel('Frequency')
        fig.savefig(buf, format='png')
    finally:
        # Always release the figure, even if rendering fails; otherwise
        # pyplot keeps it alive and figures pile up across requests.
        plt.close(fig)

    buf.seek(0)
    image = Image.open(buf)
    # Image.open is lazy; decode now so the returned image is fully loaded.
    image.load()
    return image

# Lazily created detector shared by all requests; loading it is expensive.
_DETECTOR = None


def _get_detector():
    """Return the AudioSeal detector, loading it on first use only."""
    global _DETECTOR
    if _DETECTOR is None:
        _DETECTOR = AudioSeal.load_detector("audioseal_detector_16bits")
    return _DETECTOR


def detect_watermark(audio_file_path, threshold=0.99):
    """Check an audio file for an AudioSeal watermark.

    Args:
        audio_file_path: Path of the audio file to analyse. A falsy value
            (e.g. ``None`` when nothing was uploaded) short-circuits with a
            prompt instead of an error traceback.
        threshold: Minimum detection probability, in ``[0, 1]``, required to
            report the audio as watermarked.

    Returns:
        A ``(message, image)`` tuple. ``message`` is the human-readable
        verdict (or an error description including the traceback), and
        ``image`` is the spectrogram produced by ``plot_spectrogram`` or
        ``None`` when analysis did not complete.
    """
    if not audio_file_path:
        return "Please upload an audio file to analyze.", None

    try:
        waveform, sample_rate = torchaudio.load(audio_file_path)

        # Downmix to mono: the detector is fed a single-channel signal.
        if waveform.shape[0] > 1:
            waveform = waveform.mean(dim=0, keepdim=True)

        # Peak-normalize, skipping silent input to avoid dividing by zero
        # (which would fill the waveform with NaNs).
        peak = torch.max(torch.abs(waveform))
        if peak > 0:
            waveform = waveform / peak

        if sample_rate != 16000:
            resampler = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=16000)
            waveform = resampler(waveform)
            sample_rate = 16000

        # (channels, time) -> (batch, channels, time)
        batch = waveform.unsqueeze(0)

        detector = _get_detector()
        with torch.no_grad():
            # detect_watermark returns (probability, message_bits).
            # message_threshold only controls how the message bits are
            # binarised; it does not decide detection.
            probability, _message = detector.detect_watermark(batch, message_threshold=threshold)

        # Visual feedback; pass the 2-D (channels, time) signal.
        waveform_image = plot_spectrogram(waveform, sample_rate)

        # The verdict is the detection probability compared against the
        # user-selected threshold, not mere truthiness of the probability.
        if probability >= threshold:
            detection_message = f"AI-generated with confidence: {probability:.2f}"
        else:
            detection_message = "Likely human-generated or the AI watermark is undetectable at the current threshold."

        return detection_message, waveform_image
    except Exception as e:
        # UI boundary: report the failure to the user instead of crashing.
        error_traceback = traceback.format_exc()
        return f"Error occurred: {e}\n\n{error_traceback}", None

# Gradio UI: audio upload and threshold slider in, text verdict and
# spectrogram image out.
audio_input = gr.Audio(label="Upload your audio", type="filepath")
threshold_slider = gr.Slider(
    label="Detection Threshold",
    minimum=0,
    maximum=1,
    value=0.99,
)

interface = gr.Interface(
    title="Deep Fake Defender: AI Voice Cloning Detection",
    description="Upload an audio file to check if it's AI-generated or genuine. Adjust the detection threshold to change sensitivity.",
    fn=detect_watermark,
    inputs=[audio_input, threshold_slider],
    outputs=["text", "image"],
)

if __name__ == "__main__":
    # Launch the app only when this file is run as a script.
    interface.launch()