"""Gradio app that checks an uploaded audio clip for an AudioSeal watermark."""

import functools
import io
import traceback

import gradio as gr
import matplotlib.pyplot as plt
import numpy as np
import torch
import torchaudio
import torchaudio.transforms as T
from audioseal import AudioSeal
from PIL import Image

# Sample rate every clip is resampled to before it is handed to the detector.
TARGET_SAMPLE_RATE = 16000
# Model card name passed to AudioSeal.load_detector.
DETECTOR_NAME = "audioseal_detector_16bits"


def plot_spectrogram(waveform, sample_rate):
    """Render a dB-scaled spectrogram of *waveform* as a PIL image.

    Args:
        waveform: Audio tensor whose last dimension is time. A 1-D tensor is
            drawn as-is; when leading (channel/batch) dimensions are present
            only the first entry along each of them is drawn.
        sample_rate: Sample rate of *waveform*. Not used by the plot (axes
            are labelled in frame/bin units); kept so callers need not change.

    Returns:
        A ``PIL.Image.Image`` holding the PNG rendering of the spectrogram.
    """
    spectrogram = T.Spectrogram()(waveform)
    spectrogram_db = T.AmplitudeToDB()(spectrogram)

    # imshow needs a 2-D (frequency, time) array. Indexing [0] unconditionally
    # would turn the spectrogram of a 1-D (mono) waveform into a single
    # frequency row, which imshow rejects, so only strip *extra* leading dims.
    while spectrogram_db.ndim > 2:
        spectrogram_db = spectrogram_db[0]

    fig, ax = plt.subplots(figsize=(10, 4))
    try:
        image = ax.imshow(
            spectrogram_db.detach().cpu().numpy(),
            cmap="hot",
            aspect="auto",
            origin="lower",
        )
        fig.colorbar(image, ax=ax, format="%+2.0f dB")
        ax.set_title("Spectrogram")
        ax.set_xlabel("Time Frame")
        ax.set_ylabel("Frequency")

        buf = io.BytesIO()
        fig.savefig(buf, format="png")
    finally:
        # Always release the figure, even when rendering fails, so repeated
        # requests do not accumulate open figures.
        plt.close(fig)

    buf.seek(0)
    return Image.open(buf)


@functools.lru_cache(maxsize=1)
def _load_detector():
    """Load the AudioSeal detector once and reuse it for later requests."""
    return AudioSeal.load_detector(DETECTOR_NAME)


def detect_watermark(audio_file_path, threshold=0.99):
    """Check an audio file for an AudioSeal watermark.

    The clip is downmixed to mono, peak-normalised, resampled to
    ``TARGET_SAMPLE_RATE`` and passed to the AudioSeal detector. The detector's
    watermark probability is compared against *threshold* to decide the
    verdict.

    Args:
        audio_file_path: Path of the audio file to analyse. A falsy value
            (nothing uploaded) yields a prompt instead of an error.
        threshold: Minimum watermark probability, in ``[0, 1]``, required to
            report the clip as AI-generated.

    Returns:
        A ``(message, image)`` tuple: a human-readable verdict and a PIL
        spectrogram image. On failure the message contains the error and its
        traceback and the image is ``None``.
    """
    if not audio_file_path:
        return "No audio file provided. Please upload an audio file.", None

    try:
        waveform, sample_rate = torchaudio.load(audio_file_path)

        # The detector takes single-channel audio; average multi-channel
        # input down to one channel instead of feeding it (1, C, T).
        if waveform.ndim == 2 and waveform.shape[0] > 1:
            waveform = waveform.mean(dim=0, keepdim=True)

        # Peak-normalise, skipping silent clips to avoid 0/0 -> NaN.
        peak = waveform.abs().max()
        if peak > 0:
            waveform = waveform / peak

        if sample_rate != TARGET_SAMPLE_RATE:
            resampler = T.Resample(
                orig_freq=sample_rate, new_freq=TARGET_SAMPLE_RATE
            )
            waveform = resampler(waveform)
            sample_rate = TARGET_SAMPLE_RATE

        # Add the batch dimension: (channels, time) -> (1, channels, time).
        if waveform.ndim < 3:
            waveform = waveform.unsqueeze(0)

        # Per the AudioSeal API, detect_watermark returns the probability that
        # the audio is watermarked plus the decoded message bits. Its
        # message_threshold argument only controls bit decoding, so the
        # user-facing threshold is applied to the probability here instead.
        with torch.no_grad():
            detection_prob, _message = _load_detector().detect_watermark(waveform)
        detection_prob = float(detection_prob)

        # Visual feedback
        waveform_image = plot_spectrogram(waveform.squeeze(), sample_rate)

        if detection_prob >= threshold:
            detection_message = (
                f"AI-generated with confidence: {detection_prob:.2f}"
            )
        else:
            detection_message = (
                "Likely human-generated or the AI watermark is undetectable "
                "at the current threshold."
            )
        return detection_message, waveform_image
    except Exception as e:
        # UI boundary: report the failure in the text output rather than
        # letting the request crash.
        error_traceback = traceback.format_exc()
        return f"Error occurred: {e}\n\n{error_traceback}", None


# Interface with dynamic threshold and visualization
interface = gr.Interface(
    fn=detect_watermark,
    inputs=[
        gr.Audio(label="Upload your audio", type="filepath"),
        gr.Slider(label="Detection Threshold", minimum=0, maximum=1, value=0.99),
    ],
    outputs=["text", "image"],
    title="Deep Fake Defender: AI Voice Cloning Detection",
    description="Upload an audio file to check if it's AI-generated or genuine. Adjust the detection threshold to change sensitivity.",
)

if __name__ == "__main__":
    interface.launch()