"""Gradio app that checks an uploaded audio clip for an AudioSeal watermark."""

import functools
import traceback

import gradio as gr
import numpy as np
import torch
from audioseal import AudioSeal

# Minimum detection score for a clip to be reported as watermarked.
DETECTION_THRESHOLD = 0.5


@functools.lru_cache(maxsize=1)
def _load_detector():
    """Load the AudioSeal detector on first use and reuse it afterwards."""
    return AudioSeal.load_detector("audioseal_detector_16bits")


def _split_audio_input(audio_data):
    """Return ``(audio_array, sample_rate)`` from a two-element audio tuple.

    Gradio's ``type="numpy"`` audio input is documented as
    ``(sample_rate, data)``. The reversed ``(data, sample_rate)`` order is
    accepted as well so callers written against the earlier version of this
    function keep working.
    """
    first, second = audio_data
    if isinstance(first, np.ndarray):
        return first, second
    return second, first


def _to_waveform(audio_array):
    """Convert raw samples into a float32 tensor shaped (1, 1, samples).

    Integer PCM is scaled into [-1, 1]; floating-point data is only cast.
    Two-dimensional input is averaged down to a single channel.
    """
    if np.issubdtype(audio_array.dtype, np.integer):
        info = np.iinfo(audio_array.dtype)
        full_scale = float(max(abs(int(info.min)), int(info.max)))
        samples = audio_array.astype(np.float32) / full_scale
    else:
        samples = audio_array.astype(np.float32)

    if samples.ndim == 2:
        # A clip has far more samples than channels, so the shorter axis is
        # taken to be the channel axis. This copes with both the
        # (samples, channels) and (channels, samples) layouts.
        # NOTE(review): downmixing assumes the detector checkpoint is
        # single-channel - confirm against the AudioSeal model card.
        channel_axis = 0 if samples.shape[0] <= samples.shape[1] else 1
        samples = samples.mean(axis=channel_axis)

    return torch.from_numpy(np.ascontiguousarray(samples)).reshape(1, 1, -1)


def detect_watermark(audio_data):
    """Report whether an audio clip appears to carry an AudioSeal watermark.

    Args:
        audio_data: ``None`` when nothing was uploaded, otherwise a
            two-element tuple holding a NumPy sample array and an integer
            sample rate (either order is accepted).

    Returns:
        A human-readable string: the verdict, a description of what was
        wrong with the input, or an error message followed by a traceback
        if detection itself failed. This function never raises.
    """
    # This is the UI boundary: anything that goes wrong is reported as text
    # so the Gradio output box always has something to show.
    try:
        if audio_data is None:
            return "No audio provided. Please upload an audio file."

        if not (isinstance(audio_data, tuple) and len(audio_data) == 2):
            size = len(audio_data) if hasattr(audio_data, "__len__") else "unknown"
            return (
                "Invalid input: expected a tuple with two elements, "
                f"got {type(audio_data)} with length {size}"
            )

        audio_array, sample_rate = _split_audio_input(audio_data)

        if not isinstance(audio_array, np.ndarray):
            return (
                "Invalid input: expected one element of the tuple to be a "
                f"np.ndarray, got {type(audio_data[0])} and {type(audio_data[1])}"
            )

        if not isinstance(sample_rate, (int, np.integer)):
            return (
                "Invalid input: expected the sample rate to be an int, "
                f"got {type(sample_rate)}"
            )

        if audio_array.size == 0:
            return "Invalid input: the audio contains no samples."

        if audio_array.ndim > 2:
            return (
                "Invalid input: expected 1-D or 2-D audio data, "
                f"got {audio_array.ndim} dimensions"
            )

        # NOTE(review): sample_rate is validated but not forwarded, and no
        # resampling happens here. Confirm which sample rate the detector
        # expects and whether the installed AudioSeal version accepts one.
        waveform = _to_waveform(audio_array)

        detector = _load_detector()
        with torch.no_grad():
            result, _ = detector.detect_watermark(waveform, message_threshold=0.5)

        # The detector's first return value is a score, not a boolean, so it
        # is compared against a threshold; plain truthiness would flag any
        # non-zero score as watermarked.
        is_watermarked = float(result) > DETECTION_THRESHOLD
        detection_result = "AI-generated" if is_watermarked else "genuine"
        return f"This audio is likely {detection_result} based on watermark detection."
    except Exception as e:
        # Return the full traceback as text so failures are visible in the UI.
        error_traceback = traceback.format_exc()
        return f"Error occurred: {str(e)}\n\n{error_traceback}"


# Define Gradio interface
interface = gr.Interface(
    fn=detect_watermark,
    inputs=gr.Audio(label="Upload your audio", type="numpy"),
    outputs="text",
    title="Deep Fake Defender: AI Voice Cloning Detection",
    description="Upload an audio file to check if it's AI-generated or genuine.",
)

if __name__ == "__main__":
    interface.launch()