import gradio as gr
from audioseal import AudioSeal
import torch
import numpy as np
import traceback

# Name of the pretrained AudioSeal detector checkpoint to load.
_DETECTOR_NAME = "audioseal_detector_16bits"

# The detector reports a watermark probability; strictly above this value the
# audio is reported as watermarked (i.e. AI-generated).
_DETECTION_THRESHOLD = 0.5

# Loaded detectors keyed by checkpoint name, so the model is loaded once per
# process instead of on every request.
_detector_cache = {}


def _get_detector(name=_DETECTOR_NAME):
    """Return the AudioSeal detector for *name*, loading it on first use."""
    if name not in _detector_cache:
        _detector_cache[name] = AudioSeal.load_detector(name)
    return _detector_cache[name]


def _to_waveform(samples):
    """Convert a NumPy sample array to a float32 tensor for the detector.

    Integer PCM is scaled into [-1, 1]. A 2D array is down-mixed to mono by
    averaging over its channel axis; the shorter axis is taken to be the
    channel axis, which covers both Gradio's ``(samples, channels)`` layout
    and a ``(channels, samples)`` layout. 1D/2D inputs come back shaped
    ``(1, 1, num_samples)``; arrays with three or more dimensions are passed
    through with their shape unchanged.
    """
    if np.issubdtype(samples.dtype, np.integer):
        # e.g. int16 PCM: divide by the dtype's max so values land in [-1, 1].
        scale = float(np.iinfo(samples.dtype).max)
        samples = samples.astype(np.float32) / scale
    else:
        samples = samples.astype(np.float32)

    if samples.ndim == 2:
        channel_axis = 0 if samples.shape[0] < samples.shape[1] else 1
        samples = samples.mean(axis=channel_axis)

    waveform = torch.tensor(samples, dtype=torch.float32)
    # Add leading (batch, channel) axes until the tensor is 3D.
    while waveform.ndim < 3:
        waveform = waveform.unsqueeze(0)
    return waveform


def detect_watermark(audio_data):
    """Report whether the given audio appears to carry an AudioSeal watermark.

    Args:
        audio_data: A 2-tuple holding a sample rate (int) and the samples
            (``np.ndarray``). Gradio's ``type="numpy"`` audio input supplies
            ``(sample_rate, samples)``; the reversed order
            ``(samples, sample_rate)`` is accepted as well.

    Returns:
        A human-readable string: the verdict, an ``"Invalid input: ..."``
        message when validation fails, or an ``"Error occurred: ..."``
        message including the traceback when anything else goes wrong.
        This function does not raise; every ``Exception`` is converted to
        the error string so the UI always has text to display.
    """
    try:
        if audio_data is None:
            return "Invalid input: no audio was provided."

        if not (isinstance(audio_data, tuple) and len(audio_data) == 2):
            # len() is only safe on sized objects; never let the error
            # message itself raise.
            length = len(audio_data) if hasattr(audio_data, "__len__") else "unknown"
            return f"Invalid input: expected a tuple with two elements, got {type(audio_data)} with length {length}"

        # Accept either element order; whichever element is the array holds
        # the samples, the other one is the sample rate.
        first, second = audio_data
        if isinstance(first, np.ndarray):
            audio_array, sample_rate = first, second
        else:
            sample_rate, audio_array = first, second

        if not isinstance(audio_array, np.ndarray):
            return f"Invalid input: expected one element of the tuple to be a np.ndarray, got {type(first)} and {type(second)}"

        # bool is a subclass of int but is never a meaningful sample rate.
        if isinstance(sample_rate, bool) or not isinstance(sample_rate, (int, np.integer)):
            return f"Invalid input: expected the other element of the tuple to be an int (sample rate), got {type(sample_rate)}"

        if audio_array.size == 0:
            return "Invalid input: the audio contains no samples."

        waveform = _to_waveform(audio_array)

        # NOTE(review): sample_rate is validated but not forwarded to the
        # detector, matching the original call. AudioSeal is presumably
        # trained on 16 kHz audio and newer releases appear to accept a
        # sample_rate argument -- confirm against the installed version and
        # resample/forward the rate if needed.
        detector = _get_detector()
        with torch.no_grad():
            result, _ = detector.detect_watermark(waveform, message_threshold=0.5)

        # `result` is a probability, not a boolean: compare it to a threshold
        # instead of relying on truthiness (any non-zero value is truthy).
        is_watermarked = float(result) > _DETECTION_THRESHOLD
        detection_result = "AI-generated" if is_watermarked else "genuine"
        return f"This audio is likely {detection_result} based on watermark detection."

    except Exception as e:
        # Top-level UI boundary: surface the full traceback as text rather
        # than letting the request fail without an explanation.
        error_traceback = traceback.format_exc()
        return f"Error occurred: {str(e)}\n\n{error_traceback}"

# Gradio UI: one audio upload, delivered to the handler in NumPy form, whose
# result is displayed as plain text.
_audio_input = gr.Audio(type="numpy", label="Upload your audio")

interface = gr.Interface(
    title="Deep Fake Defender: AI Voice Cloning Detection",
    description="Upload an audio file to check if it's AI-generated or genuine.",
    fn=detect_watermark,
    inputs=_audio_input,
    outputs="text",
)

if __name__ == "__main__":
    # Launch the app only when run as a script, not when imported.
    interface.launch()