import os
# Shell out to pip at import time to install the openai/whisper package from
# GitHub before the remaining imports run. The exit status is not checked, so
# a failed install does not stop the script here.
# NOTE(review): no `import whisper` is visible in this file (transcription goes
# through the transformers pipeline) -- confirm this install is still needed,
# and consider declaring it in the environment's requirements instead.
os.system("pip install git+https://github.com/openai/whisper.git")
import evaluate
from evaluate.utils import launch_gradio_widget
import gradio as gr
import torch
from speechbrain.pretrained.interfaces import foreign_class
from transformers import AutoModelForSequenceClassification, pipeline, RobertaForSequenceClassification, RobertaTokenizer, AutoTokenizer
# pull in emotion detection
# --- Add element for specification
# pull in text classification
# --- Add custom labels
# --- Associate labels with radio elements
# add logic to initiate mock notification when detected
# pull in misophonia-specific model

# Building prediction function for gradio
# Maps the short emotion codes produced by the emotion classifier to the
# human-readable names returned to the UI.
emo_dict = dict(
    sad='Sad',
    hap='Happy',
    ang='Anger',
    neu='Neutral',
)

# static classes for now, but it would be best to have the user select from multiple, and to enter their own
# Candidate labels handed to the zero-shot text classifier, keyed by the
# category the user picks in the UI.
class_options = {
    "racism": [
        "racism",
        "hate speech",
        "bigotry",
        "racially targeted",
        "racially diminutive",
        "racial slur",
        "ethnic slur",
        "ethnic hate",
        "pro-white nationalism",
    ],
    "LGBTQ+ hate": [
        "gay slur",
        "trans slur",
        "homophobic slur",
        "transphobia",
        # Was misspelled "anti-LBGTQ+"; the label text is what the zero-shot
        # model scores against, so the spelling matters.
        "anti-LGBTQ+",
        "hate speech",
    ],
    "sexually explicit": [
        "sexually explicit",
        "sexually coercive",
        "sexual exploitation",
        "vulgar",
        "raunchy",
        "sexually demeaning",
        "sexual violence",
        "victim blaming",
    ],
    "misophonia": [
        "chewing",
        "breathing",
        "mouthsounds",
        "popping",
        "sneezing",
        "yawning",
        "smacking",
        "sniffling",
        "panting",
    ],
}

# Speech-recognition pipeline, built once at import time and used by
# classify_toxicity to transcribe uploaded audio.
pipe = pipeline(
    task="automatic-speech-recognition",
    model="openai/whisper-large",
)

# Prediction callback for the Gradio interface: takes an audio file path, a text input and the selected category
# Models are expensive to load, so each one is created on first use and then
# reused by later calls instead of being rebuilt on every request.
_MODEL_CACHE = {}


def _get_model(name, loader):
    """Return the model cached under ``name``, calling ``loader()`` once to build it."""
    if name not in _MODEL_CACHE:
        _MODEL_CACHE[name] = loader()
    return _MODEL_CACHE[name]


def classify_toxicity(audio_file, text_input, classify_anxiety):
    """Score speech or text for toxicity, category labels and emotion.

    Args:
        audio_file: Path to an audio file, or ``None`` when only text was
            supplied. When given, it is transcribed with the module-level
            ``pipe`` and the transcript is analysed instead of ``text_input``.
        text_input: Text to analyse when ``audio_file`` is ``None``.
        classify_anxiety: Selected category; used as the key into
            ``class_options`` to choose the zero-shot candidate labels.

    Returns:
        ``classify_anxiety`` unchanged when it equals ``"misophonia"`` (no
        model is run for that category). Otherwise the tuple
        ``(toxicity_score, classification_output, emotion, transcribed_text)``,
        where ``emotion`` is ``None`` when there was no audio to analyse and
        ``classification_output`` is ``None`` when the category has no
        candidate labels.
    """
    # Nothing is computed for misophonia, so return before touching any model.
    if classify_anxiety == "misophonia":
        return classify_anxiety

    emotion = None
    if audio_file is not None:
        # Transcribe the audio file using Whisper ASR
        transcribed_text = pipe(audio_file)["text"]

        #### Emotion classification ####
        # Works on the audio itself, so it only runs when a file was provided;
        # text-only requests leave ``emotion`` as None instead of crashing.
        emotion_classifier = _get_model(
            "emotion",
            lambda: foreign_class(
                source="speechbrain/emotion-recognition-wav2vec2-IEMOCAP",
                pymodule_file="custom_interface.py",
                classname="CustomEncoderWav2vec2Classifier",
            ),
        )
        _, _, _, text_lab = emotion_classifier.classify_file(audio_file)
        emotion = emo_dict[text_lab[0]]
    else:
        transcribed_text = text_input

    #### Toxicity Classifier ####
    toxicity_module = _get_model(
        "toxicity",
        lambda: evaluate.load("toxicity", "facebook/roberta-hate-speech-dynabench-r4-target"),
    )
    toxicity_results = toxicity_module.compute(predictions=[transcribed_text])
    toxicity_score = toxicity_results["toxicity"][0]
    print(toxicity_score)

    #### Text classification #####
    print(classify_anxiety, class_options)
    candidate_labels = class_options.get(classify_anxiety, [])
    classification_output = None
    # The zero-shot pipeline rejects an empty label list, so skip it when the
    # selected category (or no selection at all) has no labels.
    if candidate_labels:
        text_classifier = _get_model(
            "zero-shot",
            lambda: pipeline("zero-shot-classification", model="facebook/bart-large-mnli"),
        )
        classification_output = text_classifier(
            transcribed_text, candidate_labels, multi_label=True
        )
    print(classification_output)

    return toxicity_score, classification_output, emotion, transcribed_text
 
# Assemble the UI: a category selector, the audio/text inputs with a submit
# button, and a single text output.
with gr.Blocks() as iface:
    with gr.Column():
        classify = gr.Radio(
            choices=["racism", "LGBTQ+ hate", "sexually explicit", "misophonia"],
        )
    with gr.Column():
        aud_input = gr.Audio(label="Upload Audio File", source="upload", type="filepath")
        text = gr.Textbox(placeholder="Enter text here...", label="Enter Text")
        submit_btn = gr.Button(label="Run")
    with gr.Column():
        out_text = gr.Textbox()

    # A click passes (audio path, text, selected category) to the classifier
    # and writes whatever it returns into the output textbox.
    submit_btn.click(
        classify_toxicity,
        inputs=[aud_input, text, classify],
        outputs=out_text,
    )

iface.launch()