__all__ = ['block', 'make_clickable_model', 'make_clickable_user', 'get_submissions']

import gradio as gr
import pandas as pd
import re
import numpy as np
from collections import defaultdict
from constants import *
import os
from huggingface_hub import Repository
import json

global data_component_aad, data_component_iasd, data_component_ivqd, filter_component

TOKEN = os.environ.get("TOKEN")

repo = Repository(local_dir="./download_from_dataset", clone_from="MM-UPD/results_for_leaderboard", repo_type="dataset", use_auth_token=TOKEN)
current_directory = os.getcwd()


def validate_model_size(s):
    pattern = r'^\d+B$|^-$'
    if re.match(pattern, s):
        return s
    else:
        return '-'


def upload_file(files):
    file_paths = [file.name for file in files]
    return file_paths


def create_df(input_file):
    json_string = input_file.decode('utf-8')
    data = json.loads(json_string)
    df = pd.DataFrame(data)
    return df


# Accuracy Report
def report_acc(df, groupd='category', metric_type="dual"):
    assert 'split' in df
    assert groupd in [None, 'category', 'l2-category']

    res = defaultdict(list)
    res['split'] = ['test']

    if groupd is None:
        if metric_type == "dual":
            res['overall'] = [np.mean(df['hit'])]
        elif metric_type == "standard":
            res['overall'] = [np.mean(df['hit_standard'])]
        elif metric_type == "upd":
            res['overall'] = [np.mean(df['hit_upd'])]
        return pd.DataFrame(res)
    elif groupd in df:
        abilities = list(set(df[groupd]))
        abilities.sort()
        for ab in abilities:
            sub_df = df[df[groupd] == ab]
            if metric_type == "dual":
                res[ab] = [np.mean(sub_df['hit'])]
            elif metric_type == "standard":
                res[ab] = [np.mean(sub_df['hit_standard'])]
            elif metric_type == "upd":
                res[ab] = [np.mean(sub_df['hit_upd'])]
        return pd.DataFrame(res)


def eval_result_dual(data_main, metric_type="dual"):
    overall = report_acc(data_main, None, metric_type)
    leaf = report_acc(data_main, 'category', metric_type)

    overall = round(overall['overall'].values[0] * 100, 1)
    leaf = leaf.iloc[:, 1:].values.flatten().tolist()
    leaf = [round(x * 100, 1) for x in leaf]

    return overall, leaf


def calculate_score(input_file):
    dual_df = create_df(input_file)
    overall_dual, leaf_dual = eval_result_dual(dual_df)
    overall_standard, leaf_standard = eval_result_dual(dual_df, metric_type="standard")
    overall_upd, leaf_upd = eval_result_dual(dual_df, metric_type="upd")

    return overall_dual, overall_standard, overall_upd, leaf_dual


# add the new data into the queue
def add_queue(base_df, input_file, model_name):
    dual_df = create_df(input_file)
    base_df[f"{model_name}_prediction_standard"] = dual_df["prediction_standard"]
    base_df[f"{model_name}_hit_standard"] = dual_df["hit_standard"]
    base_df[f"{model_name}_prediction_upd"] = dual_df["prediction_upd"]
    base_df[f"{model_name}_hit_upd"] = dual_df["hit_upd"]
    base_df[f"{model_name}_hit"] = dual_df["hit"]
    return base_df


# check whether the input file is correct or not
def validity_check(input_file, UPD_type, question_type):
    input_df = create_df(input_file)

    # check for the correct data size
    data_num_dict = {"AAD": 820, "IASD": 919, "IVQD": 356}
    assert len(input_df) == data_num_dict[UPD_type], "Different Data Size"
    print("len(input)", len(input_df))
    print("data_num_dict[UPD_type]", data_num_dict[UPD_type])

    # check for missing columns
    column_list = ["hit_upd", "hit_standard", "hit", "prediction_upd", "prediction_standard"]
    assert all(x in input_df.columns for x in column_list), "Column Missing"

    # check for missing values
    assert not input_df[column_list].isnull().any().any(), "Missing values found in columns"

    # check for the presence of the correct values
    option_mapping = {"AAD": "None of the above", "IASD": "None of the above", "IVQD": "The image and question are irrelevant."}
    instruction_mapping = {"AAD": "F. None of the above", "IASD": "F. None of the above", "IVQD": "F. The image and question are irrelevant."}

    input_df["D_upd"] = input_df["D_upd"].fillna("")

    if question_type == "Base":
        assert not input_df["D_upd"].str.contains(option_mapping[UPD_type]).any(), f"{option_mapping[UPD_type]} found in Base"
        assert not input_df["prediction_upd"].str.contains(instruction_mapping[UPD_type]).any(), f"{instruction_mapping[UPD_type]} found in Base"
    elif question_type == "Option":
        assert input_df["D_upd"].str.contains(option_mapping[UPD_type]).any(), f"{option_mapping[UPD_type]} not found in Option"
        assert not input_df["prediction_upd"].str.contains(instruction_mapping[UPD_type]).any(), f"{instruction_mapping[UPD_type]} found in Option"
    elif question_type == "Instruction":
        assert not input_df["D_upd"].str.contains(option_mapping[UPD_type]).any(), f"{option_mapping[UPD_type]} found in Instruction"

    return True


def add_new_eval(
    input_file,
    model_type: str,
    model_name_textbox: str,
    revision_name_textbox: str,
    model_link: str,
    model_size: str,
    upd_type: str,
    LLM_type: str,
    LLM_name_textbox: str,
    question_type: str
):
    if input_file is None:
        warning_text = "Error! Empty file!"
        print(warning_text)
        return warning_text
    else:
        model_size = validate_model_size(model_size)

        if upd_type == 'AAD':
            csv_path = CSV_AAD_RESULT_PATH
        elif upd_type == 'IASD':
            csv_path = CSV_IASD_RESULT_PATH
        elif upd_type == 'IVQD':
            csv_path = CSV_IVQD_RESULT_PATH

        validity_check(input_file, upd_type, question_type)

        csv_data = pd.read_csv(csv_path)

        overall_dual_acc, overall_standard_acc, overall_upd_acc, leaf_dual = calculate_score(input_file)

        if LLM_type == 'Other':
            LLM_name = LLM_name_textbox
        else:
            LLM_name = LLM_type

        if revision_name_textbox == '':
            col = csv_data.shape[0]
            model_name = model_name_textbox
        else:
            model_name = revision_name_textbox
            model_name_list = csv_data['Model']
            name_list = [name.split(']')[0][1:] for name in model_name_list]
            if revision_name_textbox not in name_list:
                col = csv_data.shape[0]
            else:
                col = name_list.index(revision_name_textbox)

        if model_link == '':
            model_name = model_name  # no url
        else:
            model_name = '[' + model_name + '](' + model_link + ')'

        # add new data
        new_data = [
            model_type,
            model_name,
            LLM_name,
            model_size,
            question_type,
            overall_dual_acc,
            overall_standard_acc,
            overall_upd_acc,
        ]
        new_data += leaf_dual

        # If the same data already exists, return an error.
        if new_data in csv_data.values.tolist():
            warning_text = "Error! The same data already exists!"
            print(warning_text)
            return warning_text
        # If the same model name already exists, return an error.
        elif new_data[:5] in csv_data.values.tolist():
            warning_text = "Error! The same data already exists! Please fill revision_name."
            print(warning_text)
            return warning_text

        csv_data.loc[col] = new_data
        csv_data.to_csv(csv_path, index=False)

        absolute_result_path = os.path.abspath(csv_path)
        if not os.path.exists(absolute_result_path):
            raise FileNotFoundError(f"File {absolute_result_path} not found")

        repo.git_pull()
        repo.git_add(absolute_result_path)

        csv_queue_path = os.path.join(CSV_QUEUE_DIR, f"detail_results_{upd_type.lower()}_{question_type.lower()}.csv")
        base_data = pd.read_csv(csv_queue_path)
        base_data = add_queue(base_data, input_file, model_name)
        base_data.to_csv(csv_queue_path, index=False)

        absolute_queue_path = os.path.abspath(csv_queue_path)
        if not os.path.exists(absolute_queue_path):
            raise FileNotFoundError(f"File {absolute_queue_path} not found")

        repo.git_add(absolute_queue_path)
        repo.git_commit(f"add {model_name} results in {question_type}")
        repo.git_push()

        return 0


def get_baseline_aad_df():
    repo.git_pull()
    df = pd.read_csv(CSV_AAD_RESULT_PATH)
    df = df.sort_values(by="Overall Dual Acc.", ascending=False)
    present_columns = MODEL_INFO + checkbox_aad_group.value
    df = df[present_columns]
    return df


def get_all_aad_df():
    repo.git_pull()
    df = pd.read_csv(CSV_AAD_RESULT_PATH)
    df = df.sort_values(by="Overall Dual Acc.", ascending=False)
    return df


def get_baseline_iasd_df():
    repo.git_pull()
    df = pd.read_csv(CSV_IASD_RESULT_PATH)
    df = df.sort_values(by="Overall Dual Acc.", ascending=False)
    present_columns = MODEL_INFO + checkbox_iasd_group.value
    df = df[present_columns]
    return df


def get_all_iasd_df():
    repo.git_pull()
    df = pd.read_csv(CSV_IASD_RESULT_PATH)
    df = df.sort_values(by="Overall Dual Acc.", ascending=False)
    return df


def get_baseline_ivqd_df():
    repo.git_pull()
    df = pd.read_csv(CSV_IVQD_RESULT_PATH)
    df = df.sort_values(by="Overall Dual Acc.", ascending=False)
    present_columns = MODEL_INFO + checkbox_ivqd_group.value
    df = df[present_columns]
    return df


def get_all_ivqd_df():
    repo.git_pull()
    df = pd.read_csv(CSV_IVQD_RESULT_PATH)
    df = df.sort_values(by="Overall Dual Acc.", ascending=False)
    return df


block = gr.Blocks()


with block:
    gr.Markdown(
        LEADERBORAD_INTRODUCTION
    )

    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        # table mmupd bench
        with gr.TabItem("🏅 MM-AAD Benchmark", elem_id="mmaad-benchmark-tab-table", id=1):
            # selection for column part:
            checkbox_aad_group = gr.CheckboxGroup(
                choices=TASK_AAD_INFO,
                value=AVG_INFO,
                label="Evaluation Dimension",
                interactive=True,
            )  # user can select the evaluation dimension

            with gr.Row():
                # selection for model size part:
                model_size = gr.CheckboxGroup(
                    choices=MODEL_SIZE,
                    value=MODEL_SIZE,
                    label="Model Size",
                    interactive=True,
                )

                # selection for question type part:
                question_type = gr.CheckboxGroup(
                    choices=QUESTION_TYPE,
                    value=QUESTION_TYPE,
                    label="Question Type",
                    interactive=True,
                )

            baseline_value = get_baseline_aad_df()
            baseline_header = MODEL_INFO + checkbox_aad_group.value
            baseline_datatype = ['markdown'] * 4 + ['number'] * len(checkbox_aad_group.value)

            data_component_aad = gr.components.Dataframe(
                value=baseline_value,
                headers=baseline_header,
                type="pandas",
                datatype=baseline_datatype,
                interactive=False,
                visible=True,
            )

            def on_filter_model_size_method_change(selected_model_size, selected_question_type, selected_columns):
                updated_data = get_all_aad_df()

                # model_size & question_type:
                def custom_filter(row, model_size_filters, question_type_filters):
                    model_size = row['Model Size']
                    question_type = row['Question Type']

                    model_size = model_size.upper()
                    if model_size == '-':
                        size_filter = '-' in model_size_filters
                    elif 'B' in model_size:
                        size = float(model_size.replace('B', ''))
                        size_filter = ('>=10B' in model_size_filters and size >= 10) or ('<10B' in model_size_filters and size < 10)
                    else:
                        size_filter = False

                    question_type_filter = question_type in question_type_filters

                    return size_filter and question_type_filter

                mask = updated_data.apply(custom_filter, axis=1, model_size_filters=selected_model_size, question_type_filters=selected_question_type)
                updated_data = updated_data[mask]

                # columns:
                selected_columns = [item for item in TASK_AAD_INFO if item in selected_columns]
                present_columns = MODEL_INFO + selected_columns

                updated_data = updated_data[present_columns]
                updated_data = updated_data.sort_values(by=selected_columns[0], ascending=False)
                updated_headers = present_columns
                update_datatype = [DATA_AAD_TITILE_TYPE[COLUMN_AAD_NAMES.index(x)] for x in updated_headers]

                filter_component = gr.components.Dataframe(
                    value=updated_data,
                    headers=updated_headers,
                    type="pandas",
                    datatype=update_datatype,
                    interactive=False,
                    visible=True,
                )

                return filter_component

            model_size.change(fn=on_filter_model_size_method_change, inputs=[model_size, question_type, checkbox_aad_group], outputs=data_component_aad)
            question_type.change(fn=on_filter_model_size_method_change, inputs=[model_size, question_type, checkbox_aad_group], outputs=data_component_aad)
            checkbox_aad_group.change(fn=on_filter_model_size_method_change, inputs=[model_size, question_type, checkbox_aad_group], outputs=data_component_aad)

        with gr.TabItem("🏅 MM-IASD Benchmark", elem_id="mmiasd-benchmark-tab-table", id=2):
            checkbox_iasd_group = gr.CheckboxGroup(
                choices=TASK_IASD_INFO,
                value=AVG_INFO,
                label="Evaluation Dimension",
                interactive=True,
            )  # user can select the evaluation dimension

            with gr.Row():
                # selection for model size part:
                model_size = gr.CheckboxGroup(
                    choices=MODEL_SIZE,
                    value=MODEL_SIZE,
                    label="Model Size",
                    interactive=True,
                )

                # selection for question type part:
                question_type = gr.CheckboxGroup(
                    choices=QUESTION_TYPE,
                    value=QUESTION_TYPE,
                    label="Question Type",
                    interactive=True,
                )

            baseline_value = get_baseline_iasd_df()
            baseline_header = MODEL_INFO + checkbox_iasd_group.value
            baseline_datatype = ['markdown'] * 4 + ['number'] * len(checkbox_iasd_group.value)

            data_component_iasd = gr.components.Dataframe(
                value=baseline_value,
                headers=baseline_header,
                type="pandas",
                datatype=baseline_datatype,
                interactive=False,
                visible=True,
            )

            def on_filter_model_size_method_change(selected_model_size, selected_question_type, selected_columns):
                updated_data = get_all_iasd_df()

                def custom_filter(row, model_size_filters, question_type_filters):
                    model_size = row['Model Size']
                    question_type = row['Question Type']

                    model_size = model_size.upper()
                    if model_size == '-':
                        size_filter = '-' in model_size_filters
                    elif 'B' in model_size:
                        size = float(model_size.replace('B', ''))
                        size_filter = ('>=10B' in model_size_filters and size >= 10) or ('<10B' in model_size_filters and size < 10)
                    else:
                        size_filter = False

                    question_type_filter = question_type in question_type_filters

                    return size_filter and question_type_filter

                mask = updated_data.apply(custom_filter, axis=1, model_size_filters=selected_model_size, question_type_filters=selected_question_type)
                updated_data = updated_data[mask]

                # columns:
                selected_columns = [item for item in TASK_IASD_INFO if item in selected_columns]
                present_columns = MODEL_INFO + selected_columns

                updated_data = updated_data[present_columns]
                updated_data = updated_data.sort_values(by=selected_columns[0], ascending=False)
                updated_headers = present_columns
                update_datatype = [DATA_IASD_TITILE_TYPE[COLUMN_IASD_NAMES.index(x)] for x in updated_headers]

                filter_component = gr.components.Dataframe(
                    value=updated_data,
                    headers=updated_headers,
                    type="pandas",
                    datatype=update_datatype,
                    interactive=False,
                    visible=True,
                )

                return filter_component

            model_size.change(fn=on_filter_model_size_method_change, inputs=[model_size, question_type, checkbox_iasd_group], outputs=data_component_iasd)
            question_type.change(fn=on_filter_model_size_method_change, inputs=[model_size, question_type, checkbox_iasd_group], outputs=data_component_iasd)
            checkbox_iasd_group.change(fn=on_filter_model_size_method_change, inputs=[model_size, question_type, checkbox_iasd_group], outputs=data_component_iasd)

        # Table 3
        with gr.TabItem("🏅 MM-IVQD Benchmark", elem_id="mmivqd-benchmark-tab-table", id=3):
            # selection for column part:
            checkbox_ivqd_group = gr.CheckboxGroup(
                choices=TASK_IVQD_INFO,
                value=AVG_INFO,
                label="Evaluation Dimension",
                interactive=True,
            )  # user can select the evaluation dimension

            with gr.Row():
                # selection for model size part:
                model_size = gr.CheckboxGroup(
                    choices=MODEL_SIZE,
                    value=MODEL_SIZE,
                    label="Model Size",
                    interactive=True,
                )

                # selection for question type part:
                question_type = gr.CheckboxGroup(
                    choices=QUESTION_TYPE,
                    value=QUESTION_TYPE,
                    label="Question Type",
                    interactive=True,
                )

            baseline_value = get_baseline_ivqd_df()
            baseline_header = MODEL_INFO + checkbox_ivqd_group.value
            baseline_datatype = ['markdown'] * 4 + ['number'] * len(checkbox_ivqd_group.value)

            data_component_ivqd = gr.components.Dataframe(
                value=baseline_value,
                headers=baseline_header,
                type="pandas",
                datatype=baseline_datatype,
                interactive=False,
                visible=True,
            )

            def on_filter_model_size_method_change(selected_model_size, selected_question_type, selected_columns):
                updated_data = get_all_ivqd_df()

                def custom_filter(row, model_size_filters, question_type_filters):
                    model_size = row['Model Size']
                    question_type = row['Question Type']

                    model_size = model_size.upper()
                    if model_size == '-':
                        size_filter = '-' in model_size_filters
                    elif 'B' in model_size:
                        size = float(model_size.replace('B', ''))
                        size_filter = ('>=10B' in model_size_filters and size >= 10) or ('<10B' in model_size_filters and size < 10)
                    else:
                        size_filter = False

                    question_type_filter = question_type in question_type_filters

                    return size_filter and question_type_filter

                mask = updated_data.apply(custom_filter, axis=1, model_size_filters=selected_model_size, question_type_filters=selected_question_type)
                updated_data = updated_data[mask]

                selected_columns = [item for item in TASK_IVQD_INFO if item in selected_columns]
                present_columns = MODEL_INFO + selected_columns

                updated_data = updated_data[present_columns]
                updated_data = updated_data.sort_values(by=selected_columns[0], ascending=False)
                updated_headers = present_columns
                update_datatype = [DATA_IVQD_TITILE_TYPE[COLUMN_IVQD_NAMES.index(x)] for x in updated_headers]

                filter_component = gr.components.Dataframe(
                    value=updated_data,
                    headers=updated_headers,
                    type="pandas",
                    datatype=update_datatype,
                    interactive=False,
                    visible=True,
                )

                return filter_component

            model_size.change(fn=on_filter_model_size_method_change, inputs=[model_size, question_type, checkbox_ivqd_group], outputs=data_component_ivqd)
            question_type.change(fn=on_filter_model_size_method_change, inputs=[model_size, question_type, checkbox_ivqd_group], outputs=data_component_ivqd)
            checkbox_ivqd_group.change(fn=on_filter_model_size_method_change, inputs=[model_size, question_type, checkbox_ivqd_group], outputs=data_component_ivqd)

        # table 4
        with gr.TabItem("📝 About", elem_id="mmupd-benchmark-tab-table", id=4):
            gr.Markdown(LEADERBORAD_INFO, elem_classes="markdown-text")

        # table 5
        with gr.TabItem("🚀 Submit here! ", elem_id="mmupd-benchmark-tab-table", id=5):
            with gr.Row():
                gr.Markdown(SUBMIT_INTRODUCTION, elem_classes="markdown-text")

            with gr.Row():
                gr.Markdown("# ✉️✨ Submit your model evaluation json file here!", elem_classes="markdown-text")

            with gr.Row():
                with gr.Column():
                    model_type = gr.Dropdown(
                        choices=["VLM", "LLM"],
                        label="Model type",
                        multiselect=False,
                        value="VLM",
                        interactive=True,
                    )
                    model_name_textbox = gr.Textbox(
                        label="Model name", placeholder="LLaMA-7B"
                    )
                    revision_name_textbox = gr.Textbox(
                        label="Revision Model Name", placeholder="LLaMA-7B"
                    )
                    model_link = gr.Textbox(
                        label="Model Link", placeholder="https://huggingface.co/decapoda-research/llama-7b-hf"
                    )
                    model_size = gr.Textbox(
                        label="Model size", placeholder="7B (format must be 'number+B' or '-'; default is '-')"
                    )

                with gr.Column():
                    LLM_type = gr.Dropdown(
                        choices=["Vicuna-1.5-7B", "Vicuna-1.5-13B", "Flan-T5-XL", "LLaMA-7B", "Llama-13B", "Llama-3-8B", "Llama-3-70B", "Yi-34B", "Mistral-7B", "Other"],
                        label="LLM type",
                        multiselect=False,
                        value="Vicuna-1.5-13B",
                        interactive=True,
                    )
                    LLM_name_textbox = gr.Textbox(
                        label="LLM model (Required for Other)",
                        placeholder="GPT-4",
                    )
                    upd_type = gr.Dropdown(
                        choices=["AAD", "IASD", "IVQD"],
                        label="UPD type",
                        multiselect=False,
                        value="AAD",
                        interactive=True,
                    )
                    question_type = gr.Dropdown(
                        choices=QUESTION_TYPE,
                        label="Question Type",
                        multiselect=False,
                        value=QUESTION_TYPE[0],
                        interactive=True,
                    )

            with gr.Column():
                input_file = gr.components.File(label="Click to Upload a JSON File", file_count="single", type='binary')
                submit_button = gr.Button("Submit Eval")
                submission_result = gr.Markdown()

                submit_button.click(
                    add_new_eval,
                    inputs=[
                        input_file,
                        model_type,
                        model_name_textbox,
                        revision_name_textbox,
                        model_link,
                        model_size,
                        upd_type,
                        LLM_type,
                        LLM_name_textbox,
                        question_type,
                    ],
                )

    def refresh_data():
        value1 = get_baseline_aad_df()
        value2 = get_baseline_iasd_df()
        value3 = get_baseline_ivqd_df()
        return value1, value2, value3

    with gr.Row():
        data_run = gr.Button("Refresh")
        data_run.click(
            refresh_data, outputs=[data_component_aad, data_component_iasd, data_component_ivqd]
        )

    with gr.Accordion("Citation", open=False):
        citation_button = gr.Textbox(
            value=CITATION_BUTTON_TEXT,
            label=CITATION_BUTTON_LABEL,
            elem_id="citation-button",
            show_copy_button=True,
        )

block.launch()