from datasets import load_dataset
from collections import Counter, defaultdict
from random import sample, shuffle
from collections import Counter
import datasets
from pandas import DataFrame
from huggingface_hub import list_datasets
import os
import gradio as gr

import secrets


# Placeholder list; nothing in the visible code appends to or reads from it.
parti_prompt_results = []

# Hub organisation that hosts one pre-generated image dataset per model.
ORG = "diffusers-parti-prompts"

# Short model key -> "train" split of that model's generated images.
# Hub repo ids always use "/" as separator, so they are built with f-strings
# rather than `os.path.join`, which would insert "\\" on Windows.
SUBMISSIONS = {
    "kand2": load_dataset(f"{ORG}/kandinsky-2-2")["train"],
    "sdxl": load_dataset(f"{ORG}/sdxl-1.0-refiner")["train"],
    "wuerst": load_dataset(f"{ORG}/wuerstchen")["train"],
    "karlo": load_dataset(f"{ORG}/karlo-v1")["train"],
}

# Short model key -> model page on the Hugging Face Hub.
LINKS = {
    "kand2": "https://huggingface.co/kandinsky-community/kandinsky-2-2-decoder",
    "sdxl": "https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0",
    "wuerst": "https://huggingface.co/warp-ai/wuerstchen",
    "karlo": "https://huggingface.co/kakaobrain/karlo-v1-alpha",
}
# Markdown snippets shown on the result page, one per model.
# Each heading must start its line with "##" to render as a heading, and link
# targets must not be wrapped in quotes, otherwise Markdown does not parse them.
KANDINSKY = """
## The creative one 🎨! 
![img](https://aeiljuispo.cloudimg.io/v7/https://cdn-uploads.huggingface.co/production/uploads/5dfcb1aada6d0311fd3d5448/rETvCyoUD5Mr9wm6OxUhe.png?w=200&h=200&f=face)
\n You mostly resonate with **Kandinsky 2.2** released by AI Forever.
\n Kandinsky 2.2 has a similar architecture to DALLE-2 and works extremely well for artistic, colorful generations.
\n Check out your soulmate [here](https://huggingface.co/kandinsky-community/kandinsky-2-2-decoder).
"""
SDXL_RESULT = """
## The powerful one ⚡! 
![img](https://huggingface.co/datasets/OpenGenAI/logos/resolve/main/7vmYr2XwVcPtkLzac_jxQ.png)
\n You mostly resonate with **Stable Diffusion XL** released by Stability AI.
\n Stable Diffusion XL consists of two diffusion models that are chained together, a base model and a refiner model. Together, the system contains roughly 5 billion parameters.
\n It's the latest open-source release of Stable Diffusion and allows to render stunning images of much larger sizes than Stable Diffusion v1.
Try it out [here](https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0).
"""
WUERSTCHEN = """
## The innovative one ⚗️ !
![img](https://www.gravatar.com/avatar/3219846609129e84790fb83793998d61?d=retro&size=100)
\n You mostly resonate with **Wuerstchen** released by the WARP team.
\n Wuerstchen is a three stage diffusion model that proposed a very novel, innovative model architecture.
\n Wuerstchen is able to generate very large images (up to 1024x2048) in just a few seconds.
\n The model has an amazing image quality vs. speed trade-off.
\n Check out your new best friend [here](https://huggingface.co/warp-ai/wuerstchen).
"""
KARLO = """
## The precise one 🎯!
![img](https://huggingface.co/datasets/OpenGenAI/logos/resolve/main/1670220967262-615ed619c807b26d117a49bd.png)
\n You mostly resonate with **Karlo** released by KakaoBrain.
\n Karlo is based on the same architecture as DALLE-2 and has been trained on the [well curated COYO dataset](https://huggingface.co/datasets/kakaobrain/coyo-700m).
\n Play around with it [here](https://huggingface.co/kakaobrain/karlo-v1-alpha).
"""

# Model key -> Markdown text displayed when that model is the user's favorite.
RESULT = dict(
    kand2=KANDINSKY,
    wuerst=WUERSTCHEN,
    sdxl=SDXL_RESULT,
    karlo=KARLO,
)

# Number of prompts a user rates in one quiz round.
NUM_QUESTIONS = 10

# All model keys joined with "-"; used to derive the results organisation name.
MODEL_KEYS = "-".join(SUBMISSIONS)
SUBMISSION_ORG = "results-" + MODEL_KEYS

# Instruction text appended to the question counter (note the leading space).
PROMPT_FORMAT = (
    " Select the image that best matches the prompt and click on 'Submit'."
    " Remember that if multiple images match the prompt equally well, select them all."
    " If no image matches the prompt, no image shall be selected."
)

# Model keys in dict order, and the row count of the first model's dataset.
submission_names = [*SUBMISSIONS]
num_images = len(SUBMISSIONS[submission_names[0]])


def load_submissions():
    """Tally how often each image id occurs across all uploaded results.

    Lists every dataset whose author is ``SUBMISSION_ORG``, loads the
    ``train`` split of each one and counts the values of its ``id`` column.

    Returns:
        Counter: image id -> number of occurrences over all listed datasets.
    """
    # Collect the repo ids up front, then load the datasets one by one.
    repo_ids = [info.id for info in list_datasets(author=SUBMISSION_ORG)]

    seen_ids = []
    for repo_id in repo_ids:
        seen_ids.extend(load_dataset(repo_id)["train"]["id"])

    return Counter(seen_ids)


# Per-image submission counts, computed once when this module is executed
# (this lists and loads every results dataset, see `load_submissions`).
SUBMITTED_IDS = load_submissions()


def generate_random_hash(length=8):
    """
    Generates a random hexadecimal string of the specified length.

    Args:
        length (int): The number of hex characters to generate. Must be even
            because every random byte is rendered as two hex characters.

    Returns:
        str: A random lowercase hex string with exactly `length` characters.

    Raises:
        ValueError: If `length` is odd.
    """
    if length % 2 != 0:
        raise ValueError("Length should be an even number.")

    # token_hex(n) draws n random bytes itself and returns 2 * n hex digits,
    # so no separate token_bytes call is needed.
    return secrets.token_hex(length // 2)
    

def refresh(row_number, dataframe):
    """Reload submission counts and draw new questions once a round is over.

    Args:
        row_number: Index of the current question.
        dataframe: The question frame of the current round.

    Returns:
        A freshly drawn question frame when ``row_number`` equals
        ``NUM_QUESTIONS``; otherwise ``dataframe`` unchanged.
    """
    global SUBMITTED_IDS

    if row_number != NUM_QUESTIONS:
        return dataframe

    # `start` reads the module-level counts, so refresh those instead of
    # passing an argument (the original call passed one positional argument
    # to a function that accepted none, which raises TypeError).
    SUBMITTED_IDS = load_submissions()
    return start()

def start(submitted_ids=None):
    """Draw a new round of ``NUM_QUESTIONS`` prompts, favouring rarely rated ones.

    Every image id starts with a count of 0, which is then overridden by the
    given submission counts. Ids are ordered by ascending count (shuffled
    within equal counts), the first ``10 * NUM_QUESTIONS`` are kept as
    candidates, and ``NUM_QUESTIONS`` of them are sampled at random.

    Args:
        submitted_ids: Optional mapping of image id -> submission count.
            Defaults to the module-level ``SUBMITTED_IDS``.

    Returns:
        DataFrame: One row per question with the columns ``prompt``,
        ``result`` (initially empty), ``id``, ``Challenge``, ``Category``,
        ``Note`` and one ``choice_<n>`` column per model. ``choice_<n>`` holds
        the index into ``submission_names`` of the model shown in slot ``n``.
    """
    if submitted_ids is None:
        submitted_ids = SUBMITTED_IDS

    counts = {image_id: 0 for image_id in range(num_images)}
    counts = {**counts, **submitted_ids}

    # Group ids by count; iterating in sorted order keeps groups ascending.
    ids_by_count = defaultdict(list)
    for image_id, count in sorted(counts.items(), key=lambda item: item[1]):
        ids_by_count[count].append(image_id)

    # Shuffle within each count group so ties are broken randomly.
    for group in ids_by_count.values():
        shuffle(group)

    ordered_ids = [image_id for group in ids_by_count.values() for image_id in group]

    # Keep only the least-rated ids, then pick the questions among them.
    id_candidates = ordered_ids[: (10 * NUM_QUESTIONS)]
    image_ids = sample(id_candidates, k=NUM_QUESTIONS)

    # Prompt metadata is read from the first model's dataset.
    reference = SUBMISSIONS[submission_names[0]]
    questions = {}
    for question_idx, image_id in enumerate(image_ids):
        # Random display order of the models for this question.
        order = list(range(len(SUBMISSIONS)))
        shuffle(order)

        row = reference[image_id]
        question = {
            "prompt": row["Prompt"],
            "result": "",
            "id": image_id,
            "Challenge": row["Challenge"],
            "Category": row["Category"],
            "Note": row["Note"],
        }
        for slot, model_idx in enumerate(order):
            question[f"choice_{slot}"] = model_idx
        questions[question_idx] = question

    return DataFrame.from_dict(questions, orient="index")


def process(dataframe, row_number=0):
    """Build the widget values for the question at ``row_number``.

    Args:
        dataframe: Question frame with ``id`` and ``choice_<n>`` columns.
        row_number: Index of the question to display.

    Returns:
        tuple: One image per model, one ``False`` per checkbox, the prompt
        heading and the counter text. When ``row_number`` equals
        ``NUM_QUESTIONS`` the images are ``None`` and both texts are empty.
    """
    # Round finished: blank out every image, checkbox and text.
    if row_number == NUM_QUESTIONS:
        blank_images = [None] * len(RESULT)
        unchecked = [False] * len(RESULT)
        return (*blank_images, *unchecked, "", "")

    current = dataframe.iloc[row_number]
    image_idx = int(current["id"])

    # Resolve which model sits in each display slot for this question.
    slot_models = []
    for slot in range(len(SUBMISSIONS)):
        slot_models.append(submission_names[current[f"choice_{slot}"]])

    pictures = [SUBMISSIONS[model][image_idx]["images"] for model in slot_models]

    prompt_text = SUBMISSIONS[slot_models[0]][image_idx]["Prompt"]
    heading = f'# "{prompt_text}"'
    progress = f"***{row_number + 1}/{NUM_QUESTIONS} {PROMPT_FORMAT}***"
    unchecked = [False] * len(pictures)

    return (*pictures, *unchecked, heading, progress)


def write_result(user_choice, row_number, dataframe):
    """Store the models the user ticked for the current question.

    Args:
        user_choice: The checkbox states encoded as an integer whose decimal
            digits are the 0/1 flags of the slots, e.g. ``101`` for the flags
            ``0101``.
        row_number: Index of the question being answered.
        dataframe: Question frame; its ``result`` cell for ``row_number`` is
            overwritten with the comma-joined keys of the chosen models.

    Returns:
        tuple: ``(row_number + 1, dataframe)``, or the inputs unchanged when
        ``row_number`` equals ``NUM_QUESTIONS``.
    """
    if row_number == NUM_QUESTIONS:
        return row_number, dataframe

    # The integer encoding drops leading zeros (flags 0101 arrive as 101), so
    # pad back to one digit per slot; otherwise every selection would be
    # attributed to the wrong slot. Negative values are treated as "nothing
    # selected".
    flags = str(max(int(user_choice), 0)).zfill(len(submission_names))

    current = dataframe.iloc[row_number]
    chosen_models = [
        submission_names[current[f"choice_{slot}"]]
        for slot, flag in enumerate(flags)
        if flag == "1"
    ]

    print(chosen_models)
    dataframe.loc[row_number, "result"] = ",".join(chosen_models)
    return row_number + 1, dataframe


def get_index(evt: gr.SelectData) -> int:
    """Return the ``index`` attribute of the given Gradio select event."""
    selected_index = evt.index
    return selected_index


def change_view(row_number, dataframe):
    """Toggle between the question gallery and the result page.

    While questions remain, only the gallery is visible. Once ``row_number``
    equals ``NUM_QUESTIONS`` the answers are uploaded to the Hub as a new
    dataset under ``SUBMISSION_ORG`` and the result page is shown.

    Args:
        row_number: Index of the current question.
        dataframe: Question frame whose ``result`` column holds comma-joined
            model keys (empty string when nothing was selected).

    Returns:
        dict: Gradio component -> update for the four views and the result
        Markdown.
    """
    if row_number != NUM_QUESTIONS:
        return {
            intro_view: gr.update(visible=False),
            result_view: gr.update(visible=False),
            gallery_view: gr.update(visible=True),
            start_view: gr.update(visible=False),
            result: "",
        }

    # Flatten all answers into a list of model keys, skipping empty cells.
    votes = [
        model
        for cell in dataframe["result"].values
        if isinstance(cell, str)
        for model in cell.split(",")
        if model
    ]

    if votes:
        favorite_model = Counter(votes).most_common(1)[0][0]
        result_text = RESULT[favorite_model]
    else:
        # Leaving every question unchecked is a valid answer; without this
        # branch `most_common(1)[0]` raises IndexError on the empty tally.
        result_text = (
            "## No favorite this time 🤷\n"
            "\n You did not select any image, so no model could be determined."
        )

    # Upload only the `id` and `result` columns.
    dataset = datasets.Dataset.from_pandas(dataframe)
    extra_columns = [
        name for name in dataset.column_names if name not in ("id", "result")
    ]
    dataset = dataset.remove_columns(extra_columns)

    # Hub repo ids always use "/" regardless of the operating system.
    repo_id = f"{SUBMISSION_ORG}/{generate_random_hash()}"
    dataset.push_to_hub(repo_id, token=os.getenv("HF_TOKEN"))

    return {
        intro_view: gr.update(visible=False),
        result_view: gr.update(visible=True),
        gallery_view: gr.update(visible=False),
        start_view: gr.update(visible=True),
        result: result_text,
    }


# Page heading (Markdown).
TITLE = "# What AI model is best for you? 👩‍⚕️"

# Intro text shown before a round is started (Markdown).
DESCRIPTION = """
***How it works*** 📖 \n\n
- Upon clicking start, you are shown image descriptions alongside four AI generated images.
\n- Select the image that best matches the prompt. If multiple images match the prompt equally well, select all images. If no image matches the prompt, leave all images unchecked.
\n- Answer **10** questions to find out what AI generator most resonates with you. 
\n- Your submissions contribute to [**Open Parti Prompts Leaderboard**](https://huggingface.co/spaces/OpenGenAI/parti-prompts-leaderboard) ❤️.
\n\n
"""

# Background information shown below the start button (Markdown).
NOTE = """\n\n\n\n
The prompts you are shown originate from the [Parti Prompts](https://huggingface.co/datasets/nateraw/parti-prompts) dataset.
Parti Prompts is designed to test text-to-image AI models on 1600+ prompts of varying difficulty and categories.
The images you are shown have been pre-generated with 4 state-of-the-art open-sourced text-to-image models.
Your answers will be used to contribute to the official [**Open Parti Prompts Leaderboard**](https://huggingface.co/spaces/OpenGenAI/parti-prompts-leaderboard).
Every couple months, the generated images will be updated with possibly improved models. The current models and code that was used to generate the images can be verified here:\n
- [kandinsky-2-2](https://huggingface.co/kandinsky-community/kandinsky-2-2-decoder) \n
- [wuerstchen](https://huggingface.co/warp-ai/wuerstchen) \n
- [sdxl-1.0](https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0) \n
- [karlo](https://huggingface.co/datasets/diffusers-parti-prompts/karlo-v1) \n
"""

# Number of models; not referenced anywhere else in the visible code.
GALLERY_COLUMN_NUM = len(SUBMISSIONS)

# UI definition: an intro text, a hidden question counter, a result page,
# a start page and the question gallery, followed by the event wiring.
with gr.Blocks() as demo:
    gr.Markdown(TITLE)
    with gr.Column(visible=True) as intro_view:
        gr.Markdown(DESCRIPTION)

    # Column names and types of the question frame: six fixed columns plus
    # one numeric `choice_<i>` column per model.
    headers = ["prompt", "result", "id", "Challenge", "Category", "Note"] + [
        f"choice_{i}" for i in range(len(SUBMISSIONS))
    ]
    datatype = ["str", "str", "number", "str", "str", "str"] + len(SUBMISSIONS) * [
        "number"
    ]

    # Hidden state: index of the question currently shown.
    with gr.Column(visible=False):
        row_number = gr.Number(
            label="Current row selection index",
            value=0,
            precision=0,
            interactive=False,
        )

    # Create Data Frame
    # Result page: favorite-model text plus the read-only question frame.
    with gr.Column(visible=False) as result_view:
        result = gr.Markdown("")
        dataframe = gr.Dataframe(
            headers=headers,
            datatype=datatype,
            row_count=NUM_QUESTIONS,
            col_count=(6 + len(SUBMISSIONS), "fixed"),
            interactive=False,
        )
        gr.Markdown("Click on start to play again!")

    # Start page: start button and background note.
    with gr.Column(visible=True) as start_view:
        start_button = gr.Button("Start").style(full_width=True)
        gr.Markdown(NOTE)

    # Hidden state: the checkbox states encoded as one integer (see `integerize`).
    with gr.Column(visible=False):
        selected_image = gr.Number(label="Selected index", value=-1, precision=0)

    # Question gallery: counter, prompt, four image/checkbox pairs and submit.
    with gr.Column(visible=False) as gallery_view:
        with gr.Row():
            counter = gr.Markdown(f"***1/{NUM_QUESTIONS} {PROMPT_FORMAT}***")
        with gr.Row():
            prompt = gr.Markdown("")
        with gr.Blocks():
            with gr.Row():
                with gr.Column(min_width=200) as c1:
                    image_1 = gr.Image(interactive=False)
                    image_1_button = gr.Checkbox(False, label="Image 1").style(full_width=True)
                with gr.Column(min_width=200) as c2:
                    image_2 = gr.Image(interactive=False)
                    image_2_button = gr.Checkbox(False, label="Image 2").style(full_width=True)
                with gr.Column(min_width=200) as c3:
                    image_3 = gr.Image(interactive=False)
                    image_3_button = gr.Checkbox(False, label="Image 3").style(full_width=True)
                with gr.Column(min_width=200) as c4:
                    image_4 = gr.Image(interactive=False)
                    image_4_button = gr.Checkbox(False, label="Image 4").style(full_width=True)         
            with gr.Row():
                submit_button = gr.Button("Submit").style(full_width=True)         

    # Start: draw questions -> reset the counter if it sits at the end ->
    # switch to the gallery -> show the first question (row_number defaults to 0).
    start_button.click(
        fn=start,
        inputs=[],
        outputs=dataframe,
        show_progress=True
    ).then(
        fn=lambda x: 0 if x == NUM_QUESTIONS else x,
        inputs=[row_number],
        outputs=[row_number],
    ).then(
        fn=change_view,
        inputs=[row_number, dataframe],
        outputs=[intro_view, result_view, gallery_view, start_view, result],
    ).then(
        fn=process, 
        inputs=[dataframe],
        outputs=[image_1, image_2, image_3, image_4, image_1_button, image_2_button, image_3_button, image_4_button, prompt, counter]
    )

    def integerize(x1, x2, x3, x4):
        """Encode four checkbox states as one integer with 0/1 decimal digits.

        Note that ``int`` drops leading zeros, e.g. the states
        (False, True, False, True) yield 101 rather than "0101".
        """
        number = f"{int(x1)}{int(x2)}{int(x3)}{int(x4)}"
        return int(number)

    # Submit: encode checkboxes -> store the answer and advance the counter ->
    # switch views if the round is over -> show the next question ->
    # reset the counter at the end -> refresh the question frame.
    submit_button.click(
        fn=integerize,
        inputs=[image_1_button, image_2_button, image_3_button, image_4_button],
        outputs=[selected_image],
    ).then(
        fn=write_result,
        inputs=[selected_image, row_number, dataframe],
        outputs=[row_number, dataframe],
    ).then(
        fn=change_view,
        inputs=[row_number, dataframe],
        outputs=[intro_view, result_view, gallery_view, start_view, result]
    ).then(
        fn=process,
        inputs=[dataframe, row_number],
        outputs=[image_1, image_2, image_3, image_4, image_1_button, image_2_button, image_3_button, image_4_button, prompt, counter],
    ).then(
        fn=lambda x: 0 if x == NUM_QUESTIONS else x,
        inputs=[row_number],
        outputs=[row_number],
    ).then(
        # NOTE(review): the previous step resets row_number to 0 when it equals
        # NUM_QUESTIONS, so refresh presumably never sees the end-of-round
        # value here - confirm whether these two steps should be swapped.
        fn=refresh,
        inputs=[row_number, dataframe],
        outputs=[dataframe],
    )

# Start the Gradio server when the file is executed.
demo.launch()