"""Interactive Gradio game: users pick their favorite of 4 AI-generated images
for 10 prompts; their choices are pushed to the Open Parti Prompts leaderboard.
"""
from collections import Counter, defaultdict
from random import sample, shuffle
import os
import secrets

import datasets
import gradio as gr
from datasets import load_dataset
from huggingface_hub import list_datasets
from pandas import DataFrame

parti_prompt_results = []
ORG = "diffusers-parti-prompts"
SUBMISSIONS = {
    # Pre-generated images, one dataset per model. Repo ids always use "/",
    # so build them with f-strings rather than os.path.join (Windows-safe).
    "sd-v1-5": load_dataset(f"{ORG}/sd-v1-5", keep_in_memory=True)["train"],
    "sd-v2-1": load_dataset(f"{ORG}/sd-v2.1", keep_in_memory=True)["train"],
    # FIX: "if-v1-0" and "karlo" previously loaded each other's datasets
    # (the "karlo-v1" and "if-v-1.0" repo names were swapped).
    "if-v1-0": load_dataset(f"{ORG}/if-v-1.0", keep_in_memory=True)["train"],
    "karlo": load_dataset(f"{ORG}/karlo-v1", keep_in_memory=True)["train"],
    # "Kadinsky":
}
LINKS = {
    "sd-v1-5": "https://huggingface.co/runwayml/stable-diffusion-v1-5",
    "sd-v2-1": "https://huggingface.co/stabilityai/stable-diffusion-2-1",
    "if-v1-0": "https://huggingface.co/DeepFloyd/IF-I-XL-v1.0",
    "karlo": "https://huggingface.co/kakaobrain/karlo-v1-alpha",
}
# FIX: removed a stray leading `"` that rendered in the UI, and use
# `resolve/` instead of `blob/` so the image URL serves the file, not HTML.
SD_1_5_RESULT = """
## The traditional one 🔥!
![img](https://huggingface.co/datasets/OpenGenAI/logos/resolve/main/37351293.png) \n
You mostly resonate with **Stable Diffusion 1-5** released by RunwayML. \n
Stable Diffusion 1-5 is the most used open-source text-to-image model offering an amazing speed-to-image-quality trade-off! \n
Check out your soulmate [here](https://huggingface.co/runwayml/stable-diffusion-v1-5).
"""
SD_2_1_RESULT = """
## The creative one 🎨!
![img](https://huggingface.co/datasets/OpenGenAI/logos/resolve/main/7vmYr2XwVcPtkLzac_jxQ.png) \n
You mostly resonate with **Stable Diffusion 2-1** released by Stability AI. \n
Stable Diffusion 2-1 is the latest open-source release of Stable Diffusion and allows to render stunning images of much larger sizes than Stable Diffusion v1.
Try it out [here](https://huggingface.co/stabilityai/stable-diffusion-2-1).
"""
IF_V1_0 = """
## The powerful one ⚡!
![img](https://huggingface.co/datasets/OpenGenAI/logos/resolve/main/1662453741854-63170739dc97a974718be2c7.png) \n
You mostly resonate with **IF v1-0** released by DeepFloyd. \n
IF v1-0 is by far the largest of the open-sourced text-to-image models and is a very powerful image generator. \n
Besides being able to generate multiple complex concepts in the same image, IF v1-0 is also extremely good at generating text in images. \n
Check out your new best friend [here](https://huggingface.co/DeepFloyd/IF-I-XL-v1.0)
"""
# FIX: the link previously had quotes inside the URL (`[here]("…")`), which
# breaks the markdown link.
KARLO = """
## The precise one 🎯!
![img](https://huggingface.co/datasets/OpenGenAI/logos/resolve/main/1670220967262-615ed619c807b26d117a49bd.png) \n
You mostly resonate with **Karlo** released by KakaoBrain. \n
Karlo is based on the same architecture as DALLE-2 and has been trained on the [well curated COYO dataset](https://huggingface.co/datasets/kakaobrain/coyo-700m). \n
Play around with it [here](https://huggingface.co/kakaobrain/karlo-v1-alpha).
"""
RESULT = {
    "sd-v1-5": SD_1_5_RESULT,
    "sd-v2-1": SD_2_1_RESULT,
    "if-v1-0": IF_V1_0,
    "karlo": KARLO,
}
NUM_QUESTIONS = 10
MODEL_KEYS = "-".join(SUBMISSIONS.keys())
SUBMISSION_ORG = f"results-{MODEL_KEYS}"
PROMPT_FORMAT = " Pick the picture that best matches the prompt:"
submission_names = list(SUBMISSIONS.keys())
num_images = len(SUBMISSIONS[submission_names[0]])


def load_submissions():
    """Count, per prompt id, how often it was already answered across all
    previously submitted result datasets in the submission org.

    Returns:
        Counter: mapping prompt id -> number of prior submissions.
    """
    all_datasets = list_datasets(author=SUBMISSION_ORG)
    relevant_ids = [d.id for d in all_datasets]

    submitted_ids = []
    for _id in relevant_ids:
        ds = load_dataset(_id)["train"]
        submitted_ids += ds["id"]

    return Counter(submitted_ids)


SUBMITTED_IDS = load_submissions()


def generate_random_hash(length=8):
    """Generate a random hex hash of the given even length.

    Args:
        length (int): The length of the hash to generate.

    Returns:
        str: A random hex string of `length` characters.

    Raises:
        ValueError: If `length` is odd (token_hex yields 2 chars per byte).
    """
    if length % 2 != 0:
        raise ValueError("Length should be an even number.")
    # FIX: removed an unused `secrets.token_bytes(...)` call.
    num_bytes = length // 2
    return secrets.token_hex(num_bytes)


def refresh(row_number, dataframe):
    """After the last question, re-pull submissions and deal a fresh round;
    otherwise keep the current dataframe unchanged."""
    if row_number == NUM_QUESTIONS:
        # Refresh the global submission counts so the next round favors the
        # least-answered prompts, then deal a new game.
        global SUBMITTED_IDS
        SUBMITTED_IDS = load_submissions()
        # FIX: start() takes no arguments; previously this called
        # start(submitted_ids), which raised a TypeError.
        return start()
    return dataframe


def start():
    """Select NUM_QUESTIONS prompts (biased toward the least-answered ids)
    and build the game dataframe with a random model order per question."""
    counts = {prompt_id: 0 for prompt_id in range(num_images)}
    counts = {**counts, **SUBMITTED_IDS}

    # sort ids by how often they were already answered (ascending)
    sorted_counts = sorted(counts.items(), key=lambda x: x[1])

    freq_ids = defaultdict(list)
    for prompt_id, count in sorted_counts:
        freq_ids[count].append(prompt_id)

    # Shuffle within each frequency bucket. FIX: the old code wrote the
    # shuffled list back under a stale key (`freq_ids[v] = v_list`),
    # corrupting the mapping; shuffle() is in-place, no write-back needed.
    for bucket in freq_ids.values():
        shuffle(bucket)

    shuffled_ids = sum(list(freq_ids.values()), [])

    # restrict to the least-answered candidates, then sample the questions
    id_candidates = shuffled_ids[: (10 * NUM_QUESTIONS)]
    image_ids = sample(id_candidates, k=NUM_QUESTIONS)

    images = {}
    for i in range(NUM_QUESTIONS):
        # random display order of the models for this question
        order = list(range(len(SUBMISSIONS)))
        shuffle(order)

        prompt_id = image_ids[i]
        row = SUBMISSIONS[submission_names[0]][prompt_id]
        images[i] = {
            "prompt": row["Prompt"],
            "result": "",
            "id": prompt_id,
            "Challenge": row["Challenge"],
            "Category": row["Category"],
            "Note": row["Note"],
        }
        for n, m in enumerate(order):
            images[i][f"choice_{n}"] = m

    return DataFrame.from_dict(images, orient="index")


def process(dataframe, row_number=0):
    """Return the 4 candidate images, the prompt markdown, and the progress
    counter for the current question (blanks once the game is over)."""
    if row_number == NUM_QUESTIONS:
        return None, None, None, None, "", ""

    image_id = dataframe.iloc[row_number]["id"]
    choices = [
        submission_names[dataframe.iloc[row_number][f"choice_{i}"]]
        for i in range(len(SUBMISSIONS))
    ]

    images = [SUBMISSIONS[c][int(image_id)]["images"] for c in choices]
    prompt = SUBMISSIONS[choices[0]][int(image_id)]["Prompt"]
    prompt = f'# "{prompt}"'
    counter = f"***{row_number + 1}/{NUM_QUESTIONS} {PROMPT_FORMAT}***"

    return *images, prompt, counter


def write_result(user_choice, row_number, dataframe):
    """Record which model the user picked for the current row and advance."""
    if row_number == NUM_QUESTIONS:
        return row_number, dataframe

    user_choice = int(user_choice)
    chosen_model = submission_names[
        dataframe.iloc[row_number][f"choice_{user_choice}"]
    ]
    dataframe.loc[row_number, "result"] = chosen_model
    return row_number + 1, dataframe


def get_index(evt: gr.SelectData) -> int:
    """Return the index of the selected gallery item."""
    return evt.index


def change_view(row_number, dataframe):
    """Switch between the gallery and result views; on game end, push the
    anonymized answers to the Hub and reveal the user's favorite model."""
    if row_number == NUM_QUESTIONS:
        favorite_model = dataframe["result"].value_counts().idxmax()

        # only upload the (id, result) pairs, under a random repo name
        dataset = datasets.Dataset.from_pandas(dataframe)
        dataset = dataset.remove_columns(
            set(dataset.column_names) - set(["id", "result"])
        )
        repo_hash = generate_random_hash()  # renamed: don't shadow builtin `hash`
        repo_id = f"{SUBMISSION_ORG}/{repo_hash}"
        dataset.push_to_hub(repo_id, token=os.getenv("HF_TOKEN"))

        # FIX: both return dicts were missing a comma after the
        # `start_view` entry (syntax error).
        return {
            intro_view: gr.update(visible=False),
            result_view: gr.update(visible=True),
            gallery_view: gr.update(visible=False),
            start_view: gr.update(visible=True),
            result: RESULT[favorite_model],
        }
    else:
        return {
            intro_view: gr.update(visible=False),
            result_view: gr.update(visible=False),
            gallery_view: gr.update(visible=True),
            start_view: gr.update(visible=False),
            result: "",
        }


TITLE = "# What's Your AI Gen Personality 🧬?"
DESCRIPTION = """
*In this interactive game you are shown image descriptions along side 4 AI generated images. \n
Select the image that best fits the image description. \n
Answer **10** questions to find out what AI generator most resonates with you. \n
Your submissions contribute to [**Open Parti Prompts Leaderboard**](https://huggingface.co/spaces/OpenGenAI/parti-prompts-leaderboard) ❤️
"""
EXPLANATION = """\n\n
## What to do 📖 \n\n
1. Click on 'Start'
2. A prompt and 4 different images are displayed
3. Select your favorite image
4. After 10 rounds your favorite diffusion model is displayed \n\n
"""
NOTE = """
The prompts you are shown originate from the [Parti Prompts](https://huggingface.co/datasets/nateraw/parti-prompts) dataset.
Parti Prompts is designed to test text-to-image AI models on 1600+ prompts of varying difficulty and categories.
The images you are shown have been pre-generated with 4 state-of-the-art open-sourced text-to-image models. \n\n
You answers will be used to contribute to the official [**Open Parti Prompts Leaderboard**](https://huggingface.co/spaces/OpenGenAI/parti-prompts-leaderboard). \n
By playing this game, you are greatly helping the community to have a better, **human-preference-aligned** metric to compare text-to-image models ❤️. \n\n
Every month, the generated images will be updated with possibly improved models.
"""
GALLERY_COLUMN_NUM = len(SUBMISSIONS)

with gr.Blocks() as demo:
    gr.Markdown(TITLE)
    with gr.Column(visible=True) as intro_view:
        gr.Markdown(DESCRIPTION)
        gr.Markdown(EXPLANATION)

    headers = ["prompt", "result", "id", "Challenge", "Category", "Note"] + [
        f"choice_{i}" for i in range(len(SUBMISSIONS))
    ]
    datatype = ["str", "str", "number", "str", "str", "str"] + len(SUBMISSIONS) * [
        "number"
    ]

    # hidden state: index of the current question
    with gr.Column(visible=False):
        row_number = gr.Number(
            label="Current row selection index",
            value=0,
            precision=0,
            interactive=False,
        )

    # Create Data Frame
    with gr.Column(visible=False) as result_view:
        result = gr.Markdown("")
        dataframe = gr.Dataframe(
            headers=headers,
            datatype=datatype,
            row_count=NUM_QUESTIONS,
            col_count=(6 + len(SUBMISSIONS), "fixed"),
            interactive=False,
        )
        gr.Markdown("Click on start to play again!")

    with gr.Column(visible=True) as start_view:
        start_button = gr.Button("Start").style(full_width=True)
        gr.Markdown(NOTE)

    # hidden state: which of the 4 images the user clicked
    with gr.Column(visible=False):
        selected_image = gr.Number(label="Selected index", value=-1, precision=0)

    with gr.Column(visible=False) as gallery_view:
        with gr.Row():
            counter = gr.Markdown(f"***1/{NUM_QUESTIONS} {PROMPT_FORMAT}***")
        with gr.Row():
            prompt = gr.Markdown("")
        with gr.Blocks():
            with gr.Row():
                with gr.Column() as c1:
                    image_1 = gr.Image(interactive=False)
                    image_1_button = gr.Button("Select 1").style(full_width=True)
                with gr.Column() as c2:
                    image_2 = gr.Image(interactive=False)
                    image_2_button = gr.Button("Select 2").style(full_width=True)
                with gr.Column() as c3:
                    image_3 = gr.Image(interactive=False)
                    image_3_button = gr.Button("Select 3").style(full_width=True)
                with gr.Column() as c4:
                    image_4 = gr.Image(interactive=False)
                    image_4_button = gr.Button("Select 4").style(full_width=True)

    start_button.click(
        fn=start, inputs=[], outputs=dataframe, show_progress=True
    ).then(
        fn=lambda x: 0 if x == NUM_QUESTIONS else x,
        inputs=[row_number],
        outputs=[row_number],
    ).then(
        fn=change_view,
        inputs=[row_number, dataframe],
        outputs=[intro_view, result_view, gallery_view, start_view, result],
    ).then(
        fn=process,
        inputs=[dataframe],
        outputs=[image_1, image_2, image_3, image_4, prompt, counter],
    )

    images = [image_1_button, image_2_button, image_3_button, image_4_button]
    for i, button in enumerate(images):
        button.click(
            # FIX: bind `i` as a default argument; a bare `lambda: i` is a
            # late-binding closure and every button would report the last index.
            fn=lambda i=i: i,
            inputs=[],
            outputs=[selected_image],
        ).then(
            fn=write_result,
            inputs=[selected_image, row_number, dataframe],
            outputs=[row_number, dataframe],
        ).then(
            fn=change_view,
            inputs=[row_number, dataframe],
            outputs=[intro_view, result_view, gallery_view, start_view, result],
        ).then(
            fn=process,
            inputs=[dataframe, row_number],
            outputs=[image_1, image_2, image_3, image_4, prompt, counter],
        ).then(
            fn=lambda x: 0 if x == NUM_QUESTIONS else x,
            inputs=[row_number],
            outputs=[row_number],
        ).then(
            # NOTE(review): row_number is reset to 0 by the previous step, so
            # this refresh's `row_number == NUM_QUESTIONS` branch never fires
            # in this chain — preserved as-is; confirm the intended ordering.
            fn=refresh,
            inputs=[row_number, dataframe],
            outputs=[dataframe],
        )

demo.launch()