# Residue from the Hugging Face file viewer (not part of the program):
# patrickvonplaten's picture | finish | 9f7a953 | raw | history blame | 9.67 kB
from datasets import load_dataset
from collections import Counter, defaultdict
from random import sample, shuffle
import datasets
from pandas import DataFrame
from huggingface_hub import list_datasets
import os
import gradio as gr
import secrets
# Not referenced anywhere in the visible code; kept so the module-level name
# remains available.
parti_prompt_results = []

# Hub organisation that hosts one pre-generated image dataset per model.
ORG = "diffusers-parti-prompts"

# Model key -> "train" split of that model's pre-generated images.
# Hub repo ids always use "/" as the separator, so they are built with
# f-strings instead of os.path.join (which would use "\\" on Windows).
# The "if-v1-0" and "karlo" keys previously loaded each other's dataset,
# which credited votes to the wrong model; each key now loads the dataset
# that matches its entry in LINKS. The key order is unchanged, so MODEL_KEYS
# and SUBMISSION_ORG keep their previous values.
SUBMISSIONS = {
    "sd-v1-5": load_dataset(f"{ORG}/sd-v1-5")["train"],
    "sd-v2-1": load_dataset(f"{ORG}/sd-v2.1")["train"],
    "if-v1-0": load_dataset(f"{ORG}/if-v-1.0")["train"],
    "karlo": load_dataset(f"{ORG}/karlo-v1")["train"],
    # "Kadinsky":
}

# Model key -> model card shown to the player as their favourite model.
LINKS = {
    "sd-v1-5": "https://huggingface.co/runwayml/stable-diffusion-v1-5",
    "sd-v2-1": "https://huggingface.co/stabilityai/stable-diffusion-2-1",
    "if-v1-0": "https://huggingface.co/DeepFloyd/IF-I-XL-v1.0",
    "karlo": "https://huggingface.co/kakaobrain/karlo-v1-alpha",
}

# Number of prompts shown in one round of the game.
NUM_QUESTIONS = 10

# Hub namespace that receives the per-round result datasets; its name encodes
# the participating model keys in SUBMISSIONS order.
MODEL_KEYS = "-".join(SUBMISSIONS.keys())
SUBMISSION_ORG = f"results-{MODEL_KEYS}"

# Text appended to the "<current>/<total>" counter above the gallery.
PROMPT_FORMAT = " Pick the picture that best matches the prompt:"

# Index -> model key; the "choice_<n>" dataframe columns store these indices.
submission_names = list(SUBMISSIONS.keys())

# Row count of the first submission's dataset; start() uses it as the range
# of candidate image ids.
num_images = len(SUBMISSIONS[submission_names[0]])
def load_submissions():
    """Return how often each image id has already been rated.

    Returns:
        A mapping from image id to its number of previous ratings. The
        function currently short-circuits and always returns an empty dict.
    """
    # NOTE(review): this early return disables the Hub lookup below, so the
    # rest of the body is unreachable. Confirm whether that is intentional
    # before removing either half.
    return {}

    rating_counts = Counter()
    for dataset_info in list_datasets(author=SUBMISSION_ORG):
        train_split = load_dataset(dataset_info.id)["train"]
        rating_counts.update(train_split["id"])
    return rating_counts


# Snapshot taken once at import time; start() reads it.
SUBMITTED_IDS = load_submissions()
def generate_random_hash(length=8):
    """Generate a random lowercase hexadecimal string.

    Args:
        length (int): Number of hex characters to produce. Must be even,
            because every random byte is rendered as two hex characters.

    Returns:
        str: A random hex string with exactly ``length`` characters.

    Raises:
        ValueError: If ``length`` is odd.
    """
    if length % 2 != 0:
        raise ValueError("Length should be an even number.")

    # token_hex draws its own random bytes, so no separate token_bytes call
    # is needed.
    return secrets.token_hex(length // 2)
def refresh(row_number, dataframe):
    """Return the question table to keep in the UI after an answer.

    Args:
        row_number: Index of the current question.
        dataframe: The question table of the running round.

    Returns:
        ``dataframe`` unchanged while ``row_number`` differs from
        ``NUM_QUESTIONS``; otherwise the result of calling ``start`` with
        freshly loaded submission counts.
    """
    if row_number != NUM_QUESTIONS:
        return dataframe

    return start(load_submissions())
def start(submitted_ids=None):
    """Build the question table for a new round.

    Image ids with the fewest previous ratings are preferred: ids are grouped
    by rating count, shuffled within each group, and ``NUM_QUESTIONS`` ids are
    sampled from the ``10 * NUM_QUESTIONS`` least-rated ones.

    Args:
        submitted_ids: Optional mapping of image id to number of previous
            ratings. Defaults to the module-level ``SUBMITTED_IDS`` snapshot,
            so the function can still be called without arguments.

    Returns:
        A pandas ``DataFrame`` with one row per question. Columns are
        ``prompt``, ``result`` (empty until answered), ``id``, ``Challenge``,
        ``Category``, ``Note`` and one ``choice_<n>`` column per model holding
        the index into ``submission_names`` shown at gallery position ``n``.
    """
    if submitted_ids is None:
        submitted_ids = SUBMITTED_IDS

    # Every image starts with zero ratings; known counts override that.
    counts = {image_id: 0 for image_id in range(num_images)}
    counts.update(submitted_ids)

    # Group ids by rating count, lowest count first.
    ids_by_count = defaultdict(list)
    for image_id, count in sorted(counts.items(), key=lambda item: item[1]):
        ids_by_count[count].append(image_id)

    # Shuffle within each group. shuffle() works in place, so nothing is
    # written back; the old write-back used a stale loop variable and
    # overwrote the highest-count group.
    for bucket in ids_by_count.values():
        shuffle(bucket)

    shuffled_ids = [
        image_id for bucket in ids_by_count.values() for image_id in bucket
    ]

    # Restrict to the least-rated ids, then draw this round's questions.
    id_candidates = shuffled_ids[: (10 * NUM_QUESTIONS)]
    image_ids = sample(id_candidates, k=NUM_QUESTIONS)

    images = {}
    for question_index, image_id in enumerate(image_ids):
        # Random gallery order so a model's position gives no hint.
        order = list(range(len(SUBMISSIONS)))
        shuffle(order)

        # Prompt metadata is read from the first submission's dataset.
        row = SUBMISSIONS[submission_names[0]][image_id]
        images[question_index] = {
            "prompt": row["Prompt"],
            "result": "",
            "id": image_id,
            "Challenge": row["Challenge"],
            "Category": row["Category"],
            "Note": row["Note"],
        }
        for position, model_index in enumerate(order):
            images[question_index][f"choice_{position}"] = model_index

    return DataFrame.from_dict(images, orient="index")
def process(dataframe, row_number=0):
    """Collect what the gallery shows for the question at ``row_number``.

    Args:
        dataframe: Question table as produced by ``start``.
        row_number: Index of the question to display.

    Returns:
        A 6-tuple: one image per model in the row's ``choice_<n>`` order,
        then the prompt as a Markdown heading and the progress counter.
        When ``row_number`` equals ``NUM_QUESTIONS`` the images are ``None``
        and both strings are empty.
    """
    if row_number == NUM_QUESTIONS:
        return None, None, None, None, "", ""

    current_row = dataframe.iloc[row_number]
    image_id = int(current_row["id"])

    # Translate the stored model indices back into model keys, in gallery order.
    choices = []
    for position in range(len(SUBMISSIONS)):
        choices.append(submission_names[current_row[f"choice_{position}"]])

    prompt_text = SUBMISSIONS[choices[0]][image_id]["Prompt"]
    prompt = f'# "{prompt_text}"'
    counter = f"***{row_number + 1}/{NUM_QUESTIONS} {PROMPT_FORMAT}***"

    images = [SUBMISSIONS[model_key][image_id]["images"] for model_key in choices]
    return (*images, prompt, counter)
def write_result(user_choice, row_number, dataframe):
    """Store the model behind the picked gallery position and advance.

    Args:
        user_choice: Gallery position that was picked; converted with ``int``.
        row_number: Index of the question being answered.
        dataframe: Question table; its ``result`` cell for this row is
            written in place.

    Returns:
        ``(next_row_number, dataframe)``. Both are returned untouched when
        ``row_number`` already equals ``NUM_QUESTIONS``.
    """
    if row_number == NUM_QUESTIONS:
        return row_number, dataframe

    choice_column = f"choice_{int(user_choice)}"
    model_index = dataframe.iloc[row_number][choice_column]
    dataframe.loc[row_number, "result"] = submission_names[model_index]

    return row_number + 1, dataframe
def get_index(evt: gr.SelectData) -> int:
    """Return the ``index`` attribute of a Gradio select event.

    Args:
        evt: The select event data passed in by Gradio.

    Returns:
        The value of ``evt.index``.
    """
    selected_index = evt.index
    return selected_index
def change_view(row_number, dataframe):
    """Choose which screen is visible and upload a finished round.

    Args:
        row_number: Index of the current question.
        dataframe: Question table including the recorded ``result`` values.

    Returns:
        A dict mapping the ``intro_view``, ``result_view``, ``gallery_view``
        and ``result`` components to their updates. While the round is
        running only the gallery is visible. Once ``row_number`` equals
        ``NUM_QUESTIONS`` the intro and result views are shown and ``result``
        names the most frequently chosen model.

    Side effects:
        On a finished round, pushes the ``id`` and ``result`` columns to a
        new, randomly named dataset under ``SUBMISSION_ORG``, using the
        ``HF_TOKEN`` environment variable as the token.
    """
    if row_number != NUM_QUESTIONS:
        return {
            intro_view: gr.update(visible=False),
            result_view: gr.update(visible=False),
            gallery_view: gr.update(visible=True),
            result: "",
        }

    favorite_model = dataframe["result"].value_counts().idxmax()

    # Upload only the image id and the chosen model. remove_columns is
    # given a list, matching its documented str/list argument.
    dataset = datasets.Dataset.from_pandas(dataframe)
    unwanted_columns = [
        name for name in dataset.column_names if name not in ("id", "result")
    ]
    dataset = dataset.remove_columns(unwanted_columns)

    # Hub repo ids always use "/", so os.path.join (OS-specific separator)
    # is not used here.
    repo_id = f"{SUBMISSION_ORG}/{generate_random_hash()}"
    dataset.push_to_hub(repo_id, token=os.getenv("HF_TOKEN"))

    return {
        intro_view: gr.update(visible=True),
        result_view: gr.update(visible=True),
        gallery_view: gr.update(visible=False),
        result: f"You are of type: [**{favorite_model}**]({LINKS[favorite_model]}) 🔥",
    }
# Markdown shown on the intro screen.
TITLE = "# Community Parti Prompts - Who is your open-source genAI model?"
DESCRIPTION = """
*This is an interactive game in which you click through pre-generated images from SD-v1-5, SD-v2.1, Karlo, and IF
using [Parti Prompts](https://huggingface.co/datasets/nateraw/parti-prompts) prompts.* \n
*Your choices will go into the public community [genAI leaderboard](TODO).*
"""
EXPLANATION = """\n\n
## How it works 📖 \n\n
1. Click on 'Start'
2. A prompt and 4 different images are displayed
3. Select your favorite image
4. After 10 rounds your favorite diffusion model is displayed
"""

# One gallery column per participating model.
GALLERY_COLUMN_NUM = len(SUBMISSIONS)
def _constant_index(index):
    """Return a zero-argument callback that always yields ``index``.

    Binding the value through a factory avoids the late-binding pitfall of
    ``lambda: i`` inside a loop, where every callback would see the final
    loop value.
    """

    def _callback():
        return index

    return _callback


# NOTE(review): the nesting below was reconstructed because the original
# indentation was lost. Confirm that the start button belongs to
# `intro_view` and the "play again" hint to `result_view`.
with gr.Blocks() as demo:
    # Intro screen: title, description, rules and the start button.
    with gr.Column(visible=True) as intro_view:
        gr.Markdown(TITLE)
        gr.Markdown(DESCRIPTION)
        gr.Markdown(EXPLANATION)
        start_button = gr.Button("Start").style(full_width=False)

    # Column layout of the question table; must match what start() returns.
    headers = ["prompt", "result", "id", "Challenge", "Category", "Note"] + [
        f"choice_{i}" for i in range(len(SUBMISSIONS))
    ]
    datatype = ["str", "str", "number", "str", "str", "str"] + len(SUBMISSIONS) * [
        "number"
    ]

    # Hidden state: index of the question currently shown.
    with gr.Column(visible=False):
        row_number = gr.Number(
            label="Current row selection index",
            value=0,
            precision=0,
            interactive=False,
        )

    # Result screen: favourite model plus the full question table.
    with gr.Column(visible=False) as result_view:
        result = gr.Markdown("")
        dataframe = gr.Dataframe(
            headers=headers,
            datatype=datatype,
            row_count=NUM_QUESTIONS,
            col_count=(6 + len(SUBMISSIONS), "fixed"),
            interactive=False,
        )
        gr.Markdown("Click on start to play again!")

    # Hidden state: gallery position of the most recent pick.
    with gr.Column(visible=False):
        selected_image = gr.Number(label="Selected index", value=-1, precision=0)

    # Game screen: progress counter, prompt and one image + button per model.
    with gr.Column(visible=False) as gallery_view:
        with gr.Row():
            counter = gr.Markdown(f"***1/{NUM_QUESTIONS} {PROMPT_FORMAT}***")
        with gr.Row():
            prompt = gr.Markdown("")
        with gr.Blocks():
            with gr.Row():
                with gr.Column() as c1:
                    image_1 = gr.Image(interactive=False)
                    image_1_button = gr.Button("Select 1").style(full_width=True)
                with gr.Column() as c2:
                    image_2 = gr.Image(interactive=False)
                    image_2_button = gr.Button("Select 2").style(full_width=True)
                with gr.Column() as c3:
                    image_3 = gr.Image(interactive=False)
                    image_3_button = gr.Button("Select 3").style(full_width=True)
                with gr.Column() as c4:
                    image_4 = gr.Image(interactive=False)
                    image_4_button = gr.Button("Select 4").style(full_width=True)

    # Start: build a new table, rewind a finished round to question 0,
    # switch to the gallery and show the first question.
    start_button.click(
        fn=start,
        inputs=[],
        outputs=dataframe,
        show_progress=True
    ).then(
        fn=lambda x: 0 if x == NUM_QUESTIONS else x,
        inputs=[row_number],
        outputs=[row_number],
    ).then(
        fn=change_view,
        inputs=[row_number, dataframe],
        outputs=[intro_view, result_view, gallery_view, result]
    ).then(
        fn=process, inputs=[dataframe], outputs=[image_1, image_2, image_3, image_4, prompt, counter]
    )

    # The list holds the select buttons; the name `images` is kept as in the
    # original module.
    images = [image_1_button, image_2_button, image_3_button, image_4_button]
    for i, button in enumerate(images):
        # Each button records its own gallery position, stores the answer,
        # updates the visible screen, shows the next question, rewinds the
        # counter after the last question and refreshes the table.
        button.click(
            fn=_constant_index(i),
            inputs=[],
            outputs=[selected_image],
        ).then(
            fn=write_result,
            inputs=[selected_image, row_number, dataframe],
            outputs=[row_number, dataframe],
        ).then(
            fn=change_view,
            inputs=[row_number, dataframe],
            outputs=[intro_view, result_view, gallery_view, result]
        ).then(
            fn=process,
            inputs=[dataframe, row_number],
            outputs=[image_1, image_2, image_3, image_4, prompt, counter]
        ).then(
            fn=lambda x: 0 if x == NUM_QUESTIONS else x,
            inputs=[row_number],
            outputs=[row_number],
        ).then(
            fn=refresh,
            inputs=[row_number, dataframe],
            outputs=[dataframe],
        )

demo.launch()