wd-tagger-heatmap-more-models

Running

App Files Files Community

wd-tagger-heatmap-more-models / tagger /common.py

neggles

make the thing work

c24a176 8 months ago

raw

history blame contribute delete

4.99 kB

	import math
	from dataclasses import dataclass
	from functools import lru_cache
	from pathlib import Path
	from typing import Optional

	import numpy as np
	import pandas as pd
	import torch
	from huggingface_hub import hf_hub_download
	from huggingface_hub.utils import HfHubHTTPError
	from PIL import Image
	from torch import Tensor, nn


	@dataclass
	class Heatmap:
	    """Container bundling a label, its score and a PIL image."""

	    # Text label this entry belongs to.
	    label: str
	    # Numeric score stored alongside the label.
	    score: float
	    # PIL image stored for this label (presumably the rendered heatmap -- confirm against caller).
	    image: Image.Image


	@dataclass
	class LabelData:
	    """Tag names plus, per tag category, the row indices of the tags in that category.

	    ``load_labels_hf`` fills the index lists from the ``category`` column of
	    ``selected_tags.csv`` (9 -> rating, 0 -> general, 4 -> character).
	    """

	    # Every tag name, in CSV row order.
	    names: list[str]
	    # Row indices of tags whose category is 9.
	    rating: list[np.int64]
	    # Row indices of tags whose category is 0.
	    general: list[np.int64]
	    # Row indices of tags whose category is 4.
	    character: list[np.int64]


	@dataclass
	class ImageLabels:
	    """Labels for a single image: two string renderings plus per-category scores."""

	    # String form of the labels (presumably a human-readable caption -- confirm against caller).
	    caption: str
	    # String form of the labels (presumably booru-style tag text -- confirm against caller).
	    booru: str
	    # Rating label -> score.
	    rating: dict[str, float]
	    # General label -> score.
	    general: dict[str, float]
	    # Character label -> score.
	    character: dict[str, float]


	@lru_cache(maxsize=5)
	def load_labels_hf(
	    repo_id: str,
	    revision: Optional[str] = None,
	    token: Optional[str] = None,
	) -> LabelData:
	    """Download ``selected_tags.csv`` from a Hugging Face repo and index its tags.

	    Results are memoised for the five most recently used argument combinations.

	    Args:
	        repo_id: Hub repository that hosts ``selected_tags.csv``.
	        revision: Optional revision forwarded to ``hf_hub_download``.
	        token: Optional access token forwarded to ``hf_hub_download``.

	    Returns:
	        A ``LabelData`` holding every tag name plus the row indices of the
	        rating (category 9), general (category 0) and character (category 4) tags.

	    Raises:
	        FileNotFoundError: If the Hub download fails with ``HfHubHTTPError``.
	    """
	    try:
	        downloaded = hf_hub_download(
	            repo_id=repo_id,
	            filename="selected_tags.csv",
	            revision=revision,
	            token=token,
	        )
	    except HfHubHTTPError as e:
	        raise FileNotFoundError(f"selected_tags.csv failed to download from {repo_id}") from e
	    csv_path = Path(downloaded).resolve()

	    table: pd.DataFrame = pd.read_csv(csv_path, usecols=["name", "category"])
	    categories = table["category"]

	    def rows_in(category_id: int) -> list[np.int64]:
	        # positional indices of the rows whose category matches
	        return list(np.where(categories == category_id)[0])

	    return LabelData(
	        names=table["name"].tolist(),
	        rating=rows_in(9),
	        general=rows_in(0),
	        character=rows_in(4),
	    )


	def mcut_threshold(probs: np.ndarray) -> float:
	    """
	    Maximum Cut Thresholding (MCut)
	    Largeron, C., Moulin, C., & Gery, M. (2012). MCut: A Thresholding Strategy
	    for Multi-label Classification. In 11th International Symposium, IDA 2012
	    (pp. 172-183).

	    Sorts the probabilities in descending order, locates the largest gap
	    between two neighbouring values and returns the midpoint of that gap.

	    Args:
	        probs: 1-D array of label probabilities (at least two entries).

	    Returns:
	        The threshold as a plain Python float.

	    Raises:
	        ValueError: If ``probs`` holds fewer than two values, since no gap
	            between neighbours exists to cut at.
	    """
	    if probs.size < 2:
	        raise ValueError(
	            f"mcut_threshold needs at least two probabilities, got {probs.size}"
	        )

	    ranked = np.sort(probs)[::-1]  # descending order
	    gaps = ranked[:-1] - ranked[1:]
	    cut = int(gaps.argmax())  # first occurrence of the widest gap
	    return float((ranked[cut] + ranked[cut + 1]) / 2)


	def pil_ensure_rgb(image: Image.Image) -> Image.Image:
	    """Return ``image`` in RGB mode, flattening any alpha onto a white background.

	    Images that are neither RGB nor RGBA are converted first: to RGBA when
	    their ``info`` carries a ``"transparency"`` entry, otherwise to RGB.
	    """
	    # normalise palette / greyscale / other modes to RGB or RGBA
	    if image.mode not in ["RGB", "RGBA"]:
	        target_mode = "RGBA" if "transparency" in image.info else "RGB"
	        image = image.convert(target_mode)

	    # nothing to flatten when there is no alpha channel
	    if image.mode != "RGBA":
	        return image

	    # composite the RGBA image over an opaque white backdrop, then drop alpha
	    backdrop = Image.new("RGBA", image.size, (255, 255, 255))
	    backdrop.alpha_composite(image)
	    return backdrop.convert("RGB")


	def pil_pad_square(
	    image: Image.Image,
	    fill: tuple[int, int, int] = (255, 255, 255),
	) -> Image.Image:
	    """Centre ``image`` on a new square RGB canvas filled with ``fill``.

	    The canvas side equals the larger of the image's width and height, so
	    the image is never scaled or cropped. The default fill is white.
	    """
	    width, height = image.size
	    side = max(width, height)
	    # top-left corner that centres the image on the square canvas
	    offset = ((side - width) // 2, (side - height) // 2)

	    squared = Image.new("RGB", (side, side), fill)
	    squared.paste(image, offset)
	    return squared


	def preprocess_image(
	    image: Image.Image,
	    size_px: int | tuple[int, int],
	    upscale: bool = True,
	) -> Image.Image:
	    """
	    Preprocess an image to be square and centered on a white background.

	    The image is converted to RGB, padded to a square, then scaled to
	    ``size_px``.

	    Args:
	        image: Source PIL image.
	        size_px: Target size; a single int is treated as ``(size_px, size_px)``.
	        upscale: When ``False``, an image smaller than the target raises
	            instead of being enlarged.

	    Returns:
	        The padded, resized RGB image.

	    Raises:
	        ValueError: If the padded image is smaller than the target in either
	            dimension and ``upscale`` is ``False``.
	    """
	    if isinstance(size_px, int):
	        size_px = (size_px, size_px)

	    # ensure RGB and pad to square
	    image = pil_ensure_rgb(image)
	    image = pil_pad_square(image)

	    # resize to target size
	    if image.size[0] < size_px[0] or image.size[1] < size_px[1]:
	        if upscale is False:
	            raise ValueError("Image is smaller than target size, and upscaling is disabled")
	        image = image.resize(size_px, Image.LANCZOS)
	    elif image.size[0] > size_px[0] or image.size[1] > size_px[1]:
	        # thumbnail() shrinks in place; the padded canvas is a fresh image, so
	        # the caller's original is not modified
	        image.thumbnail(size_px, Image.BICUBIC)

	    return image


	def pil_make_grid(
	    images: list[Image.Image],
	    max_cols: int = 8,
	    padding: int = 4,
	    bg_color: tuple[int, int, int] = (40, 42, 54),  # dracula background color
	    partial_rows: bool = True,
	) -> Image.Image:
	    """Tile ``images`` onto a single RGB canvas laid out as a near-square grid.

	    The column count is ``floor(sqrt(len(images)))`` capped at ``max_cols``.
	    Every cell uses the size of the first image, with ``padding`` pixels
	    around and between cells.

	    Args:
	        images: Non-empty list of images, assumed to all share one size.
	        max_cols: Upper bound on the number of columns (must be >= 1).
	        padding: Gap in pixels between cells and around the border.
	        bg_color: RGB colour of the canvas behind the images.
	        partial_rows: When ``False`` and the last row would be incomplete,
	            that row is dropped and its images are left out of the grid.

	    Returns:
	        The composed grid image.

	    Raises:
	        ValueError: If ``images`` is empty or ``max_cols`` is less than 1.
	    """
	    if not images:
	        raise ValueError("pil_make_grid requires at least one image")
	    if max_cols < 1:
	        raise ValueError("max_cols must be at least 1")

	    n_cols = min(math.floor(math.sqrt(len(images))), max_cols)
	    n_rows = math.ceil(len(images) / n_cols)

	    # if the final row is not full and partial_rows is False, remove a row
	    if n_cols * n_rows > len(images) and not partial_rows:
	        n_rows -= 1

	    # assumes all images are same size
	    image_width, image_height = images[0].size

	    canvas_width = ((image_width + padding) * n_cols) + padding
	    canvas_height = ((image_height + padding) * n_rows) + padding

	    canvas = Image.new("RGB", (canvas_width, canvas_height), bg_color)
	    # only paste images that have a cell; any beyond the (possibly trimmed)
	    # grid would land outside the canvas
	    for i, img in enumerate(images[: n_cols * n_rows]):
	        x = (i % n_cols) * (image_width + padding) + padding
	        y = (i // n_cols) * (image_height + padding) + padding
	        canvas.paste(img, (x, y))

	    return canvas


	# Emoticon-style tag names, copied from the list in the wd14-tagger web UI extension:
	# https://github.com/toriato/stable-diffusion-webui-wd14-tagger/blob/a9eacb1eff904552d3012babfa28b57e1d3e295c/tagger/ui.py#L368
	kaomojis = [
	    "0_0",
	    "(o)_(o)",
	    "+_+",
	    "+_-",
	    "._.",
	    "<o>_<o>",
	    "<|>_<|>",
	    "=_=",
	    ">_<",
	    "3_3",
	    "6_9",
	    ">_o",
	    "@_@",
	    "^_^",
	    "o_o",
	    "u_u",
	    "x_x",
	    "|_|",
	    "||_||",
	]