import os

import torch
from safetensors.torch import load_file
from huggingface_hub import hf_hub_download
from transformers import CLIPImageProcessor, CLIPVisionModelWithProjection

from model import AestheticPredictorModel # local module (model.py)
class CityAestheticsPipeline:
    """
    Demo model pipeline for [image=>score] prediction.
    Accepts a single model path on initialization.
    The resulting object can be called directly with a PIL image as input
    and returns a single float with the predicted score in [0.0;1.0].
    """
    clip_ver = "openai/clip-vit-large-patch14"

    def __init__(self, model_path, device="cpu", clip_dtype=torch.float32):
        self.device = device
        self.clip_dtype = clip_dtype
        self._init_clip()
        self.model = self._load_model(model_path)
        print("CityAesthetics: Pipeline init ok") # debug
    def __call__(self, raw):
        emb = self.get_clip_emb(raw)
        return self.get_model_pred(self.model, emb)

    def get_model_pred(self, model, emb):
        with torch.no_grad():
            pred = model(emb)
        return float(pred.detach().cpu().squeeze(0))
    def get_clip_emb(self, raw):
        img = self.proc(
            images = raw,
            return_tensors = "pt",
        )["pixel_values"].to(self.clip_dtype).to(self.device)
        with torch.no_grad():
            emb = self.clip(pixel_values=img)
        return emb["image_embeds"].detach().to(torch.float32)
    def _init_clip(self):
        self.proc = CLIPImageProcessor.from_pretrained(self.clip_ver)
        self.clip = CLIPVisionModelWithProjection.from_pretrained(
            self.clip_ver,
            device_map = self.device,
            torch_dtype = self.clip_dtype,
        )
    def _load_model(self, path):
        sd = load_file(path)
        # only allow models matching the expected CLIP-L embedding size
        assert tuple(sd["up.0.weight"].shape) == (1024, 768)
        model = AestheticPredictorModel()
        model.eval()
        model.load_state_dict(sd)
        model.to(self.device)
        return model
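
# Example usage (sketch): the checkpoint and image paths below are
# placeholders, not files shipped with this demo.
#
#   from PIL import Image
#   pipe = CityAestheticsPipeline("models/AesPred-Anime-v1.8.safetensors")
#   score = pipe(Image.open("image.png")) # float in [0.0;1.0]
#
# On a GPU, running the CLIP encoder in half precision should also work:
#
#   pipe = CityAestheticsPipeline(path, device="cuda", clip_dtype=torch.float16)
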
class CityAestheticsMultiModelPipeline(CityAestheticsPipeline):
    """
    Demo multi-model pipeline for [image=>score] prediction.
    Accepts a list of model paths on initialization.
    The resulting object can be called directly with a PIL image as input
    and returns a dict mapping each model name to its score in [0.0;1.0].
    """
    def __init__(self, model_paths, device="cpu", clip_dtype=torch.float32):
        self.device = device
        self.clip_dtype = clip_dtype
        self._init_clip()
        self.models = {}
        for path in model_paths:
            name = os.path.splitext(os.path.basename(path))[0]
            self.models[name] = self._load_model(path)
        print("CityAesthetics: Pipeline init ok") # debug
    def __call__(self, raw):
        emb = self.get_clip_emb(raw)
        out = {}
        for name, model in self.models.items():
            # embedding is computed once and shared across all models
            out[name] = self.get_model_pred(model, emb)
        return out
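
# Example usage (sketch; both model files are placeholders):
#
#   from PIL import Image
#   pipe = CityAestheticsMultiModelPipeline([
#       "models/AesPred-Anime-v1.8.safetensors",
#       "models/AesPred-Other-v1.0.safetensors",
#   ])
#   scores = pipe(Image.open("image.png"))
#   # => {"AesPred-Anime-v1.8": 0.87, "AesPred-Other-v1.0": 0.42}
#   # (keys are the checkpoint filenames without extension)
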
def get_model_path(name, repo, token=True):
    """
    Returns the local model path, falling back to the HF Hub if required.
    """
    fname = f"{name}.safetensors"
    # local path: [models/AesPred-Anime-v1.8.safetensors]
    path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "models")
    if os.path.isfile(os.path.join(path, fname)):
        print("CityAesthetics: Using local model")
        return os.path.join(path, fname)
    # huggingface hub fallback
    print("CityAesthetics: Using HF Hub model")
    return str(hf_hub_download(
        token = token,
        repo_id = repo,
        filename = fname,
    ))
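
# Example usage (sketch): checks the local models/ folder first, then
# downloads from the HF Hub. The repo id below is a placeholder.
#
#   path = get_model_path("AesPred-Anime-v1.8", "your-username/your-repo")
#   pipe = CityAestheticsPipeline(path)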