|
import os
|
|
import sys
|
|
|
|
sys.path.append(os.path.join(os.path.dirname(__file__), "..", ".."))
|
|
|
|
import json
|
|
import time
|
|
|
|
import numpy as np
|
|
import torch
|
|
from PIL import Image
|
|
|
|
from models.base_model import BaseModelMainModel
|
|
from utils import configs
|
|
from utils.functional import euclidean_distance_normalized, image_augmentations
|
|
|
|
from .backbone_model import CLIPModel, TorchModel
|
|
|
|
|
|
class PrototypicalNetworksModel(BaseModelMainModel):
    """Label an image by scoring its embedding against stored per-class values.

    A backbone (``CLIPModel`` or ``TorchModel``) embeds the input image. The
    embedding is scored against every entry loaded from ``embedded_value.json``
    with ``euclidean_distance_normalized`` and the highest-scoring entry wins.
    """

    def __init__(
        self,
        name_model: str,
        freeze_model: bool,
        pretrained_model: bool,
        support_set_method: str,
    ):
        """Forward the settings to the base class, then prepare the model.

        Args:
            name_model: ``"clip"`` selects ``CLIPModel``; any other value is
                passed to ``TorchModel``. Also used as a key into
                ``configs.NAME_MODELS`` and as a directory name when locating
                the embedded values.
            freeze_model: Forwarded to the backbone constructor.
            pretrained_model: Forwarded to the backbone constructor.
            support_set_method: Directory name used when locating the
                embedded values file.
        """
        # NOTE(review): the methods below read these settings back as
        # attributes of ``self``; presumably the base initializer stores them.
        super().__init__(name_model, freeze_model, pretrained_model, support_set_method)
        self.init_model()
        self.load_embedded_value()

    def init_model(self):
        """Build the backbone into ``self.model`` and switch it to eval mode."""
        if self.name_model != "clip":
            backbone = TorchModel(
                self.name_model, self.freeze_model, self.pretrained_model
            )
        else:
            # The CLIP checkpoint name comes from the project configuration.
            backbone = CLIPModel(
                configs.CLIP_NAME_MODEL, self.freeze_model, self.pretrained_model
            )
        self.model = backbone
        self.model.eval()

    def predict(self, image: np.ndarray) -> dict:
        """Embed ``image`` and report the best-scoring stored class.

        Args:
            image: Image array handed to the ``image_augmentations`` pipeline.

        Returns:
            A dict with three keys:
            ``"character"`` is the best-scoring class, or
            ``configs.CLASS_CHARACTERS[-1]`` when the best score does not
            exceed the model's ``image_similarity_threshold``;
            ``"confidence"`` is the best score;
            ``"inference_time"`` is the seconds spent in the backbone call.
        """
        transform = image_augmentations()
        augmented = transform(image=image)["image"]
        # Add a leading axis before the forward pass (presumably the batch axis).
        batch = augmented.unsqueeze(axis=0)

        with torch.no_grad():
            started = time.perf_counter()
            features = self.model(batch)
            # Only the backbone forward pass is timed.
            elapsed = time.perf_counter() - started
            embedding = features.detach().cpu().numpy()

        scores = {
            label: euclidean_distance_normalized(embedding, reference)
            for label, reference in self.embedded_values.items()
        }

        # Stable descending sort: the largest score comes first and ties keep
        # the order of the embedded values. The highest value is treated as
        # the best match -- presumably the helper returns a similarity-like
        # score; confirm against utils.functional.
        ranking = list(scores.items())
        ranking.sort(key=lambda pair: pair[1], reverse=True)
        best_label, best_score = ranking[0]

        threshold = configs.NAME_MODELS[self.name_model]["image_similarity_threshold"]
        if best_score > threshold:
            character = best_label
        else:
            # Below the threshold: fall back to the last configured class.
            character = configs.CLASS_CHARACTERS[-1]

        return {
            "character": character,
            "confidence": best_score,
            "inference_time": elapsed,
        }

    def load_embedded_value(self):
        """Read the stored embedded values from JSON into ``self.embedded_values``.

        The file is located at ``<configs.EMBEDDED_VALUES_PATH>/<name_model>/
        <support_set_method>/embedded_value.json``.
        """
        values_path = os.path.join(
            configs.EMBEDDED_VALUES_PATH,
            self.name_model,
            self.support_set_method,
            "embedded_value.json",
        )
        with open(values_path, "r") as handle:
            self.embedded_values = json.load(handle)
|
|
|
|
|
|
if __name__ == "__main__":
    # Manual smoke test: classify one example image and print its label.
    # The image path is relative to the current working directory.
    example_path = (
        "../../assets/example_images/gon/306e5d35-b301-4299-8022-0c89dc0b7690.png"
    )

    model = PrototypicalNetworksModel("mobilenetv3_large_100", True, True, "5_shot")

    # Convert to RGB before turning the picture into an array.
    picture = Image.open(example_path).convert("RGB")
    image = np.array(picture)

    prediction = model.predict(image)
    print(prediction["character"])
|
|
|