# hafidhsoekma's picture
# First commit
# 49bceed
import os
import sys
sys.path.append(os.path.join(os.path.dirname(__file__), "..", ".."))
import json
import time
import numpy as np
import torch
from PIL import Image
from models.base_model import BaseModelMainModel
from utils import configs
from utils.functional import euclidean_distance_normalized, image_augmentations
from .backbone_model import CLIPModel, TorchModel
class PrototypicalNetworksModel(BaseModelMainModel):
    """Classify an image by comparing its embedding with stored class embeddings.

    A backbone (``CLIPModel`` or ``TorchModel``) turns the input image into an
    embedding. That embedding is scored against every entry of
    ``self.embedded_values`` (loaded from ``embedded_value.json``) with
    ``euclidean_distance_normalized``, and the class with the highest score is
    selected.
    """

    def __init__(
        self,
        name_model: str,
        freeze_model: bool,
        pretrained_model: bool,
        support_set_method: str,
    ):
        """Build the backbone and load the stored class embeddings.

        All four arguments are forwarded unchanged to the base class.
        NOTE(review): the base class is expected to expose them as attributes
        of the same name, since the methods below read ``self.name_model``,
        ``self.freeze_model``, ``self.pretrained_model`` and
        ``self.support_set_method`` -- confirm in ``BaseModelMainModel``.

        Args:
            name_model: Backbone identifier; ``"clip"`` selects ``CLIPModel``,
                anything else is passed to ``TorchModel``.
            freeze_model: Forwarded to the backbone wrapper.
            pretrained_model: Forwarded to the backbone wrapper.
            support_set_method: Sub-directory name used to locate
                ``embedded_value.json``.
        """
        super().__init__(name_model, freeze_model, pretrained_model, support_set_method)
        self.init_model()
        self.load_embedded_value()

    def init_model(self) -> None:
        """Instantiate the backbone for ``self.name_model`` and switch it to eval mode."""
        if self.name_model == "clip":
            self.model = CLIPModel(
                configs.CLIP_NAME_MODEL, self.freeze_model, self.pretrained_model
            )
        else:
            self.model = TorchModel(
                self.name_model, self.freeze_model, self.pretrained_model
            )
        self.model.eval()

    def predict(self, image: np.ndarray) -> dict:
        """Predict the class of a single image.

        Args:
            image: Image array handed to the ``image_augmentations()`` pipeline.

        Returns:
            A dict with:
              * ``"character"``: the best-scoring class when its score exceeds
                ``configs.NAME_MODELS[self.name_model]["image_similarity_threshold"]``,
                otherwise the last entry of ``configs.CLASS_CHARACTERS``.
              * ``"confidence"``: the best score itself.
              * ``"inference_time"``: seconds spent in the backbone forward
                pass only (pre- and post-processing are not included).

        Raises:
            IndexError: If no embedded values are loaded, so there is nothing
                to compare against.
        """
        image_input = image_augmentations()(image=image)["image"]
        # Add a leading dimension of size 1 before feeding the backbone.
        image_input = image_input.unsqueeze(0)

        with torch.no_grad():
            start_time = time.perf_counter()
            embedding = self.model(image_input)
            inference_time = time.perf_counter() - start_time
        embedding = embedding.detach().cpu().numpy()

        # Score the embedding against every stored class entry.
        # NOTE(review): despite the helper's name, the value is used as a
        # similarity (highest wins, compared with a similarity threshold) --
        # confirm against utils.functional.
        scores = {
            class_name: euclidean_distance_normalized(embedding, class_embedding)
            for class_name, class_embedding in self.embedded_values.items()
        }
        if not scores:
            # IndexError is kept so callers that already handle the failure of
            # an empty lookup keep working; only the message is new.
            raise IndexError(
                "no embedded values are loaded; cannot compare the image embedding"
            )

        # max() returns the first maximal item, matching a stable descending
        # sort followed by [0], without sorting every class.
        result_class, result_score = max(scores.items(), key=lambda item: item[1])

        threshold = configs.NAME_MODELS[self.name_model]["image_similarity_threshold"]
        return {
            "character": result_class
            if result_score > threshold
            else configs.CLASS_CHARACTERS[-1],
            "confidence": result_score,
            "inference_time": inference_time,
        }

    def load_embedded_value(self) -> None:
        """Load the stored class embeddings into ``self.embedded_values``.

        Reads ``<EMBEDDED_VALUES_PATH>/<name_model>/<support_set_method>/embedded_value.json``.
        """
        embedded_value_path = os.path.join(
            configs.EMBEDDED_VALUES_PATH,
            self.name_model,
            self.support_set_method,
            "embedded_value.json",
        )
        # JSON is UTF-8 by specification; do not depend on the locale default.
        with open(embedded_value_path, "r", encoding="utf-8") as f:
            self.embedded_values = json.load(f)
if __name__ == "__main__":
    # Manual smoke test: classify one bundled example image and print its label.
    model = PrototypicalNetworksModel("mobilenetv3_large_100", True, True, "5_shot")
    # Resolve the asset relative to this file (like the sys.path tweak at the
    # top of the module) so the script does not depend on the working directory.
    example_image_path = os.path.join(
        os.path.dirname(__file__),
        "..",
        "..",
        "assets",
        "example_images",
        "gon",
        "306e5d35-b301-4299-8022-0c89dc0b7690.png",
    )
    # The context manager closes the underlying file once the pixels are copied.
    with Image.open(example_image_path) as example_image:
        image = np.array(example_image.convert("RGB"))
    print(model.predict(image)["character"])