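# Text-to-image search demo: a Gradio app that embeds a text query with CLIP
# and retrieves the most similar images from a precomputed embedding index.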
import os
import pickle

import gradio as gr
from PIL import Image
from sentence_transformers import util
from transformers import CLIPProcessor, CLIPModel, CLIPTokenizer
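
# Load the pretrained CLIP checkpoint together with its processor and tokenizer.
# The tokenizer turns the text query into token ids; the model encodes them into
# the same embedding space as the image embeddings.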
model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-base-patch32")
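
# Precomputed CLIP image embeddings for the handbag photos, stored as a
# (file names, embedding tensor) pair. The pickle is assumed to have been
# built offline with the image encoder of the same CLIP checkpoint.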
emb_filename = 'lv-handbags.pkl'

with open(emb_filename, 'rb') as fIn:
    img_names, img_emb = pickle.load(fIn)


def search_text(query, top_k=4):
    """Search for images matching a text query.

    Args:
        query (str): the text query to search for.
        top_k (int, optional): number of images to return. Defaults to 4.

    Returns:
        list: PIL images related to the query.
    """
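    # Encode the text query into CLIP's shared text-image embedding space.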
    inputs = tokenizer([query], padding=True, return_tensors="pt")
    query_emb = model.get_text_features(**inputs)
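
    # Cosine-similarity search of the query embedding against the precomputed
    # image embeddings; keep the top_k closest matches.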
    hits = util.semantic_search(query_emb, img_emb, top_k=top_k)[0]
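
    # Open the matching image files so they can be shown in the Gradio gallery.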
    images = []
    for hit in hits:
        img = Image.open(os.path.join("lvphotos/", img_names[hit['corpus_id']]))
        images.append(img)

    return images
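

# Build the Gradio UI: a text box for the query and a gallery for the retrieved
# images. Note: `.style(grid=...)` is the Gradio 3.x Gallery API; newer Gradio
# releases pass `columns` and `height` to gr.Gallery directly.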
iface = gr.Interface(
    title="Hushh Text to Image using CLIP Model on Louis Vuitton API",
    description="Quick demo of using text to perform vector search on an image collection",
    article="TBD",
    fn=search_text,
    inputs=[gr.Textbox(lines=4,
                       label="Write what you are looking for in an image...",
                       placeholder="Text Here...")],
    outputs=[gr.Gallery(
        label="Generated images", show_label=False, elem_id="gallery"
    ).style(grid=[2], height="auto")],
    examples=[
        ["Small Purse"],
        ["Big Bag"],
    ],
).launch(debug=True)