""" Builds a similarity index for a directory of images """ import glob import os import sys import tarfile import requests import streamlit as st from PIL import Image from txtai.embeddings import Embeddings def images(directory): """ Generator that loops over each image in a directory. Args: directory: directory with images """ for path in glob.glob(directory + "/*jpg") + glob.glob(directory + "/*png"): yield (path, Image.open(path), None) @st.cache(allow_output_mutation=True) def build(directory): """ Builds an image embeddings index. Args: directory: directory with images Returns: Embeddings index """ embeddings = Embeddings({"method": "sentence-transformers", "path": "clip-ViT-B-32"}) embeddings.index(images(directory)) # Update model to support multilingual queries embeddings.config["path"] = "sentence-transformers/clip-ViT-B-32-multilingual-v1" embeddings.model = embeddings.loadVectors() return embeddings def app(directory): """ Streamlit application that runs searches against an image embeddings index. Args: directory: directory with images """ # Build embeddings index embeddings = build(directory) st.title("Image search") st.markdown("This application shows how images and text can be embedded into the same space to support similarity search. ") st.markdown( "[sentence-transformers](https://github.com/UKPLab/sentence-transformers/tree/master/examples/applications/image-search) " + "recently added support for the [OpenAI CLIP model](https://github.com/openai/CLIP). This model embeds text and images into " + "the same space, enabling image similarity search. txtai can directly utilize these models." ) query = st.text_input("Search query:") if query: index, _ = embeddings.search(query, 1)[0] st.image(Image.open(index)) if __name__ == "__main__": os.environ["TOKENIZERS_PARALLELISM"] = "false" files = "/tmp/txtai" if not os.path.exists(files): os.makedirs(files) response = requests.get("https://github.com/neuml/txtai/releases/download/v3.5.0/tests.tar.gz", stream=True) f = tarfile.open(fileobj=response.raw, mode="r|gz") f.extractall(path="/tmp") app(files)