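# imagesearch / app.py
# Page-header residue from the hosting site, kept for provenance:
# uploader "davidmezzetti", last commit message "Update app.py", revision 8d54808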
"""
Builds a similarity index for a directory of images
"""
import glob
import os
import sys
import tarfile
import requests
import streamlit as st
from PIL import Image
from txtai.embeddings import Embeddings
class Application:
    """
    Main application.

    Builds a txtai embeddings index over the images in a directory and serves a
    Streamlit page that shows the single best-matching image for a text query.
    """

    def __init__(self, directory):
        """
        Creates a new application.

        Args:
            directory: directory of images
        """

        self.embeddings = self.build(directory)

    def build(self, directory):
        """
        Builds an image embeddings index.

        Args:
            directory: directory with images

        Returns:
            Embeddings index
        """

        embeddings = Embeddings({"method": "sentence-transformers", "path": "clip-ViT-B-32"})
        embeddings.index(self.images(directory))

        # Update model to support multilingual queries. The images are indexed with
        # the model above; only the model used from here on is swapped.
        embeddings.config["path"] = "sentence-transformers/clip-ViT-B-32-multilingual-v1"
        embeddings.model = embeddings.loadvectors()

        return embeddings

    def images(self, directory):
        """
        Generator that loops over each image in a directory.

        Only files with a ".jpg" or ".png" extension directly inside directory are
        returned (jpg files first, then png files). The file path doubles as the
        index id, which is how run() later re-opens the matching image.

        Args:
            directory: directory with images

        Yields:
            (path, PIL image, None) tuples
        """

        # Escape the directory so glob metacharacters in its name are taken literally
        root = glob.escape(directory)

        for extension in ("jpg", "png"):
            # Require the "." so names that merely end in "jpg"/"png" are not picked up
            for path in glob.glob(os.path.join(root, "*." + extension)):
                yield (path, Image.open(path), None)

    def run(self):
        """
        Runs a Streamlit application.

        Renders the page description and a query box. When a query is entered, the
        top search result is displayed, or a warning if the search returns nothing.
        """

        st.title("Image search")

        st.markdown("This application shows how images and text can be embedded into the same space to support similarity search. ")
        st.markdown(
            "[sentence-transformers](https://github.com/UKPLab/sentence-transformers/tree/master/examples/applications/image-search) "
            + "recently added support for the [OpenAI CLIP model](https://github.com/openai/CLIP). This model embeds text and images into "
            + "the same space, enabling image similarity search. txtai can directly utilize these models."
        )

        query = st.text_input("Search query:")
        if query:
            results = self.embeddings.search(query, 1)
            if results:
                # First element of the result is the id, which is the image path
                # (see images()); the second element is not used here.
                path, _ = results[0]
                st.image(Image.open(path))
            else:
                # An empty result list would otherwise raise IndexError
                st.warning("No matching images found")
@st.cache(allow_output_mutation=True)
def create(directory):
    """
    Builds the Application for an image directory.

    The function is wrapped with st.cache(allow_output_mutation=True), so the
    result is cached by Streamlit rather than rebuilt on every call.

    Args:
        directory: directory of images to index

    Returns:
        Application
    """

    application = Application(directory)
    return application
if __name__ == "__main__":
    # NOTE(review): presumably set to silence the Hugging Face tokenizers
    # parallelism/fork warning - confirm
    os.environ["TOKENIZERS_PARALLELISM"] = "false"

    files = "/tmp/txtai"
    if not os.path.exists(files):
        # Download the sample data only when it is not already present.
        # A timeout prevents the app from hanging forever on a stalled connection.
        url = "https://github.com/neuml/txtai/releases/download/v3.5.0/tests.tar.gz"
        with requests.get(url, stream=True, timeout=60) as response:
            # Fail loudly on an HTTP error instead of feeding an error page to tarfile
            response.raise_for_status()

            # Create the directory only after the request succeeded, so a failed
            # download does not leave an empty directory that blocks a later retry
            os.makedirs(files)

            # NOTE(security): extractall trusts the member paths inside the archive.
            # The source is a fixed release URL; do not reuse this with untrusted archives.
            # The archive is extracted into /tmp and is expected to populate /tmp/txtai.
            with tarfile.open(fileobj=response.raw, mode="r|gz") as archive:
                archive.extractall(path="/tmp")

    # Create and run application
    app = create(files)
    app.run()