Spaces:

NeuML
/

imagesearch

Running

App Files Files Community

imagesearch / app.py

davidmezzetti

Update app.py

7a8ad03 about 3 years ago

raw

history blame

2.9 kB

	"""
	Builds a similarity index for a directory of images
	"""

	import glob
	import os
	import sys
	import tarfile

	import requests
	import streamlit as st

	from PIL import Image

	from txtai.embeddings import Embeddings


	class Application:
	    """
	    Main application.

	    Builds a txtai embeddings index over the images in a directory and runs a
	    Streamlit page that shows the top matching image for a text query.
	    """

	    # File name patterns, relative to the image directory, selecting the images to index
	    PATTERNS = ("*.jpg", "*.png")

	    def __init__(self, directory):
	        """
	        Creates a new application.

	        Args:
	            directory: directory of images
	        """

	        self.embeddings = self.build(directory)

	    def build(self, directory):
	        """
	        Builds an image embeddings index.

	        Args:
	            directory: directory with images

	        Returns:
	            Embeddings index
	        """

	        # Index the images with the CLIP image/text model
	        embeddings = Embeddings({"method": "sentence-transformers", "path": "clip-ViT-B-32"})
	        embeddings.index(self.images(directory))

	        # Update model to support multilingual queries. The index built above is kept,
	        # only the model used from here on is replaced.
	        embeddings.config["path"] = "sentence-transformers/clip-ViT-B-32-multilingual-v1"
	        embeddings.model = embeddings.loadVectors()

	        return embeddings

	    @staticmethod
	    def _paths(directory):
	        """
	        Lists the image files in a directory.

	        Args:
	            directory: directory with images

	        Returns:
	            list of paths matching PATTERNS, grouped in pattern order
	        """

	        paths = []
	        for pattern in Application.PATTERNS:
	            # A wildcard is required here: a bare "jpg" only matches a file literally named "jpg"
	            paths.extend(glob.glob(os.path.join(directory, pattern)))

	        return paths

	    def images(self, directory):
	        """
	        Generator that loops over each image in a directory.

	        Args:
	            directory: directory with images

	        Yields:
	            (path, opened image, None) tuples, the path doubles as the index id
	            (presumably txtai's (id, data, tags) layout - confirm against the txtai version in use)
	        """

	        for path in self._paths(directory):
	            yield (path, Image.open(path), None)

	    def run(self):
	        """
	        Runs a Streamlit application.
	        """

	        st.title("Image search")

	        st.markdown("This application shows how images and text can be embedded into the same space to support similarity search. ")
	        st.markdown(
	            "[sentence-transformers](https://github.com/UKPLab/sentence-transformers/tree/master/examples/applications/image-search) "
	            + "recently added support for the [OpenAI CLIP model](https://github.com/openai/CLIP). This model embeds text and images into "
	            + "the same space, enabling image similarity search. txtai can directly utilize these models."
	        )

	        query = st.text_input("Search query:")
	        if query:
	            # Only the single best match is requested
	            results = self.embeddings.search(query, 1)
	            if results:
	                # Result ids are the image paths yielded by images()
	                path, _ = results[0]
	                st.image(Image.open(path))
	            else:
	                # Nothing to show (for example an empty index), avoid an IndexError on results[0]
	                st.warning("No matching images found.")


	@st.cache(allow_output_mutation=True)
	def create(directory):
	    """
	    Builds the Application for a directory of images.

	    The function is wrapped in Streamlit's cache decorator, so the returned
	    Application is a cached object.

	    Args:
	        directory: directory of images to index

	    Returns:
	        Application
	    """

	    application = Application(directory)
	    return application

	if __name__ == "__main__":
	    # Set before the application (and its models) is created
	    os.environ["TOKENIZERS_PARALLELISM"] = "false"

	    # Download and unpack the sample images on first run only
	    files = "/tmp/txtai"
	    if not os.path.exists(files):
	        url = "https://github.com/neuml/txtai/releases/download/v3.5.0/tests.tar.gz"
	        with requests.get(url, stream=True, timeout=60) as response:
	            # Fail on an HTTP error instead of handing an error page to tarfile
	            response.raise_for_status()

	            # Created only after the request succeeded, so a failed download does not
	            # leave an empty directory that suppresses the download on the next run
	            os.makedirs(files)

	            # "r|gz" reads the gzip archive as a forward-only stream, matching the raw response body
	            # NOTE(review): extractall trusts the member paths inside the downloaded archive
	            with tarfile.open(fileobj=response.raw, mode="r|gz") as archive:
	                archive.extractall(path="/tmp")

	    # Create and run application
	    app = create(files)
	    app.run()