"""
Builds a similarity index for a directory of images
"""
import glob
import os
import sys
import tarfile
import requests
import streamlit as st
from PIL import Image
from txtai.embeddings import Embeddings
class Application:
"""
Main application
"""
def __init__(self, directory):
"""
Creates a new application.
Args:
directory: directory of images
"""
self.embeddings = self.build(directory)
    def build(self, directory):
        """
        Builds an image embeddings index.

        Args:
            directory: directory with images

        Returns:
            Embeddings index
        """

        # Index images with the CLIP vision/text model
        embeddings = Embeddings({"method": "sentence-transformers", "path": "clip-ViT-B-32"})
        embeddings.index(self.images(directory))

        # Update model to support multilingual queries - swaps in a text encoder that maps
        # queries in other languages into the same vector space as the CLIP image embeddings
        embeddings.config["path"] = "sentence-transformers/clip-ViT-B-32-multilingual-v1"
        embeddings.model = embeddings.loadvectors()

        return embeddings
    def images(self, directory):
        """
        Generator that loops over each image in a directory.

        Args:
            directory: directory with images
        """

        # Yield (id, data, tags) tuples for each jpg/png image, using the file path as the id
        for path in glob.glob(directory + "/*jpg") + glob.glob(directory + "/*png"):
            yield (path, Image.open(path), None)
    def run(self):
        """
        Runs a Streamlit application.
        """

        st.title("Image search")
        st.markdown("This application shows how images and text can be embedded into the same space to support similarity search.")
        st.markdown(
            "[sentence-transformers](https://github.com/UKPLab/sentence-transformers/tree/master/examples/applications/image-search) "
            + "recently added support for the [OpenAI CLIP model](https://github.com/openai/CLIP). This model embeds text and images into "
            + "the same space, enabling image similarity search. txtai can directly utilize these models."
        )

        query = st.text_input("Search query:")
        if query:
            # Display the best matching image for the query
            index, _ = self.embeddings.search(query, 1)[0]
            st.image(Image.open(index))


@st.cache(allow_output_mutation=True)
def create(directory):
    """
    Creates and caches a Streamlit application.

    Args:
        directory: directory of images to index

    Returns:
        Application
    """

    return Application(directory)
if __name__ == "__main__":
os.environ["TOKENIZERS_PARALLELISM"] = "false"
files = "/tmp/txtai"
if not os.path.exists(files):
os.makedirs(files)
response = requests.get("https://github.com/neuml/txtai/releases/download/v3.5.0/tests.tar.gz", stream=True)
f = tarfile.open(fileobj=response.raw, mode="r|gz")
f.extractall(path="/tmp")
# Create and run application
app = create(files)
app.run()
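
    # Usage sketch (not part of the original file): once streamlit, txtai and their dependencies
    # are installed, the app can be launched with `streamlit run <this file>`. The cached
    # embeddings index can also be queried programmatically, for example:
    #
    #   app = create("/tmp/txtai")
    #   path, score = app.embeddings.search("beach", 1)[0]
    #
    # The "beach" query is an illustrative assumption; search returns (id, score) tuples where
    # the id is the image path yielded by Application.images.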