Spaces:
Running
Running
File size: 1,856 Bytes
f0947cd 073b11e f0947cd |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 |
"""
Basic similarity search example. Used in the original txtai demo.
"""
import os
import streamlit as st
from txtai.embeddings import Embeddings
class Application:
    """
    Main application.

    Holds a txtai Embeddings instance and renders a Streamlit page that shows the
    row of user-editable text that best matches a query.
    """

    # Sample rows pre-filled into the "Data" text area
    DEFAULT_DATA = (
        "US tops 5 million confirmed virus cases",
        "Canada's last fully intact ice shelf has suddenly collapsed, forming a Manhattan-sized iceberg",
        "Beijing mobilises invasion craft along coast as Taiwan tensions escalate",
        "The National Park Service warns against sacrificing slower friends in a bear attack",
        "Maine man wins $1M from $25 lottery ticket",
        "Make huge profits without work, earn up to $100,000 a day",
    )

    def __init__(self):
        """
        Creates a new application.
        """

        # Create embeddings model, backed by sentence-transformers & transformers
        self.embeddings = Embeddings({"path": "sentence-transformers/nli-mpnet-base-v2"})

    @staticmethod
    def _rows(text):
        """
        Splits text area content into candidate rows.

        Surrounding whitespace is stripped from each line and blank lines are dropped,
        so a trailing newline or an emptied text area never yields an empty candidate.

        Args:
            text: raw text, one row per line

        Returns:
            list of non-empty rows, in input order
        """

        return [row for row in (line.strip() for line in text.split("\n")) if row]

    def run(self):
        """
        Runs a Streamlit application.

        Renders the title, an editable "Data" text area and a "Query" input. When a
        query is entered, the query is compared against every non-blank data row and
        the selected row is written to the page. When no non-blank rows remain, a
        warning is shown instead of running the search.
        """

        st.title("Similarity Search")
        st.markdown("This application runs a basic similarity search that identifies the best matching row for a query.")

        text = st.text_area("Data", value="\n".join(self.DEFAULT_DATA))
        query = st.text_input("Query")

        # Nothing to search for until the user enters a query
        if not query:
            return

        data = self._rows(text)
        if not data:
            # An empty candidate list has no best match to index into
            st.warning("Enter at least one row of data to search.")
            return

        # Index of the row that best matches the query: the id of the first result.
        # NOTE(review): relies on similarity() returning (id, score) pairs ordered
        # best-first - confirm against the txtai version in use.
        uid = self.embeddings.similarity(query, data)[0][0]
        st.write(data[uid])
# NOTE(review): newer Streamlit releases deprecate st.cache in favor of
# st.cache_resource - confirm against the Streamlit version this app is pinned to.
@st.cache(allow_output_mutation=True)
def create():
    """
    Builds the Streamlit application; the result is cached by the st.cache decorator.

    Returns:
        Application
    """

    application = Application()
    return application
if __name__ == "__main__":
    # Set TOKENIZERS_PARALLELISM to "false" before the application is created
    os.environ.update({"TOKENIZERS_PARALLELISM": "false"})

    # Obtain the application through the cached factory, then render the page
    app = create()
    app.run()
|