Spaces:
Sleeping
Sleeping
File size: 1,935 Bytes
c532148 702c8d6 c532148 702c8d6 4e8a334 702c8d6 c532148 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 |
import gradio as gr
import duckdb
from annoy import AnnoyIndex
from sentence_transformers import SentenceTransformer
# Resources loaded once at import time and shared by every search request.
EMBEDDING_MODEL_NAME = "sentence-transformers/LaBSE"
# Vector length passed to AnnoyIndex; it has to agree with the vectors stored
# in the index file and with what the embedding model produces.
EMBEDDING_DIM = 768
ANNOY_INDEX_PATH = "definitions.ann"
DATABASE_PATH = "sonajaht.db"

# Sentence-embedding model used to turn a query string into a vector.
model = SentenceTransformer(EMBEDDING_MODEL_NAME)

# Pre-built nearest-neighbour index over definition vectors (angular metric).
annoy_index = AnnoyIndex(EMBEDDING_DIM, "angular")
annoy_index.load(ANNOY_INDEX_PATH)

# DuckDB database holding the `words` and `definitions` tables queried below.
conn = duckdb.connect(DATABASE_PATH)
def search_query(query, top_k=10):
    """Return the words whose definitions are closest in meaning to *query*.

    The query is embedded with the module-level sentence-transformer
    ``model``, the ``top_k`` nearest items are fetched from ``annoy_index``,
    and the matching rows are read from the DuckDB ``definitions`` and
    ``words`` tables, ordered by their rank in the nearest-neighbour result.

    Args:
        query: Free-text search phrase. ``None``, empty, or whitespace-only
            input yields an empty result instead of an arbitrary lookup.
        top_k: Maximum number of nearest neighbours to retrieve.

    Returns:
        A pandas DataFrame with the columns ``"#"`` (1-based rank),
        ``"sõna"`` (word) and ``"definitsioon"`` (definition).
    """
    # Local import keeps this function self-contained; pandas is already
    # required at runtime by DuckDB's ``fetchdf``.
    import pandas as pd

    columns = ["#", "sõna", "definitsioon"]

    # Nothing meaningful to search for: skip the model and the database.
    if not query or not query.strip() or top_k <= 0:
        return pd.DataFrame(columns=columns)

    query_vector = model.encode(query)
    similar_item_ids = annoy_index.get_nns_by_vector(query_vector, top_k)

    # An empty neighbour list would otherwise produce invalid SQL
    # (`IN ()` and a CASE expression without any WHEN branch).
    if not similar_item_ids:
        return pd.DataFrame(columns=columns)

    # Annoy item ids are integers; coerce explicitly so that only numeric
    # literals can ever be interpolated into the SQL text.
    ids = [int(item_id) for item_id in similar_item_ids]
    id_list = ", ".join(map(str, ids))
    # Preserve the nearest-neighbour ranking, which `IN (...)` alone does not.
    rank_cases = " ".join(
        f"WHEN {item_id} THEN {rank}" for rank, item_id in enumerate(ids)
    )
    sql_query = f"""
        SELECT w.value AS sõna, d.value AS definitsioon
        FROM definitions d
        JOIN words w ON d.word_id = w.word_id
        WHERE d.entry_id IN ({id_list})
        ORDER BY CASE d.entry_id {rank_cases} END
    """

    # Gradio runs handlers in worker threads and a DuckDB connection object
    # must not be shared between threads, so use a per-call cursor.
    cursor = conn.cursor()
    try:
        results = cursor.execute(sql_query).fetchdf()
    finally:
        cursor.close()

    # Prepend the 1-based rank column shown in the UI.
    results.insert(0, "#", range(1, len(results) + 1))
    return results
# Canned example queries; each one is rendered as a clickable button in the UI.
examples = [
    "väga vana mees",
    "очень старый дед",
    "un très vieil homme",
]
def handle_example(example):
    """Run the search for a canned example query.

    Returns a ``(example, results)`` pair: the example text itself (used to
    fill the query textbox) and the result of ``search_query(example)``.
    """
    results = search_query(example)
    return example, results
# Build the Gradio interface: a query box, a search button, a row of example
# buttons and a table that displays the search results.
with gr.Blocks() as demo:
    gr.Markdown("# Sõnajaht Demo")
    query_input = gr.Textbox(label="Sisestage teie otsingupäring")
    search_button = gr.Button("Otsi")

    # One button per canned example, laid out side by side.
    with gr.Row():
        example_buttons = [gr.Button(text) for text in examples]

    results_output = gr.Dataframe(label="Otsingutulemused")

    # Manual search: the textbox content goes in, the result table comes out.
    search_button.click(
        fn=search_query,
        inputs=query_input,
        outputs=results_output,
    )

    # Example buttons: the button's own text is passed in through a State
    # component, copied into the textbox, and searched in one step.
    for button in example_buttons:
        preset_query = gr.State(button.value)
        button.click(
            fn=handle_example,
            inputs=preset_query,
            outputs=[query_input, results_output],
        )

# Start the web app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()
|