sonajaht-demo / app.py
adorkin's picture
Reorder output columns
4e8a334 verified
raw
history blame
1.94 kB
import gradio as gr
import duckdb
from annoy import AnnoyIndex
from sentence_transformers import SentenceTransformer
# Names/paths of the resources this app loads at import time.
MODEL_NAME = "sentence-transformers/LaBSE"
INDEX_PATH = "definitions.ann"
DATABASE_PATH = "sonajaht.db"
# Vector length and metric the Annoy index is opened with; vectors produced
# by `model.encode` are passed straight to this index in `search_query`.
EMBEDDING_DIM = 768
INDEX_METRIC = "angular"

# Sentence encoder used to turn a text query into a vector.
model = SentenceTransformer(MODEL_NAME)

# Pre-built nearest-neighbour index, loaded from disk.
annoy_index = AnnoyIndex(EMBEDDING_DIM, INDEX_METRIC)
annoy_index.load(INDEX_PATH)

# DuckDB database holding the `definitions` and `words` tables queried below.
conn = duckdb.connect(DATABASE_PATH)
def search_query(query, top_k=10):
    """Return the word/definition rows that best match *query*.

    The query is encoded with the module-level ``model``, the closest items
    are looked up in ``annoy_index``, and the rows whose ``entry_id`` is among
    those items are fetched from DuckDB, ordered the way the index returned
    them.

    Args:
        query: Free-text search phrase.
        top_k: Maximum number of neighbours requested from the index.

    Returns:
        A pandas DataFrame with the columns ``#``, ``sõna`` and
        ``definitsioon``, where ``#`` is the 1-based row position. The frame
        is empty when *query* is blank or the index yields no neighbours.
    """
    # Local import keeps this block self-contained; pandas is already a
    # runtime requirement because ``fetchdf`` returns a pandas DataFrame.
    import pandas as pd

    columns = ["#", "sõna", "definitsioon"]

    # A blank query carries no meaning; do not embed and search it.
    if not query or not query.strip():
        return pd.DataFrame(columns=columns)

    query_vector = model.encode(query)
    # Coerce to int so only numeric literals are ever interpolated into SQL.
    similar_item_ids = [
        int(item_id)
        for item_id in annoy_index.get_nns_by_vector(query_vector, top_k)
    ]

    # With no ids the SQL below would contain "IN ()" and a CASE without any
    # WHEN branch, both of which are syntax errors.
    if not similar_item_ids:
        return pd.DataFrame(columns=columns)

    id_list = ", ".join(map(str, similar_item_ids))
    # Maps each id to its position in the neighbour list so the result keeps
    # the order returned by the index.
    order_cases = " ".join(
        f"WHEN {item_id} THEN {rank}"
        for rank, item_id in enumerate(similar_item_ids)
    )
    sql_query = f"""
        SELECT w.value AS sõna, d.value AS definitsioon
        FROM definitions d
        JOIN words w ON d.word_id = w.word_id
        WHERE d.entry_id IN ({id_list})
        ORDER BY CASE d.entry_id {order_cases} END
    """

    # NOTE: UI handlers may run on worker threads; use a per-call cursor
    # instead of sharing the module-level connection object across threads.
    cursor = conn.cursor()
    try:
        results = cursor.execute(sql_query).fetchdf()
    finally:
        cursor.close()

    results["#"] = range(1, len(results) + 1)
    return results[columns]
# Sample queries; each one is rendered as a one-click button in the UI.
examples = ["väga vana mees", "очень старый дед", "un très vieil homme"]
def handle_example(example):
    """Run a search for *example* and echo the text back.

    Returns a 2-tuple ``(example, results)`` so the caller can fill the query
    box with the example text and show its search results at the same time.
    """
    results = search_query(example)
    return example, results
# UI definition: components are created in the order they appear on the page.
with gr.Blocks() as demo:
    gr.Markdown("# Sõnajaht Demo")

    query_input = gr.Textbox(label="Sisestage teie otsingupäring")
    search_button = gr.Button("Otsi")

    # One button per sample query, laid out side by side.
    with gr.Row():
        example_buttons = [gr.Button(example) for example in examples]

    results_output = gr.Dataframe(label="Otsingutulemused")

    # The search button runs the query typed into the textbox.
    search_button.click(
        fn=search_query,
        inputs=query_input,
        outputs=results_output,
    )

    # Each example button passes its own text (wrapped in a State input) to
    # the handler, which fills the textbox and the results table.
    for button, example_text in zip(example_buttons, examples):
        button.click(
            fn=handle_example,
            inputs=gr.State(example_text),
            outputs=[query_input, results_output],
        )


if __name__ == "__main__":
    demo.launch()