"""Gradio demo app: semantic lookup of words by the meaning of a free-text query.

A query is embedded with a sentence-transformers model, its nearest neighbours
are looked up in a prebuilt Annoy index, and the matching words and definitions
are read from a DuckDB database and shown in a table.
"""

import duckdb
import gradio as gr
import pandas as pd
from annoy import AnnoyIndex
from sentence_transformers import SentenceTransformer

# Column order of the table returned by ``search_query``.
RESULT_COLUMNS = ["#", "sõna", "definitsioon"]

model = SentenceTransformer("sentence-transformers/LaBSE")
# The index dimension must match the length of the vectors produced by
# ``model.encode``, since those vectors are what gets queried against it.
annoy_index = AnnoyIndex(768, "angular")
annoy_index.load("definitions.ann")
conn = duckdb.connect("sonajaht.db")


def _build_lookup_sql(entry_ids):
    """Return the SQL that fetches words/definitions for *entry_ids* in order.

    Args:
        entry_ids: Non-empty sequence of integer entry ids, best match first.

    Returns:
        A SELECT statement whose rows are sorted by each id's position in
        *entry_ids*.
    """
    # Coerce to int so that only numeric literals are ever interpolated
    # into the statement.
    ids = [int(entry_id) for entry_id in entry_ids]
    id_list = ", ".join(map(str, ids))
    # The IN (...) filter does not preserve order, so the position of every
    # id in the neighbour list is spelled out as a CASE expression.
    ranking = " ".join(
        f"WHEN {entry_id} THEN {rank}" for rank, entry_id in enumerate(ids)
    )
    return f"""
        SELECT w.value AS sõna, d.value AS definitsioon
        FROM definitions d
        JOIN words w ON d.word_id = w.word_id
        WHERE d.entry_id IN ({id_list})
        ORDER BY CASE d.entry_id {ranking} END
    """


def search_query(query, top_k=10):
    """Find the words whose definitions are closest in meaning to *query*.

    Args:
        query: Free-text search string.
        top_k: Number of nearest neighbours requested from the Annoy index.

    Returns:
        A pandas DataFrame with the columns ``#`` (1-based row number),
        ``sõna`` and ``definitsioon``, ordered from best to worst match.
        The frame is empty when *query* is blank or the index returns no
        neighbours.
    """
    # A blank query carries no meaning to search for; skip the model call.
    if not query or not query.strip():
        return pd.DataFrame(columns=RESULT_COLUMNS)

    query_vector = model.encode(query)
    # NOTE: pass include_distances=True here if a relevance column is ever
    # needed; the distances are currently not displayed.
    similar_item_ids = annoy_index.get_nns_by_vector(query_vector, top_k)

    # With no ids the statement would contain "IN ()" and an empty CASE,
    # which is not valid SQL, so return an empty table instead.
    if not similar_item_ids:
        return pd.DataFrame(columns=RESULT_COLUMNS)

    results = conn.execute(_build_lookup_sql(similar_item_ids)).fetchdf()
    results["#"] = list(range(1, len(results) + 1))
    return results[RESULT_COLUMNS]


# Sample queries offered in the UI.
examples = [
    "väga vana mees",
    "очень старый дед",
    "un très vieil homme",
    "a clear material that you can see through used to make windows",
    "to have a rule that you need a specific object or thing in some situation",
    "something that makes you happy or makes you laugh",
    "when an event happens or takes place",
    "часть стерео системы, из которой исходит музыка",
    "кто-то, кто использует что-то",
]


def handle_example(example):
    """Return *example* together with the search results for it."""
    return example, search_query(example)


with gr.Blocks() as demo:
    gr.Markdown("# Sõnajaht Demo")

    query_input = gr.Textbox(label="Sisestage teie otsingupäring")
    search_button = gr.Button("Otsi")

    # Selecting an example only fills the textbox; the search itself is
    # triggered by the button below.
    gr.Examples(
        examples=examples, inputs=query_input, label="Otsi päringunäiteid"
    )

    results_output = gr.Dataframe(label="Otsingutulemused")

    search_button.click(search_query, inputs=query_input, outputs=results_output)


if __name__ == "__main__":
    demo.launch()