"""Gradio demo for Sõnajaht: multilingual semantic search over dictionary definitions.

A free-text query is embedded with LaBSE, its nearest neighbours are looked up
in a prebuilt Annoy index, and the matching word/definition rows are fetched
from a DuckDB database in nearest-first order.
"""

import duckdb
import gradio as gr
import pandas as pd
from annoy import AnnoyIndex
from sentence_transformers import SentenceTransformer

# Column order of the table shown in the UI.
RESULT_COLUMNS = ["#", "sõna", "definitsioon"]

model = SentenceTransformer("sentence-transformers/LaBSE")

# The index dimensionality (768) must match the vectors stored in definitions.ann.
annoy_index = AnnoyIndex(768, "angular")
annoy_index.load("definitions.ann")

conn = duckdb.connect("sonajaht.db")


def _empty_results() -> pd.DataFrame:
    """Return an empty result table with the same columns as a real result."""
    return pd.DataFrame(columns=RESULT_COLUMNS)


def search_query(query: str, top_k: int = 10) -> pd.DataFrame:
    """Return the definitions that are semantically closest to *query*.

    Args:
        query: Free-text search phrase.
        top_k: Maximum number of nearest neighbours requested from the index.

    Returns:
        A DataFrame with the columns ``#`` (1-based rank), ``sõna`` and
        ``definitsioon``, ordered from the nearest neighbour outwards. The
        frame is empty when the query is blank or the index yields no ids.
    """
    if not query or not query.strip():
        return _empty_results()

    query_vector = model.encode(query)
    neighbour_ids = [
        int(item_id)
        for item_id in annoy_index.get_nns_by_vector(query_vector, top_k)
    ]
    if not neighbour_ids:
        # An empty IN () list / CASE expression is invalid SQL, so stop here.
        return _empty_results()

    id_list = ", ".join(map(str, neighbour_ids))
    # Map every id to its rank so that SQL keeps the nearest-first order.
    rank_cases = " ".join(
        f"WHEN {item_id} THEN {rank}" for rank, item_id in enumerate(neighbour_ids)
    )
    sql_query = f"""
        SELECT w.value AS sõna, d.value AS definitsioon
        FROM definitions d
        JOIN words w ON d.word_id = w.word_id
        WHERE d.entry_id IN ({id_list})
        ORDER BY CASE d.entry_id {rank_cases} END
    """

    # Gradio may run handlers on several worker threads; a per-call cursor
    # avoids sharing one DuckDB connection object between them.
    results = conn.cursor().execute(sql_query).fetchdf()
    results["#"] = list(range(1, len(results) + 1))
    return results[RESULT_COLUMNS]


examples = [
    "väga vana mees",
    "очень старый дед",
    "un très vieil homme",
]


def handle_example(example: str):
    """Return the example text and its search results.

    The first value fills the query box, the second fills the results table.
    """
    return example, search_query(example)


with gr.Blocks() as demo:
    gr.Markdown("# Sõnajaht Demo")

    query_input = gr.Textbox(label="Sisestage teie otsingupäring")
    search_button = gr.Button("Otsi")

    with gr.Row():
        example_buttons = [gr.Button(example) for example in examples]

    results_output = gr.Dataframe(label="Otsingutulemused")

    search_button.click(search_query, inputs=query_input, outputs=results_output)

    for button in example_buttons:
        # Each button passes its own label to the handler through a State value.
        button.click(
            handle_example,
            inputs=gr.State(button.value),
            outputs=[query_input, results_output],
        )

if __name__ == "__main__":
    demo.launch()