"""Embed every row of ``definitions.value`` with LaBSE and save the vectors.

Reads the ``value`` column of the ``definitions`` table in ``sonajaht.db``,
encodes it in batches with the ``sentence-transformers/LaBSE`` model, and
writes the stacked embeddings to ``definitions.safetensors`` under the key
``vectors``.
"""

import duckdb
import numpy as np
from safetensors.numpy import save_file
from sentence_transformers import SentenceTransformer
from tqdm import tqdm

conn = duckdb.connect("sonajaht.db")
try:
    model = SentenceTransformer("sentence-transformers/LaBSE")

    # NOTE(review): there is no ORDER BY, so the row order (and therefore the
    # row index of each vector) is whatever the database happens to return.
    # Confirm that consumers do not rely on a stable mapping back to rows.
    query = "SELECT value FROM definitions"
    result = conn.execute(query)

    vectors = []
    batch_size = 64

    # The context manager closes the bar even if encoding fails part-way.
    with tqdm() as p_bar:
        while True:
            chunk = result.fetchmany(batch_size)
            if not chunk:
                break

            # Each fetched row is a 1-tuple; keep only the text.
            values = [row[0] for row in chunk]
            vectors.append(
                model.encode(
                    values,
                    show_progress_bar=False,
                    batch_size=batch_size,
                    # Hard-coded to PyTorch's Apple-silicon (Metal) backend.
                    device="mps",
                )
            )

            # Advance by the rows actually encoded: the final chunk is
            # usually shorter than batch_size.
            p_bar.update(len(values))
finally:
    # Release the database handle whether or not encoding succeeded.
    conn.close()

if not vectors:
    # np.concatenate([]) would fail with an opaque message; say what happened.
    raise ValueError("no rows returned from 'definitions'; nothing to encode")

# Stack the per-batch arrays along the first axis into one array.
vectors = np.concatenate(vectors)
save_file(dict(vectors=vectors), "definitions.safetensors")