import pickle as pkl
from functools import lru_cache
from typing import Optional

import numpy as np
import pandas as pd
from datasets import load_dataset
from scipy.spatial.distance import cosine
from sentence_transformers import SentenceTransformer


@lru_cache(maxsize=1)
def _load_model():
    """Return the sentence encoder, constructing it only on first use."""
    return SentenceTransformer('all-MiniLM-L6-v2', device='cpu')


@lru_cache(maxsize=1)
def _load_articles():
    """Return the 'train' split of the articles dataset as a DataFrame."""
    return load_dataset('Mohamed-BC/Articles')['train'].to_pandas()


@lru_cache(maxsize=1)
def _load_embeddings():
    """Return the pre-computed article embeddings unpickled from disk."""
    # NOTE: unpickling can execute arbitrary code; only ship a trusted file here.
    with open('data/articles_embeddings.pkl', 'rb') as fh:
        return pkl.load(fh)


def _cosine_similarities(query_vec, matrix):
    """Return the cosine similarity between query_vec and each row of matrix."""
    denom = np.linalg.norm(matrix, axis=1) * np.linalg.norm(query_vec)
    # Cosine similarity is undefined for zero-norm vectors; score them lowest
    # so they are never preferred over a real match.
    sims = np.full(matrix.shape[0], -np.inf)
    np.divide(matrix @ query_vec, denom, out=sims, where=denom > 0)
    return sims


def recommend(
    query: str,
    n: int = 5,
    *,
    max_candidates: Optional[int] = 1000,
) -> pd.Series:
    """Return the titles of the articles most similar to ``query``.

    The query is encoded with the sentence-transformer model and compared,
    by cosine similarity, against the pre-computed article embeddings.

    Args:
        query: Text to find recommendations for.
        n: Maximum number of titles to return. Values ``<= 0`` yield an
            empty result.
        max_candidates: Only the first ``max_candidates`` embeddings are
            scored (default 1000). Pass ``None`` to score all of them.

    Returns:
        The ``'title'`` column for the selected rows, ordered by ascending
        similarity (the best match is the LAST element).

    NOTE(review): assumes row ``i`` of the embeddings file corresponds to
    row ``i`` of the dataset, and that the embeddings form a rectangular
    2-D array -- confirm against the script that produced the pickle.
    NOTE(review): best-match-last ordering is kept for backward
    compatibility; confirm whether callers would prefer best-first.
    """
    data = _load_articles()
    candidates = np.asarray(
        _load_embeddings()[:max_candidates], dtype=np.float64
    )

    # Guard explicitly: a bare ``[-n:]`` slice with n == 0 would select
    # every row instead of none.
    if n <= 0 or candidates.size == 0:
        return data.iloc[[]]['title']

    q_embedding = np.asarray(_load_model().encode(query), dtype=np.float64)
    cos_sim = _cosine_similarities(q_embedding, candidates)

    ranked = np.argsort(cos_sim)
    top_n = ranked[max(len(ranked) - n, 0):]
    return data.iloc[top_n]['title']