File size: 1,927 Bytes
b54b44d 41cb748 b54b44d 41cb748 b54b44d 41cb748 8a8cb97 41cb748 678e3c2 8a8cb97 41cb748 8a8cb97 678e3c2 ffb6903 41cb748 edf2a16 41cb748 edf2a16 41cb748 f546480 ae048b2 41cb748 8108d07 42f2b0c edf2a16 41cb748 edf2a16 41cb748 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 |
from sentence_transformers import SentenceTransformer, util
import torch
# Load the pre-trained model once at import time; all helpers below share
# this single instance. all-MiniLM-L6-v2 is a compact sentence-embedding
# model (384-dim vectors) from the sentence-transformers hub.
model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
def find_cosine_similarity(text1, text2):
    """Return the cosine similarity between the embeddings of two texts.

    Args:
        text1: First text to compare.
        text2: Second text to compare.

    Returns:
        float: Cosine similarity score, typically in [-1, 1].
    """
    # No `global` needed: the module-level `model` is only read, never rebound.
    # Encode the texts to get their embeddings.
    embedding1 = model.encode(text1, convert_to_tensor=True)
    embedding2 = model.encode(text2, convert_to_tensor=True)
    # Compute cosine similarity (1x1 tensor) and unwrap to a Python float.
    cosine_sim = util.pytorch_cos_sim(embedding1, embedding2)
    return cosine_sim.item()
def find_embedding(texts, lim=None):
    """Encode each text into an embedding tensor, up to an optional limit.

    Args:
        texts: Iterable of strings to encode.
        lim: Maximum number of texts to encode; None means no limit.

    Returns:
        list: Embedding tensors, one per encoded text (at most `lim`).
    """
    embeddings = []
    for count, text in enumerate(texts):
        # Stop once `lim` texts have been encoded. The original used
        # `continue` with `c > lim`, which (a) kept scanning the whole
        # list and (b) encoded lim+1 texts due to an off-by-one; it also
        # treated lim=0 as "no limit" via truthiness.
        if lim is not None and count >= lim:
            break
        print(f"Finding embedding for {text}")
        embeddings.append(model.encode(text, convert_to_tensor=True))
    return embeddings
def find_relevant_file_paths(ingredient, embeddings, titles, N=2, thres=0.7):
    """Find the article files most relevant to an ingredient by embedding similarity.

    Args:
        ingredient: Query string to match against the article embeddings.
        embeddings: Sequence of precomputed article embedding tensors, in the
            same order as `titles` (article i has title titles[i-1] and lives
            in "article{i}.txt" — 1-based numbering).
        titles: Article titles aligned with `embeddings`.
        N: Number of top-scoring candidates to consider.
        thres: Minimum cosine similarity required to accept a candidate.

    Returns:
        tuple[list, list]: (file_paths, file_titles) for candidates in the
        top-N whose similarity strictly exceeds `thres`.
    """
    file_paths = []
    file_titles = []
    embedding_ingredient = model.encode(ingredient, convert_to_tensor=True)
    # Map 1-based article number -> similarity as a plain float. Unwrapping
    # with .item() here (rather than sorting raw 1x1 tensors, as before)
    # makes the sort below unambiguous and avoids relying on bool() of a
    # one-element tensor during comparisons.
    cosine_sims = {
        article_num: util.pytorch_cos_sim(embedding_ingredient, embedding).item()
        for article_num, embedding in enumerate(embeddings, start=1)
    }
    # Keep only the N highest-similarity articles.
    top_n = dict(sorted(cosine_sims.items(), key=lambda kv: kv[1], reverse=True)[:N])
    print(f"DEBUG : Ingredient {ingredient} top_n_cosine_sims_dict : {top_n}")
    for article_num, score in top_n.items():
        if score > thres:
            file_paths.append(f"article{article_num}.txt")
            file_titles.append(titles[article_num - 1])
    return file_paths, file_titles
|