from sentence_transformers import SentenceTransformer, util

# Load the pre-trained sentence-embedding model
model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

def find_cosine_similarity(text1, text2):
    """Return the cosine similarity between the embeddings of two texts."""
    # Encode the texts to get their embeddings
    embedding1 = model.encode(text1, convert_to_tensor=True)
    embedding2 = model.encode(text2, convert_to_tensor=True)

    # Compute cosine similarity and return it as a plain float
    cosine_sim = util.cos_sim(embedding1, embedding2)
    return cosine_sim.item()

def find_embedding(texts, lim=None):
    """Encode each text into an embedding tensor, stopping after `lim` texts if given."""
    embeddings = []

    for i, text in enumerate(texts):
        # Stop once the limit is reached instead of skipping the remainder
        if lim is not None and i >= lim:
            break
        print(f"Finding embedding for {text}")
        embeddings.append(model.encode(text, convert_to_tensor=True))

    return embeddings

def find_relevant_file_paths(ingredient, embeddings, titles, N=2, thres=0.7):
    """Return the file paths and titles of up to N articles whose embeddings are
    most similar to the ingredient, keeping only matches above the threshold."""
    file_paths = []
    file_titles = []

    embedding_ingredient = model.encode(ingredient, convert_to_tensor=True)

    # Compute cosine similarity against every article embedding; indices are
    # 1-based to match the article{N}.txt naming convention
    cosine_sims = {}
    for title_num, embedding in enumerate(embeddings, start=1):
        cosine_sims[title_num] = util.cos_sim(embedding_ingredient, embedding).item()

    # Keep the N articles with the highest similarity scores
    top_n_cosine_sims = sorted(cosine_sims.items(), key=lambda item: item[1], reverse=True)[:N]
    print(f"DEBUG : Ingredient {ingredient} top_n_cosine_sims : {top_n_cosine_sims}")

    for key, value in top_n_cosine_sims:
        if value > thres:
            file_paths.append(f"article{key}.txt")
            file_titles.append(titles[key - 1])

    return file_paths, file_titles
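

# --- Usage sketch (illustrative, not part of the original module) ---
# A minimal example of how these helpers might be wired together, assuming
# article files named article1.txt, article2.txt, ... exist alongside this
# script; the titles and the "turmeric" query below are placeholder values.
if __name__ == "__main__":
    titles = [
        "Health benefits of turmeric",
        "Cooking with fresh basil",
        "A guide to whole grains",
    ]
    title_embeddings = find_embedding(titles)
    paths, matched_titles = find_relevant_file_paths("turmeric", title_embeddings, titles)
    print(f"Matched files: {paths}")
    print(f"Matched titles: {matched_titles}")
    print(f"Similarity: {find_cosine_similarity('turmeric', titles[0]):.3f}")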