File size: 1,927 Bytes
b54b44d 41cb748 b54b44d 41cb748 b54b44d 41cb748 8a8cb97 41cb748 678e3c2 8a8cb97 41cb748 8a8cb97 678e3c2 ffb6903 41cb748 edf2a16 41cb748 edf2a16 41cb748 f546480 ae048b2 41cb748 8108d07 42f2b0c edf2a16 41cb748 edf2a16 41cb748 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 |
from sentence_transformers import SentenceTransformer, util
import torch
# Load the pre-trained model once at import time; all helpers below share
# this single instance. all-MiniLM-L6-v2 is a compact sentence-embedding
# model (384-dim vectors) from the sentence-transformers hub.
model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
def find_cosine_similarity(text1, text2):
    """Return the cosine similarity between the embeddings of two texts.

    Args:
        text1: First text to compare.
        text2: Second text to compare.

    Returns:
        float: Cosine similarity score, typically in [-1, 1].
    """
    # No `global` needed: the module-level `model` is only read, never rebound.
    # Encode the texts to get their embeddings.
    embedding1 = model.encode(text1, convert_to_tensor=True)
    embedding2 = model.encode(text2, convert_to_tensor=True)
    # Compute cosine similarity (1x1 tensor) and unwrap to a Python float.
    cosine_sim = util.pytorch_cos_sim(embedding1, embedding2)
    return cosine_sim.item()
def find_embedding(texts, lim=None):
    """Encode each text into an embedding tensor, up to an optional limit.

    Args:
        texts: Iterable of strings to encode.
        lim: Maximum number of texts to encode; None means no limit.

    Returns:
        list: Embedding tensors, one per encoded text (at most `lim`).
    """
    embeddings = []
    for count, text in enumerate(texts):
        # Stop once `lim` texts have been encoded. The original used
        # `continue` with `c > lim`, which (a) kept scanning the whole
        # list and (b) encoded lim+1 texts due to an off-by-one; it also
        # treated lim=0 as "no limit" via truthiness.
        if lim is not None and count >= lim:
            break
        print(f"Finding embedding for {text}")
        embeddings.append(model.encode(text, convert_to_tensor=True))
    return embeddings
def find_relevant_file_paths(ingredient, embeddings, titles, N=2, thres=0.7):
    """Find the article files most relevant to an ingredient by embedding similarity.

    Args:
        ingredient: Query string to match against the article embeddings.
        embeddings: Sequence of precomputed article embedding tensors, in the
            same order as `titles` (article i has title titles[i-1] and lives
            in "article{i}.txt" — 1-based numbering).
        titles: Article titles aligned with `embeddings`.
        N: Number of top-scoring candidates to consider.
        thres: Minimum cosine similarity required to accept a candidate.

    Returns:
        tuple[list, list]: (file_paths, file_titles) for candidates in the
        top-N whose similarity strictly exceeds `thres`.
    """
    file_paths = []
    file_titles = []
    embedding_ingredient = model.encode(ingredient, convert_to_tensor=True)
    # Map 1-based article number -> similarity as a plain float. Unwrapping
    # with .item() here (rather than sorting raw 1x1 tensors, as before)
    # makes the sort below unambiguous and avoids relying on bool() of a
    # one-element tensor during comparisons.
    cosine_sims = {
        article_num: util.pytorch_cos_sim(embedding_ingredient, embedding).item()
        for article_num, embedding in enumerate(embeddings, start=1)
    }
    # Keep only the N highest-similarity articles.
    top_n = dict(sorted(cosine_sims.items(), key=lambda kv: kv[1], reverse=True)[:N])
    print(f"DEBUG : Ingredient {ingredient} top_n_cosine_sims_dict : {top_n}")
    for article_num, score in top_n.items():
        if score > thres:
            file_paths.append(f"article{article_num}.txt")
            file_titles.append(titles[article_num - 1])
    return file_paths, file_titles
|