Spaces:
Sleeping
Sleeping
File size: 3,472 Bytes
2518257 9082b91 2518257 dbdc0fc 2518257 dbdc0fc 2518257 dbdc0fc 2518257 6d975f0 2518257 05a8d1c 2518257 05a8d1c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 |
import json
import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
class Recommender:
    """Recommend similar records using precomputed title and content vectors.

    The constructor stores the positions of the fields inside each record.
    These are used as integer indices into the transposed table, so they are
    expected to be field positions (not labels).
    """

    def __init__(self, id_col, title_col, content_col, title_vec_col, content_vec_col):
        """Store the field positions of a record.

        Args:
            id_col: Position of the identifier field.
            title_col: Position of the title text field.
            content_col: Position of the content text field.
            title_vec_col: Position of the JSON-encoded title vector field.
            content_vec_col: Position of the JSON-encoded content vector field.
        """
        self.title_vec_col = title_vec_col
        self.content_vec_col = content_vec_col
        self.title_col = title_col
        self.content_col = content_col
        self.id_col = id_col

    def calculate_recom_scores(self, k, similarities, exclude=None):
        """Return the ``k`` best ``(index, score)`` pairs from a similarity row.

        Args:
            k: Number of pairs to return.
            similarities: 2-D array-like; only its first row is ranked.
            exclude: Optional index of the query record. When given, that
                exact index is removed before taking the top ``k``. When
                omitted, the single highest-ranked entry is dropped instead
                (legacy behaviour, which assumes the query ranks first).

        Returns:
            A list of ``(index, score)`` tuples sorted by descending score.
        """
        ranked = sorted(
            enumerate(similarities[0]),
            key=lambda pair: pair[1],
            reverse=True,
        )
        if exclude is None:
            return ranked[1:k + 1]
        # Dropping by index stays correct when another record ties with (or
        # outranks, through rounding) the query's own self-similarity.
        return [pair for pair in ranked if pair[0] != exclude][:k]

    def str2arr(self, arr):
        """Decode an iterable of JSON strings into one stacked NumPy array.

        Args:
            arr: Iterable of strings, each a JSON-encoded list.

        Returns:
            ``np.ndarray`` built from the decoded lists, one entry per string.
        """
        return np.array([np.array(json.loads(text)) for text in arr])

    def _find_record(self, data, title):
        """Return the position of the first record whose title equals ``title``.

        ``data`` is the field-major (transposed) table, so the search is
        restricted to the title row; the resulting position indexes records.

        Raises:
            IndexError: If no record has that title.
        """
        matches = np.flatnonzero(data[self.title_col] == title)
        if matches.size == 0:
            raise IndexError(f"title {title!r} not found in table")
        return int(matches[0])

    @staticmethod
    def _merge_scores(*score_lists):
        """Merge ``(index, score)`` lists, keeping the best score per index.

        Returns:
            A list of ``(index, score)`` tuples sorted by descending score;
            ties are broken by ascending index so the order is deterministic.
        """
        best = {}
        for scores in score_lists:
            for index, score in scores:
                if index not in best or score > best[index]:
                    best[index] = score
        return sorted(best.items(), key=lambda pair: (-pair[1], pair[0]))

    def recommend_k(self, table, k, title):
        """Recommend records similar to the one titled ``title``.

        The top ``k`` neighbours by title-vector cosine similarity and the top
        ``k`` by content-vector cosine similarity are merged, keeping the
        higher score when a record appears in both lists.

        Args:
            table: Iterable of equal-length records; it is transposed with
                ``zip(*table)`` so each field becomes one row of the working
                array. (Assumed to be row-oriented, e.g. a list of tuples —
                confirm against the caller.)
            k: Number of neighbours taken from each similarity ranking.
            title: Title of the query record.

        Returns:
            A tuple ``(indices, union_scores, titles)`` where ``indices`` are
            record positions, ``union_scores`` the matching
            ``(index, score)`` tuples in descending score order, and
            ``titles`` the recommended titles joined by newlines.

        Raises:
            IndexError: If the table is empty/ragged or the title is absent.
        """
        data = np.array(list(zip(*table)))
        if data.ndim != 2:
            raise IndexError("table must be a non-empty sequence of equal-length rows")

        idx = self._find_record(data, title)

        title_vecs = self.str2arr(data[self.title_vec_col, :])
        content_vecs = self.str2arr(data[self.content_vec_col, :])

        titles_sim = cosine_similarity(title_vecs[idx].reshape(1, -1), title_vecs)
        contents_sim = cosine_similarity(content_vecs[idx].reshape(1, -1), content_vecs)

        titles_scores = self.calculate_recom_scores(k, titles_sim, exclude=idx)
        contents_scores = self.calculate_recom_scores(k, contents_sim, exclude=idx)

        union_scores = self._merge_scores(titles_scores, contents_scores)
        indices = [index for index, _ in union_scores]

        # Titles are read from the transposed array: ``table`` itself is
        # row-oriented, so ``table[title_col]`` would be a record, not a field.
        picked = data[self.title_col, np.asarray(indices, dtype=int)]
        titles = '\n'.join(picked.astype(str))
        return indices, union_scores, titles
|