File size: 3,472 Bytes
2518257
9082b91
2518257
 
 
 
dbdc0fc
2518257
 
dbdc0fc
 
2518257
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dbdc0fc
2518257
 
6d975f0
2518257
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
05a8d1c
2518257
05a8d1c
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
import json
import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity

class Recommender:
    """Recommend items whose title or content embeddings are closest to a query item.

    The five ``*_col`` arguments are column selectors for the table handed to
    :meth:`recommend_k`.  ``title_vec_col`` and ``content_vec_col`` index the
    transposed table, so they must be integer positions, and the cells they
    select must hold JSON-encoded vectors (they are parsed with ``json.loads``).
    """

    def __init__(self, id_col, title_col, content_col, title_vec_col, content_vec_col):
        self.title_vec_col = title_vec_col
        self.content_vec_col = content_vec_col
        self.title_col = title_col
        # id_col and content_col are stored but not read by any method of this class.
        self.content_col = content_col
        self.id_col = id_col

    def calculate_recom_scores(self, k, similarities, exclude=None):
        """Return the ``k`` best ``(item_index, score)`` pairs, highest score first.

        Args:
            k: Maximum number of pairs to return; values <= 0 yield ``[]``.
            similarities: 2-D array-like; only its first row is ranked.
            exclude: Index of the query item to leave out.  When ``None`` the
                top-ranked entry is dropped instead, on the assumption that
                the query is most similar to itself.  That assumption breaks
                when another item ties with the query, so pass the index
                whenever it is known.

        Returns:
            A list of at most ``k`` ``(index, score)`` tuples.
        """
        if k <= 0:
            # Guard: a negative k would otherwise slice from the end of the list.
            return []
        ranked = sorted(enumerate(similarities[0]), key=lambda pair: pair[1], reverse=True)
        if exclude is None:
            return ranked[1:k + 1]
        return [pair for pair in ranked if pair[0] != exclude][:k]

    def str2arr(self, arr):
        """Parse an iterable of JSON-encoded lists into one stacked NumPy array."""
        return np.array([np.array(json.loads(encoded)) for encoded in arr])

    def recommend_k(self, table, k, title):
        """Recommend items similar to the one whose title equals ``title``.

        Candidates are the ``k`` nearest items by title embedding plus the
        ``k`` nearest by content embedding (cosine similarity).  An item found
        by both keeps its higher score, so up to ``2 * k`` items are returned.

        Args:
            table: Iterable of rows; it is transposed so columns can be
                selected by position.
                NOTE(review): the concrete table type is not visible in this
                file - confirm against the caller.
            k: Number of neighbours taken from each embedding.
            title: Value identifying the query item; the first item holding
                this value in any column is used.

        Returns:
            ``(indices, union_scores, titles)``: item positions ordered by
            descending score, the matching ``(index, score)`` tuples, and the
            recommended titles joined with newlines.

        Raises:
            IndexError: If ``title`` does not occur in the table.
        """
        # Transposed view: data[c] is column c and data[c, j] is item j's value.
        data = np.array(list(zip(*table)))
        if data.ndim < 2:
            raise IndexError(f"title {title!r} not found: table is empty or not tabular")

        # Axis 1 is the item axis.  Axis 0 would report the column in which the
        # title was found, which is not a valid item position.
        item_hits = np.where(data == title)[1]
        if item_hits.size == 0:
            raise IndexError(f"title {title!r} not found in table")
        idx = int(item_hits[0])

        title_vecs = self.str2arr(data[self.title_vec_col, :])
        content_vecs = self.str2arr(data[self.content_vec_col, :])

        title_sim = cosine_similarity(title_vecs[idx].reshape(1, -1), title_vecs)
        content_sim = cosine_similarity(content_vecs[idx].reshape(1, -1), content_vecs)

        # Exclude the query by index rather than by rank so that tied or
        # duplicate vectors cannot cause the query to be recommended to itself.
        candidates = (
            self.calculate_recom_scores(k, title_sim, exclude=idx)
            + self.calculate_recom_scores(k, content_sim, exclude=idx)
        )

        # Merge both candidate lists, keeping the best score per item.
        best = {}
        for item, score in candidates:
            if item not in best or score > best[item]:
                best[item] = score
        union_scores = sorted(best.items(), key=lambda pair: pair[1], reverse=True)
        indices = [item for item, _ in union_scores]

        if isinstance(self.title_col, (int, np.integer)):
            # Positional selector: read from the transposed array, like the
            # vector columns above (table[title_col] would be a row instead).
            picked = data[self.title_col, np.asarray(indices, dtype=int)]
        else:
            # Non-positional selector (e.g. a field name): defer to the table's
            # own column lookup, as the original code did.
            picked = table[self.title_col][indices]
        titles = '\n'.join(picked.astype(str))

        return indices, union_scores, titles