Update README.md
Browse files
README.md
CHANGED
@@ -54,8 +54,97 @@ slope_one.SlopeOne: A simple yet accurate collaborative filtering algorithm.
|
|
54 |
|
55 |
co_clustering.CoClustering: A collaborative filtering algorithm based on co-clustering.
|
56 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
57 |
Every model was used and evaluated. When compared with one another, the different methods produced different error estimates.
|
58 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
59 |
## Intended uses
|
60 |
You can use the raw model for either masked language modeling or next sentence prediction, but it's mostly intended to
|
61 |
be fine-tuned on a downstream task. See the [model hub](https://www.google.com) to look for
|
|
|
54 |
|
55 |
co_clustering.CoClustering: A collaborative filtering algorithm based on co-clustering.
|
56 |
|
57 |
+
It is possible to pass a custom dataframe as an argument to this class. The dataframe in question needs to have 3 columns with the following names: ['userID', 'itemID', 'rating'].
|
58 |
+
|
59 |
+
```python
|
60 |
+
class Method:
    """Dispatcher that trains and tests a collaborative-filtering algorithm.

    Expects a pandas DataFrame with exactly the columns
    ['userID', 'itemID', 'rating'] (ratings on a 1-5 scale).
    """

    def __init__(self, df):
        # DataFrame holding the ['userID', 'itemID', 'rating'] columns.
        self.df = df
        # Fully-qualified names accepted by run(); the prefix selects the backend.
        self.available_methods = [
            'surprise.NormalPredictor',
            'surprise.BaselineOnly',
            'surprise.KNNBasic',
            'surprise.KNNWithMeans',
            'surprise.KNNWithZScore',
            'surprise.KNNBaseline',
            'surprise.SVD',
            'surprise.SVDpp',
            'surprise.NMF',
            'surprise.SlopeOne',
            'surprise.CoClustering',
        ]

    def show_methods(self):
        """Print the index and name of every supported method."""
        print('The available methods are:')
        for i, method in enumerate(self.available_methods):
            print(str(i) + ': ' + method)

    def run(self, the_method):
        """Dispatch *the_method* to the matching backend by its name prefix.

        Unknown prefixes print a notice instead of raising, keeping the
        original best-effort behavior.
        """
        self.the_method = the_method
        if self.the_method.startswith('surprise'):
            self.run_surprise()
        elif self.the_method.startswith('Gensim'):
            self.run_gensim()
        elif self.the_method.startswith('Transformers-'):
            self.run_transformers()
        else:
            print('This method is not defined! Try another one.')

    def run_surprise(self):
        """Fit the selected surprise algorithm on a 70/30 train/test split.

        Stores the raw predictions in ``self.predictions`` and a tidy
        DataFrame (user_id, item_id, rating, predicted_rating) in
        ``self.predictions_df``.
        """
        import importlib
        import pandas as pd
        from surprise import Reader
        from surprise import Dataset
        from surprise.model_selection import train_test_split

        reader = Reader(rating_scale=(1, 5))
        data = Dataset.load_from_df(self.df[['userID', 'itemID', 'rating']], reader)
        trainset, testset = train_test_split(data, test_size=.30)
        algo_name = self.the_method.replace("surprise.", "")
        # Resolve the algorithm class via importlib/getattr instead of the
        # original eval(f"exec(...)") + locals() trick: no code injection,
        # and locals() mutation inside a function is undefined behavior.
        the_algorithm = getattr(importlib.import_module('surprise'), algo_name)()
        the_algorithm.fit(trainset)
        self.predictions = the_algorithm.test(testset)
        list_predictions = [(uid, iid, r_ui, est) for uid, iid, r_ui, est, _ in self.predictions]
        self.predictions_df = pd.DataFrame(
            list_predictions,
            columns=['user_id', 'item_id', 'rating', 'predicted_rating'])
|
110 |
+
```
|
111 |
Every model was used and evaluated. When compared with one another, the different methods produced different error estimates.
|
112 |
|
113 |
+
|
114 |
+
The surprise library provides 4 different methods to assess the accuracy of the rating predictions: rmse, mse, mae and fcp. For further discussion of each metric, please see the package documentation.
|
115 |
+
|
116 |
+
```python
|
117 |
+
|
118 |
+
class Evaluator:
    """Scores a predictions DataFrame with one of surprise's accuracy metrics.

    Expects a DataFrame with columns
    ['user_id', 'item_id', 'rating', 'predicted_rating'] as produced by
    ``Method.run_surprise``.
    """

    def __init__(self, predictions_df):
        # Metric names accepted by run(); all map onto surprise.accuracy.
        self.available_evaluators = ['surprise.rmse', 'surprise.mse',
                                     'surprise.mae', 'surprise.fcp']
        self.predictions_df = predictions_df

    def show_evaluators(self):
        """Print the index and name of every supported evaluator."""
        print('The available evaluators are:')
        for i, evaluator in enumerate(self.available_evaluators):
            print(str(i) + ': ' + evaluator)

    def run(self, the_evaluator):
        """Dispatch *the_evaluator* by name prefix; unknown names print a notice."""
        self.the_evaluator = the_evaluator
        if self.the_evaluator.startswith('surprise'):
            self.run_surprise()
        else:
            print('This evaluator is not available!')

    def run_surprise(self):
        """Rebuild surprise Prediction objects and compute the chosen metric.

        Stores the rebuilt predictions in ``self.predictions`` and the metric
        value in ``self.acc``.
        """
        import surprise
        from surprise import accuracy

        predictions = [
            surprise.prediction_algorithms.predictions.Prediction(
                row['user_id'], row['item_id'], row['rating'],
                row['predicted_rating'], {})
            for index, row in self.predictions_df.iterrows()]
        self.predictions = predictions
        metric_name = self.the_evaluator.replace("surprise.", "")
        # Keep the original attribute format for backward compatibility.
        self.the_evaluator = 'accuracy.' + metric_name
        # getattr instead of eval(f'...'): same lookup, no arbitrary code
        # execution from a string.
        self.acc = getattr(accuracy, metric_name)(predictions, verbose=True)
|
147 |
+
```
|
148 |
## Intended uses
|
149 |
You can use the raw model for either masked language modeling or next sentence prediction, but it's mostly intended to
|
150 |
be fine-tuned on a downstream task. See the [model hub](https://www.google.com) to look for
|