Update app.py
Browse files
app.py
CHANGED
@@ -123,24 +123,33 @@ def get_recommendations_TFIDF(abstract):
|
|
123 |
tfidf_vectorizer = TfidfVectorizer()
|
124 |
# Generate the tf-idf vectors for the corpus
|
125 |
tfidf_matrix = tfidf_vectorizer.fit_transform(corpus)
|
|
|
126 |
# Compute the cosine similarity matrix
|
127 |
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)
|
128 |
|
129 |
# Get the similarity scores between the last corpus entry and every document
|
130 |
sim_scores = list(enumerate(cosine_sim[-1]))
|
131 |
sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
|
132 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
133 |
|
134 |
title = train_df['title'].iloc[paper_indices]
|
135 |
categories = train_df['categories'].iloc[paper_indices]
|
136 |
abstract = train_df['abstract'].iloc[paper_indices]
|
137 |
-
|
138 |
return title, categories, abstract, similarity
|
139 |
|
140 |
-
get_recommendations_TFIDF('''
|
141 |
-
In this paper we consider permutations of sequences of partitions, obtaining\na result which parallels von Neumann's theorem on permutations of dense\nsequences and uniformly distributed sequences of points.\n
|
142 |
-
''')
|
143 |
-
|
144 |
"""# Doc2Vec"""
|
145 |
|
146 |
import time
|
|
|
123 |
tfidf_vectorizer = TfidfVectorizer()
|
124 |
# Generate the tf-idf vectors for the corpus
|
125 |
tfidf_matrix = tfidf_vectorizer.fit_transform(corpus)
|
126 |
+
|
127 |
# Compute the cosine similarity matrix
|
128 |
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)
|
129 |
|
130 |
# Get the similarity scores between the last corpus entry and every document
|
131 |
sim_scores = list(enumerate(cosine_sim[-1]))
|
132 |
sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
|
133 |
+
|
134 |
+
# Check if the top two results both match the input abstract (compared token by token)
|
135 |
+
if corpus[int(sim_scores[0][0])].split() == abstract.split() and corpus[int(sim_scores[1][0])].split() == abstract.split():
|
136 |
+
print(corpus[int(sim_scores[0][0])].split() == abstract.split())
|
137 |
+
print(corpus[int(sim_scores[1][0])].split() == abstract.split())
|
138 |
+
paper_indices = int(sim_scores[2][0])
|
139 |
+
similarity = "{:.2f}%".format(sim_scores[2][1] * 100) # Format similarity as a string with two decimal places and a percentage sign
|
140 |
+
elif sim_scores[0][0] == 500:
|
141 |
+
paper_indices = int(sim_scores[1][0])
|
142 |
+
similarity = "{:.2f}%".format(sim_scores[1][1] * 100) # Format similarity as a string with two decimal places and a percentage sign
|
143 |
+
else:
|
144 |
+
paper_indices = int(sim_scores[0][0])
|
145 |
+
similarity = "{:.2f}%".format(sim_scores[0][1] * 100) # Format similarity as a string with two decimal places and a percentage sign
|
146 |
|
147 |
title = train_df['title'].iloc[paper_indices]
|
148 |
categories = train_df['categories'].iloc[paper_indices]
|
149 |
abstract = train_df['abstract'].iloc[paper_indices]
|
150 |
+
|
151 |
return title, categories, abstract, similarity
|
152 |
|
|
|
|
|
|
|
|
|
153 |
"""# Doc2Vec"""
|
154 |
|
155 |
import time
|