ddiddu committed on
Commit
9b08db0
1 Parent(s): e0c1f53

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -6
app.py CHANGED
@@ -123,24 +123,33 @@ def get_recommendations_TFIDF(abstract):
123
  tfidf_vectorizer = TfidfVectorizer()
124
  # Generate the tf-idf vectors for the corpus
125
  tfidf_matrix = tfidf_vectorizer.fit_transform(corpus)
 
126
  # compute and print the cosine similarity matrix
127
  cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)
128
 
129
  # Get the pairwise similarity scores
130
  sim_scores = list(enumerate(cosine_sim[-1]))
131
  sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
132
- paper_indices = sim_scores[2][0]
 
 
 
 
 
 
 
 
 
 
 
 
133
 
134
  title = train_df['title'].iloc[paper_indices]
135
  categories = train_df['categories'].iloc[paper_indices]
136
  abstract = train_df['abstract'].iloc[paper_indices]
137
- similarity = "{:.2f}%".format(sim_scores[2][1] * 100) # Format similarity as a string with two decimal places and a percentage sign
138
  return title, categories, abstract, similarity
139
 
140
- get_recommendations_TFIDF('''
141
- In this paper we consider permutations of sequences of partitions, obtaining\na result which parallels von Neumann's theorem on permutations of dense\nsequences and uniformly distributed sequences of points.\n
142
- ''')
143
-
144
  """# Doc2Vec"""
145
 
146
  import time
 
123
  tfidf_vectorizer = TfidfVectorizer()
124
  # Generate the tf-idf vectors for the corpus
125
  tfidf_matrix = tfidf_vectorizer.fit_transform(corpus)
126
+
127
  # compute and print the cosine similarity matrix
128
  cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)
129
 
130
  # Get the pairwise similarity scores
131
  sim_scores = list(enumerate(cosine_sim[-1]))
132
  sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
133
+
134
+ # Check if the first result is the input abstract
135
+ if corpus[int(sim_scores[0][0])].split() == abstract.split() and corpus[int(sim_scores[1][0])].split() == abstract.split():
136
+ print(corpus[int(sim_scores[0][0])].split() == abstract.split())
137
+ print(corpus[int(sim_scores[1][0])].split() == abstract.split())
138
+ paper_indices = int(sim_scores[2][0])
139
+ similarity = "{:.2f}%".format(sim_scores[2][1] * 100) # Format similarity as a string with two decimal places and a percentage sign
140
+ elif sim_scores[0][0] == 500:
141
+ paper_indices = int(sim_scores[1][0])
142
+ similarity = "{:.2f}%".format(sim_scores[1][1] * 100) # Format similarity as a string with two decimal places and a percentage sign
143
+ else:
144
+ paper_indices = int(sim_scores[0][0])
145
+ similarity = "{:.2f}%".format(sim_scores[0][1] * 100) # Format similarity as a string with two decimal places and a percentage sign
146
 
147
  title = train_df['title'].iloc[paper_indices]
148
  categories = train_df['categories'].iloc[paper_indices]
149
  abstract = train_df['abstract'].iloc[paper_indices]
150
+
151
  return title, categories, abstract, similarity
152
 
 
 
 
 
153
  """# Doc2Vec"""
154
 
155
  import time