hellopahe committed
Commit 94692cf
1 Parent(s): d777f98

remove redundancy
Files changed (5):
  1. lex_rank.py +5 -7
  2. lex_rank_L12.py +4 -7
  3. lex_rank_text2vec_v1.py +4 -7
  4. lex_rank_util.py +14 -6
  5. test.py +8 -0
lex_rank.py CHANGED

@@ -5,7 +5,7 @@ nltk.download('punkt')
 
 
 from harvesttext import HarvestText
-from lex_rank_util import degree_centrality_scores, find_siblings
+from lex_rank_util import degree_centrality_scores, find_siblings_by_index
 from sentence_transformers import SentenceTransformer, util
 
 
@@ -30,15 +30,13 @@ class LexRank(object):
         # We argsort so that the first element is the sentence with the highest score
         most_central_sentence_indices = numpy.argsort(-centrality_scores)
 
-        # num = 100
+        central_and_siblings = find_siblings_by_index(sentences, most_central_sentence_indices, siblings, num)
         res = []
-        for index in most_central_sentence_indices:
-            if num < 0:
-                break
-            res.append(find_siblings(sentences, index, siblings)[1])
-            num -= 1
+        for index in central_and_siblings:
+            res.append(sentences[index])
         return res
 
+
     def contains_chinese(self, content: str):
         for _char in content:
             if '\u4e00' <= _char <= '\u9fa5':
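For context on what this change removes: the old loop appended, for each central sentence, the concatenation of that sentence and its siblings, so windows around neighbouring central sentences repeated the same text (hence the commit message "remove redundancy"); it could also append find_siblings' error string to the results when siblings was too large. The new path collects each sentence index at most once. A minimal sketch of the difference on toy data (both helpers adapted from this commit's diff of lex_rank_util.py; the debug print is omitted):

import math

def find_siblings(sentences, idx, siblings):  # old helper, removed by this commit
    if not siblings < math.ceil(len(sentences) / 2):
        return -1, "siblings too large, try some value smaller."
    head = max(idx - siblings, 0)
    tail = min(idx + siblings + 1, len(sentences))
    return 0, "".join(sentences[head:tail])

def find_siblings_by_index(sentences, central_indices, siblings, num):  # new helper
    ret = []
    for idx in central_indices:
        if num < 0:
            break
        head = max(idx - siblings, 0)
        tail = min(idx + siblings + 1, len(sentences))
        for i in range(head, tail):
            if i not in ret:
                ret.append(i)
        num -= 1
    return ret

sentences = ["s0.", "s1.", "s2.", "s3.", "s4."]  # toy data, not from the repo
central = [1, 2]  # two neighbouring "most central" sentences

# Old path: each window is joined separately, so s1 and s2 appear twice.
print([find_siblings(sentences, i, 1)[1] for i in central])
# -> ['s0.s1.s2.', 's1.s2.s3.']

# New path: indices are de-duplicated before the sentences are emitted.
idx = find_siblings_by_index(sentences, central, 1, len(central))
print([sentences[i] for i in idx])
# -> ['s0.', 's1.', 's2.', 's3.']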
lex_rank_L12.py CHANGED

@@ -3,7 +3,7 @@ nltk.download('punkt')
 
 
 from harvesttext import HarvestText
-from lex_rank_util import degree_centrality_scores, find_siblings
+from lex_rank_util import degree_centrality_scores, find_siblings_by_index
 from sentence_transformers import SentenceTransformer, util
 
 
@@ -28,13 +28,10 @@ class LexRankL12(object):
         # We argsort so that the first element is the sentence with the highest score
         most_central_sentence_indices = numpy.argsort(-centrality_scores)
 
-        # num = 100
+        central_and_siblings = find_siblings_by_index(sentences, most_central_sentence_indices, siblings, num)
         res = []
-        for index in most_central_sentence_indices:
-            if num < 0:
-                break
-            res.append(find_siblings(sentences, index, siblings)[1])
-            num -= 1
+        for index in central_and_siblings:
+            res.append(sentences[index])
         return res
 
     def contains_chinese(self, content: str):
lex_rank_text2vec_v1.py CHANGED

@@ -3,7 +3,7 @@ nltk.download('punkt')
 
 
 from harvesttext import HarvestText
-from lex_rank_util import degree_centrality_scores, find_siblings
+from lex_rank_util import degree_centrality_scores, find_siblings_by_index
 from sentence_transformers import SentenceTransformer, util
 
 
@@ -28,13 +28,10 @@ class LexRankText2VecV1(object):
         # We argsort so that the first element is the sentence with the highest score
         most_central_sentence_indices = numpy.argsort(-centrality_scores)
 
-        # num = 100
+        central_and_siblings = find_siblings_by_index(sentences, most_central_sentence_indices, siblings, num)
         res = []
-        for index in most_central_sentence_indices:
-            if num < 0:
-                break
-            res.append(find_siblings(sentences, index, siblings)[1])
-            num -= 1
+        for index in central_and_siblings:
+            res.append(sentences[index])
         return res
 
     def contains_chinese(self, content: str):
lex_rank_util.py CHANGED

@@ -124,9 +124,17 @@ def stationary_distribution(
     return distribution
 
 
-def find_siblings(sentences: [str], idx: int, siblings: int) -> (int, str):
-    if not siblings < math.ceil(len(sentences) / 2):
-        return -1, "siblings too large, try some value smaller."
-    head = max(idx - siblings, 0)
-    tail = min(idx + siblings + 1, len(sentences))
-    return 0, "".join(sentences[head:tail])
+def find_siblings_by_index(sentences: [str], central_indices: [int], siblings: int, num: int):
+    ret = []
+    for idx in central_indices:
+        if num < 0:
+            break
+        head = max(idx - siblings, 0)
+        tail = min(idx + siblings + 1, len(sentences))
+        for i in range(head, tail):
+            if i not in ret:
+                ret.append(i)
+        num -= 1
+
+    print(ret)
+    return ret
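One note on the new helper: the `if i not in ret` membership test is linear in the length of `ret`, so collecting many long windows is quadratic overall. A hypothetical variant (not part of this commit) keeps the same output order with O(1) lookups via an auxiliary set:

def find_siblings_by_index_fast(sentences, central_indices, siblings, num):
    # Hypothetical rewrite, not in the commit: same output order as
    # find_siblings_by_index, but membership checks use a set.
    ret, seen = [], set()
    for idx in central_indices:
        if num < 0:
            break
        for i in range(max(idx - siblings, 0), min(idx + siblings + 1, len(sentences))):
            if i not in seen:
                seen.add(i)
                ret.append(i)
        num -= 1
    return ret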
test.py ADDED

@@ -0,0 +1,8 @@
+s = set()
+for i in range(10):  # the committed line was truncated ("for i in range"); range(10) is an assumed completion
+    s.add(i)
+arr = [i for i in s]
+
+print(type(arr))
+arr.sort(reverse=True)
+print(arr)
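As an aside (not part of the commit), this scratch script can collapse to a single call, since sorted() accepts any iterable and returns a new list:

s = {3, 1, 2}                  # hypothetical contents; the committed loop was truncated
arr = sorted(s, reverse=True)  # replaces the comprehension plus in-place sort
print(arr)                     # [3, 2, 1]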