hellopahe committed on
Commit
261e2d7
•
1 Parent(s): 94692cf

add limits of length 10

Browse files
Files changed (1) hide show
  1. lex_rank_util.py +11 -1
lex_rank_util.py CHANGED
@@ -8,6 +8,7 @@ from scipy.sparse.csgraph import connected_components
8
  from scipy.special import softmax
9
  import logging, math
10
 
 
11
  logger = logging.getLogger(__name__)
12
 
13
  def degree_centrality_scores(
@@ -132,9 +133,18 @@ def find_siblings_by_index(sentences: [str], central_indices: [int], siblings: i
132
  head = max(idx - siblings, 0)
133
  tail = min(idx + siblings + 1, len(sentences))
134
  for i in range(head, tail):
135
- if i not in ret:
136
  ret.append(i)
137
  num -= 1
138
 
139
  print(ret)
140
  return ret
 
 
 
 
 
 
 
 
 
 
8
  from scipy.special import softmax
9
  import logging, math
10
 
11
+ STOP_WORDS = ["作者", "版权所有", "版權所有", "投资有风险", "投資有風險", "http", "https", "来源"]
12
  logger = logging.getLogger(__name__)
13
 
14
  def degree_centrality_scores(
 
133
  head = max(idx - siblings, 0)
134
  tail = min(idx + siblings + 1, len(sentences))
135
  for i in range(head, tail):
136
+ if i not in ret and check_valid_sentence(sentences[i]):
137
  ret.append(i)
138
  num -= 1
139
 
140
  print(ret)
141
  return ret
142
+
143
+
144
+ def check_valid_sentence(content: str):
145
+ if len(content) < 10:
146
+ return False
147
+ for stop_word in STOP_WORDS:
148
+ if stop_word in content:
149
+ return False
150
+ return True