Spaces:
Sleeping
Sleeping
Upload 3 files
Browse files
transformers_rec/transformers_recognizer.py
CHANGED
@@ -224,14 +224,18 @@ class TransformersRecognizer(EntityRecognizer):
|
|
224 |           model_max_length = self.pipeline.tokenizer.model_max_length
225 |           # calculate inputs based on the text
226 |           text_length = len(text)
227 | -         # split text into chunks
228 | -         logger.info(
229 | -             f"splitting the text into chunks, length {text_length} > {model_max_length*2}"
230 | -         )
231 |           predictions = list()
232 | -
233 | -
234 | -
235 |
236 |           # iterate over text chunks and run inference
237 |           for chunk_start, chunk_end in chunk_indexes:
|
|
|
224 |           model_max_length = self.pipeline.tokenizer.model_max_length
225 |           # calculate inputs based on the text
226 |           text_length = len(text)
227 |           predictions = list()
228 | +         if text_length > model_max_length*2:
229 | +             # split text into chunks
230 | +             logger.info(
231 | +                 f"splitting the text into chunks, length {text_length} > {model_max_length*2}"
232 | +             )
233 | +
234 | +             chunk_indexes = TransformersRecognizer.split_text_to_word_chunks(
235 | +                 text_length, self.chunk_length, self.text_overlap_length
236 | +             )
237 | +         else:
238 | +             chunk_indexes = [[0, text_length]]
239 |
240 |           # iterate over text chunks and run inference
241 |           for chunk_start, chunk_end in chunk_indexes:
|