Fix two labels back to back
Browse files
If you check something like doc 12483 in the competition, you'll see two entities whose tokens and labels are back-to-back:
```
('B-NAME_STUDENT', 'Lamiaa'),
('I-NAME_STUDENT', 'Achraf'),
('B-ID_NUM', 'Weyhacy_7000693584')
```
The Gradio app will label both entities as ID_NUM, even though the first one is a NAME_STUDENT.
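This PR should fix that by labeling the entity being closed with the label found at its own start index (`sample['labels'][start][2:]`), and should not introduce any other issues.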
app.py
CHANGED
@@ -86,7 +86,7 @@ def render_sample(subset, pii_type):
|
|
86 |
for index, label in enumerate(sample['labels']):
|
87 |
if label.startswith('B-'):
|
88 |
if in_entity: # End the previous entity
|
89 |
-
ents.append(Span(doc, start, end,
|
90 |
start, end = index, index + 1 # Start a new entity
|
91 |
in_entity = True
|
92 |
elif label.startswith('I-') and in_entity:
|
|
|
86 |
for index, label in enumerate(sample['labels']):
|
87 |
if label.startswith('B-'):
|
88 |
if in_entity: # End the previous entity
|
89 |
+
ents.append(Span(doc, start, end, sample['labels'][start][2:]))
|
90 |
start, end = index, index + 1 # Start a new entity
|
91 |
in_entity = True
|
92 |
elif label.startswith('I-') and in_entity:
|