camila-ud committed on
Commit
db7ce2d
1 Parent(s): ea18202

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +12 -3
README.md CHANGED
@@ -62,10 +62,11 @@ from medkit.core.text import TextDocument
62
  from medkit.text.ner.hf_entity_matcher import HFEntityMatcher
63
 
64
  matcher = HFEntityMatcher(model="camila-ud/DrBERT-CASM2")
 
65
  test_doc = TextDocument("Elle souffre d'asthme mais n'a pas besoin d'Allegra")
 
66
 
67
- # detect entities in the raw segment
68
- detected_entities = matcher.run([test_doc.raw_segment])
69
  msg = "|".join(f"'{entity.label}':{entity.text}" for entity in detected_entities)
70
  print(f"Text: '{test_doc.text}'\n{msg}")
71
  ```
@@ -119,10 +120,18 @@ from medkit.text.metrics.ner import SeqEvalEvaluator
119
  matcher = HFEntityMatcher(model="camila-ud/DrBERT-CASM2")
120
  predicted_entities = [matcher.run([doc.raw_segment]) for doc in test_documents]
121
 
122
- # define seqeval evaluator
123
  evaluator = SeqEvalEvaluator(tagging_scheme="iob2")
124
  evaluator.compute(test_documents,predicted_entities=predicted_entities)
125
  ```
 
 
 
 
 
 
 
 
 
126
 
127
  # Citation
128
 
 
62
  from medkit.text.ner.hf_entity_matcher import HFEntityMatcher
63
 
64
  matcher = HFEntityMatcher(model="camila-ud/DrBERT-CASM2")
65
+
66
  test_doc = TextDocument("Elle souffre d'asthme mais n'a pas besoin d'Allegra")
67
+ detected_entities = matcher.run([test_doc.raw_segment])
68
 
69
+ # show information
 
70
  msg = "|".join(f"'{entity.label}':{entity.text}" for entity in detected_entities)
71
  print(f"Text: '{test_doc.text}'\n{msg}")
72
  ```
 
120
  matcher = HFEntityMatcher(model="camila-ud/DrBERT-CASM2")
121
  predicted_entities = [matcher.run([doc.raw_segment]) for doc in test_documents]
122
 
 
123
  evaluator = SeqEvalEvaluator(tagging_scheme="iob2")
124
  evaluator.compute(test_documents,predicted_entities=predicted_entities)
125
  ```
126
+ You can also pass the Hugging Face tokenizer to the evaluator to evaluate by tokens instead of by characters:
127
+ ```python
128
+ from transformers import AutoTokenizer
129
+
130
+ tokenizer_drbert = AutoTokenizer.from_pretrained("camila-ud/DrBERT-CASM2", use_fast=True)
131
+
132
+ evaluator = SeqEvalEvaluator(tokenizer=tokenizer_drbert,tagging_scheme="iob2")
133
+ evaluator.compute(test_documents,predicted_entities=predicted_entities)
134
+ ```
135
 
136
  # Citation
137