Spaces:
Running
Running
Improve the presentation of outputs in frontend
Browse files
- README.md +2 -1
- commafixer/src/baseline.py +1 -1
- commafixer/src/fixer.py +3 -3
- notebooks/evaluation.ipynb +2 -1
- static/index.html +2 -1
- static/script.js +4 -2
README.md
CHANGED
@@ -113,7 +113,8 @@ In our approach, for each comma from the prediction text obtained from the model
|
|
113 |
* If a comma from ground truth is not predicted, it counts as a false negative.
|
114 |
|
115 |
## Training
|
116 |
-
The fine-tuned model
|
|
|
117 |
|
118 |
To compare with the baseline, we fine-tune the same model, RoBERTa large, on the wikitext English dataset.
|
119 |
We use a similar approach, where we treat comma-fixing as a NER problem, and for each token predict whether a comma
|
|
|
113 |
* If a comma from ground truth is not predicted, it counts as a false negative.
|
114 |
|
115 |
## Training
|
116 |
+
The fine-tuned model is available as [klasocki/roberta-large-lora-ner-comma-fixer](https://huggingface.co/klasocki/roberta-large-lora-ner-comma-fixer).
|
117 |
+
Further description can be found in the model card.
|
118 |
|
119 |
To compare with the baseline, we fine-tune the same model, RoBERTa large, on the wikitext English dataset.
|
120 |
We use a similar approach, where we treat comma-fixing as a NER problem, and for each token predict whether a comma
|
commafixer/src/baseline.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline, NerPipeline
|
2 |
import re
|
3 |
|
4 |
-
from
|
5 |
|
6 |
|
7 |
class BaselineCommaFixer(CommaFixerInterface):
|
|
|
1 |
from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline, NerPipeline
|
2 |
import re
|
3 |
|
4 |
+
from comma_fixer_interface import CommaFixerInterface
|
5 |
|
6 |
|
7 |
class BaselineCommaFixer(CommaFixerInterface):
|
commafixer/src/fixer.py
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
from peft import PeftConfig, PeftModel
|
2 |
-
from transformers import AutoTokenizer, AutoModelForTokenClassification,
|
3 |
import nltk
|
4 |
import re
|
5 |
|
6 |
-
from
|
7 |
|
8 |
|
9 |
class CommaFixer(CommaFixerInterface):
|
@@ -11,7 +11,7 @@ class CommaFixer(CommaFixerInterface):
|
|
11 |
A wrapper class for the fine-tuned comma fixer model.
|
12 |
"""
|
13 |
|
14 |
-
def __init__(self
|
15 |
self.id2label = {0: 'O', 1: 'B-COMMA'}
|
16 |
self.label2id = {'O': 0, 'B-COMMA': 1}
|
17 |
self.model, self.tokenizer = self._load_peft_model()
|
|
|
1 |
from peft import PeftConfig, PeftModel
|
2 |
+
from transformers import AutoTokenizer, AutoModelForTokenClassification, RobertaTokenizerFast
|
3 |
import nltk
|
4 |
import re
|
5 |
|
6 |
+
from comma_fixer_interface import CommaFixerInterface
|
7 |
|
8 |
|
9 |
class CommaFixer(CommaFixerInterface):
|
|
|
11 |
A wrapper class for the fine-tuned comma fixer model.
|
12 |
"""
|
13 |
|
14 |
+
def __init__(self):
|
15 |
self.id2label = {0: 'O', 1: 'B-COMMA'}
|
16 |
self.label2id = {'O': 0, 'B-COMMA': 1}
|
17 |
self.model, self.tokenizer = self._load_peft_model()
|
notebooks/evaluation.ipynb
CHANGED
@@ -3281,7 +3281,8 @@
|
|
3281 |
{
|
3282 |
"cell_type": "code",
|
3283 |
"source": [
|
3284 |
-
"# comma_fixer.model = comma_fixer.model.cuda()"
|
|
|
3285 |
],
|
3286 |
"metadata": {
|
3287 |
"id": "ePP_WzS7XeYC"
|
|
|
3281 |
{
|
3282 |
"cell_type": "code",
|
3283 |
"source": [
|
3284 |
+
"# comma_fixer.model = comma_fixer.model.cuda() # TODO make this work and evaluate on test in the notebook as well. In\n",
|
3285 |
+
"# training eval on test was ~ same F1"
|
3286 |
],
|
3287 |
"metadata": {
|
3288 |
"id": "ePP_WzS7XeYC"
|
static/index.html
CHANGED
@@ -37,7 +37,8 @@
|
|
37 |
value="This is however a very bad, and terrible sentence grammatically that is."
|
38 |
/>
|
39 |
<button id="comma-fixing-submit">Submit</button>
|
40 |
-
<p class="comma-fixing-output"></p>
|
|
|
41 |
</form>
|
42 |
</section>
|
43 |
</main>
|
|
|
37 |
value="This is however a very bad, and terrible sentence grammatically that is."
|
38 |
/>
|
39 |
<button id="comma-fixing-submit">Submit</button>
|
40 |
+
<p class="comma-fixing-main-output"></p>
|
41 |
+
<p class="comma-fixing-baseline-output"></p>
|
42 |
</form>
|
43 |
</section>
|
44 |
</main>
|
static/script.js
CHANGED
@@ -22,9 +22,11 @@ commaFixingForm.addEventListener("submit", async (event) => {
|
|
22 |
event.preventDefault();
|
23 |
|
24 |
const commaFixingInput = document.getElementById("comma-fixing-input");
|
25 |
-
const commaFixingParagraph = document.querySelector(".comma-fixing-output");
|
|
|
26 |
|
27 |
const fixed = await fixCommas(commaFixingInput.value);
|
28 |
|
29 |
-
commaFixingParagraph.textContent = `Our model: ${fixed.main}
|
|
|
30 |
});
|
|
|
22 |
event.preventDefault();
|
23 |
|
24 |
const commaFixingInput = document.getElementById("comma-fixing-input");
|
25 |
+
const commaFixingParagraph = document.querySelector(".comma-fixing-main-output");
|
26 |
+
const commaFixingBaselineParagraph = document.querySelector(".comma-fixing-baseline-output");
|
27 |
|
28 |
const fixed = await fixCommas(commaFixingInput.value);
|
29 |
|
30 |
+
commaFixingParagraph.textContent = `Our model: ${fixed.main}`
|
31 |
+
commaFixingBaselineParagraph.textContent = `Baseline model: ${fixed.baseline}`
|
32 |
});
|