init

Browse files

Files changed (9) hide show

.gitattributes +1 -0
README.md +56 -0
added_tokens.json +3 -0
config.json +22 -0
pytorch_model.bin +3 -0
special_tokens_map.json +9 -0
spm.model +3 -0
tokenizer.json +0 -0
tokenizer_config.json +17 -0

.gitattributes CHANGED Viewed

@@ -1,6 +1,7 @@
 *.7z filter=lfs diff=lfs merge=lfs -text
 *.arrow filter=lfs diff=lfs merge=lfs -text
 *.bin filter=lfs diff=lfs merge=lfs -text
 *.bz2 filter=lfs diff=lfs merge=lfs -text
 *.ckpt filter=lfs diff=lfs merge=lfs -text
 *.ftz filter=lfs diff=lfs merge=lfs -text

 *.7z filter=lfs diff=lfs merge=lfs -text
 *.arrow filter=lfs diff=lfs merge=lfs -text
 *.bin filter=lfs diff=lfs merge=lfs -text
+*.bin.* filter=lfs diff=lfs merge=lfs -text
 *.bz2 filter=lfs diff=lfs merge=lfs -text
 *.ckpt filter=lfs diff=lfs merge=lfs -text
 *.ftz filter=lfs diff=lfs merge=lfs -text

README.md CHANGED Viewed

@@ -1,3 +1,59 @@
 ---
 license: gpl-3.0
 ---

 ---
+language: en
+tags:
+- greco
+- grammar
+- grammaticality
+- gec
+base_model: microsoft/deberta-v3-large
+datasets: w&i+locness
+model-index:
+  - name: GRECO
+    results:
+      - task:
+          type: grammatical-error-correction
+          name: Grammatical Error Correction
+        dataset:
+          type: conll-2014-shared-task-grammatical-error
+          name: CoNLL-2014
+          split: test
+        metrics:
+          - type: f0.5
+            value: 71.12
+            name: F0.5
+        source:
+          name: NLP-progress
+          url: https://nlpprogress.com/english/grammatical_error_correction.html
 license: gpl-3.0
 ---
+# GRECO: Grammaticality-scorer for re-ranking corrections
+GRECO is a quality estimation model for grammatical error correction. The model is trained to detect which words are incorrect and whether a word or phrase needs to be inserted after certain words. You can then use the model to get the grammaticality score of a sentence.
+Please check the [official repository](https://github.com/nusnlp/greco/tree/main) for more implementation details and updates.
+The model was published in the following paper:
+> System Combination via Quality Estimation for Grammatical Error Correction ([PDF](https://arxiv.org/abs/2310.14947) | [ACL Anthology](https://aclanthology.org/2023.emnlp-main.785/)) <br>
+> [Muhammad Reza Qorib](https://mrqorib.github.io/) and [Hwee Tou Ng](https://www.comp.nus.edu.sg/~nght/) <br>
+> The 2023 Conference on Empirical Methods in Natural Language Processing (EMNLP)
+## Citation
+If you find this model useful for your work, please cite the paper:
+```bibtex
+@inproceedings{qorib-ng-2023-system,
+    title = "System Combination via Quality Estimation for Grammatical Error Correction",
+    author = "Qorib, Muhammad Reza  and
+      Ng, Hwee Tou",
+    editor = "Bouamor, Houda  and
+      Pino, Juan  and
+      Bali, Kalika",
+    booktitle = "Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing",
+    month = dec,
+    year = "2023",
+    address = "Singapore",
+    publisher = "Association for Computational Linguistics",
+    url = "https://aclanthology.org/2023.emnlp-main.785",
+    doi = "10.18653/v1/2023.emnlp-main.785",
+    pages = "12746--12759",
+}
+```

added_tokens.json ADDED Viewed

	@@ -0,0 +1,3 @@

+{
+  "[MASK]": 128000
+}

config.json ADDED Viewed

	@@ -0,0 +1,22 @@

+{
+		"model_type": "deberta-v2",
+		"attention_probs_dropout_prob": 0.1,
+		"hidden_act": "gelu",
+		"hidden_dropout_prob": 0.1,
+		"hidden_size": 1024,
+		"initializer_range": 0.02,
+		"intermediate_size": 4096,
+		"max_position_embeddings": 512,
+		"relative_attention": true,
+		"position_buckets": 256,
+		"norm_rel_ebd": "layer_norm",
+		"share_att_key": true,
+		"pos_att_type": "p2c|c2p",
+		"layer_norm_eps": 1e-7,
+		"max_relative_positions": -1,
+		"position_biased_input": false,
+		"num_attention_heads": 16,
+		"num_hidden_layers": 24,
+		"type_vocab_size": 0,
+		"vocab_size": 128100
+}

pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1a11360c12bdfb20660555fe1e4a71e28b4c21e0acd0b4077d8af80a8f096884
+size 1748823771

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,9 @@

+{
+  "bos_token": "[CLS]",
+  "cls_token": "[CLS]",
+  "eos_token": "[SEP]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

spm.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c679fbf93643d19aab7ee10c0b99e460bdbc02fedf34b92b05af343b4af586fd
+size 2464616

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,17 @@

+{
+  "add_prefix_space": false,
+  "bos_token": "[CLS]",
+  "cls_token": "[CLS]",
+  "do_lower_case": false,
+  "eos_token": "[SEP]",
+  "mask_token": "[MASK]",
+  "name_or_path": "microsoft/deberta-v3-large",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "sp_model_kwargs": {},
+  "special_tokens_map_file": null,
+  "split_by_punct": false,
+  "tokenizer_class": "DebertaV2Tokenizer",
+  "unk_token": "[UNK]",
+  "vocab_type": "spm"
+}