Initial Commit
Browse files
- README.md +25 -24
- config.json +1 -1
- eval_results_cardiff.json +1 -0
- model.safetensors +3 -0
- training_args.bin +2 -2
README.md
CHANGED
@@ -1,4 +1,5 @@
|
|
1 |
---
|
|
|
2 |
license: mit
|
3 |
base_model: microsoft/mdeberta-v3-base
|
4 |
tags:
|
@@ -18,9 +19,9 @@ should probably proofread and complete it, then remove this comment. -->
|
|
18 |
|
19 |
This model is a fine-tuned version of [microsoft/mdeberta-v3-base](https://huggingface.co/microsoft/mdeberta-v3-base) on an unspecified dataset.
|
20 |
It achieves the following results on the evaluation set:
|
21 |
-
- Loss: 5.
|
22 |
-
- Accuracy: 0.
|
23 |
-
- F1: 0.
|
24 |
|
25 |
## Model description
|
26 |
|
@@ -49,30 +50,30 @@ The following hyperparameters were used during training:
|
|
49 |
|
50 |
### Training results
|
51 |
|
52 |
-
| Training Loss | Epoch
|
53 |
-
|
54 |
-
| No log | 1.
|
55 |
-
| No log | 3.
|
56 |
-
| No log | 5.
|
57 |
-
| No log | 6.
|
58 |
-
| 0.
|
59 |
-
| 0.
|
60 |
-
| 0.
|
61 |
-
| 0.
|
62 |
-
| 0.
|
63 |
-
| 0.
|
64 |
-
| 0.
|
65 |
-
| 0.
|
66 |
-
| 0.
|
67 |
-
| 0.
|
68 |
-
| 0.
|
69 |
-
| 0.
|
70 |
-
| 0.
|
71 |
|
72 |
|
73 |
### Framework versions
|
74 |
|
75 |
-
- Transformers 4.
|
76 |
- Pytorch 2.1.1+cu121
|
77 |
- Datasets 2.14.5
|
78 |
-
- Tokenizers 0.
|
|
|
1 |
---
|
2 |
+
library_name: transformers
|
3 |
license: mit
|
4 |
base_model: microsoft/mdeberta-v3-base
|
5 |
tags:
|
|
|
19 |
|
20 |
This model is a fine-tuned version of [microsoft/mdeberta-v3-base](https://huggingface.co/microsoft/mdeberta-v3-base) on an unspecified dataset.
|
21 |
It achieves the following results on the evaluation set:
|
22 |
+
- Loss: 5.1657
|
23 |
+
- Accuracy: 0.4577
|
24 |
+
- F1: 0.4543
|
25 |
|
26 |
## Model description
|
27 |
|
|
|
50 |
|
51 |
### Training results
|
52 |
|
53 |
+
| Training Loss | Epoch | Step | Validation Loss | Accuracy | F1 |
|
54 |
+
|:-------------:|:-------:|:----:|:---------------:|:--------:|:------:|
|
55 |
+
| No log | 1.7241 | 100 | 1.1068 | 0.4330 | 0.3826 |
|
56 |
+
| No log | 3.4483 | 200 | 1.4495 | 0.4533 | 0.4238 |
|
57 |
+
| No log | 5.1724 | 300 | 1.5295 | 0.4586 | 0.4497 |
|
58 |
+
| No log | 6.8966 | 400 | 2.0122 | 0.4537 | 0.4516 |
|
59 |
+
| 0.5768 | 8.6207 | 500 | 3.0885 | 0.4493 | 0.4417 |
|
60 |
+
| 0.5768 | 10.3448 | 600 | 3.3878 | 0.4541 | 0.4497 |
|
61 |
+
| 0.5768 | 12.0690 | 700 | 3.4115 | 0.4586 | 0.4564 |
|
62 |
+
| 0.5768 | 13.7931 | 800 | 3.8779 | 0.4590 | 0.4572 |
|
63 |
+
| 0.5768 | 15.5172 | 900 | 4.1514 | 0.4590 | 0.4579 |
|
64 |
+
| 0.0737 | 17.2414 | 1000 | 4.6699 | 0.4462 | 0.4281 |
|
65 |
+
| 0.0737 | 18.9655 | 1100 | 4.6724 | 0.4608 | 0.4612 |
|
66 |
+
| 0.0737 | 20.6897 | 1200 | 4.6790 | 0.4603 | 0.4562 |
|
67 |
+
| 0.0737 | 22.4138 | 1300 | 4.9305 | 0.4581 | 0.4564 |
|
68 |
+
| 0.0737 | 24.1379 | 1400 | 5.0621 | 0.4568 | 0.4503 |
|
69 |
+
| 0.0099 | 25.8621 | 1500 | 5.0787 | 0.4608 | 0.4574 |
|
70 |
+
| 0.0099 | 27.5862 | 1600 | 5.1428 | 0.4581 | 0.4549 |
|
71 |
+
| 0.0099 | 29.3103 | 1700 | 5.1657 | 0.4577 | 0.4543 |
|
72 |
|
73 |
|
74 |
### Framework versions
|
75 |
|
76 |
+
- Transformers 4.44.2
|
77 |
- Pytorch 2.1.1+cu121
|
78 |
- Datasets 2.14.5
|
79 |
+
- Tokenizers 0.19.1
|
config.json
CHANGED
@@ -39,7 +39,7 @@
|
|
39 |
"relative_attention": true,
|
40 |
"share_att_key": true,
|
41 |
"torch_dtype": "float32",
|
42 |
-
"transformers_version": "4.
|
43 |
"type_vocab_size": 0,
|
44 |
"vocab_size": 251000
|
45 |
}
|
|
|
39 |
"relative_attention": true,
|
40 |
"share_att_key": true,
|
41 |
"torch_dtype": "float32",
|
42 |
+
"transformers_version": "4.44.2",
|
43 |
"type_vocab_size": 0,
|
44 |
"vocab_size": 251000
|
45 |
}
|
eval_results_cardiff.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"arabic": {"f1": 0.48707540389654164, "accuracy": 0.4862068965517241, "confusion_matrix": [[159, 97, 34], [105, 123, 62], [91, 58, 141]]}, "english": {"f1": 0.579978194354665, "accuracy": 0.5781609195402299, "confusion_matrix": [[197, 78, 15], [103, 139, 48], [35, 88, 167]]}, "french": {"f1": 0.4161104946790835, "accuracy": 0.44022988505747124, "confusion_matrix": [[149, 114, 27], [65, 182, 43], [102, 136, 52]]}, "german": {"f1": 0.49275962433716414, "accuracy": 0.5, "confusion_matrix": [[97, 113, 80], [48, 172, 70], [40, 84, 166]]}, "hindi": {"f1": 0.4615527004195401, "accuracy": 0.4666666666666667, "confusion_matrix": [[96, 108, 86], [63, 158, 69], [52, 86, 152]]}, "italian": {"f1": 0.5166065385724214, "accuracy": 0.5160919540229885, "confusion_matrix": [[139, 108, 43], [46, 171, 73], [51, 100, 139]]}, "portuguese": {"f1": 0.42506022985325814, "accuracy": 0.42873563218390803, "confusion_matrix": [[97, 128, 65], [57, 162, 71], [42, 134, 114]]}, "spanish": {"f1": 0.5039475363264437, "accuracy": 0.503448275862069, "confusion_matrix": [[148, 91, 51], [96, 127, 67], [51, 76, 163]]}, "all": {"f1": 0.4962067776821599, "accuracy": 0.49525862068965515, "confusion_matrix": [[1119, 807, 394], [603, 1226, 491], [458, 760, 1102]]}}
|
model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8439ea0cc00fb27f0a0df695f6e15f4ec6f6ea11f3eb2bf64a71fa02f97a91f5
|
3 |
+
size 946716948
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c3ab9eb1868c260d1545f8273c1ff5badcc5b8de143e5f48a43057546fbf3f03
|
3 |
+
size 5304
|