ShkalikovOleh committed on
Commit
ef86d3c
1 Parent(s): a2d70b7

End of training

Browse files
README.md CHANGED
@@ -4,6 +4,8 @@ license: mit
4
  base_model: microsoft/mdeberta-v3-base
5
  tags:
6
  - generated_from_trainer
 
 
7
  metrics:
8
  - precision
9
  - recall
@@ -11,7 +13,27 @@ metrics:
11
  - accuracy
12
  model-index:
13
  - name: mdeberta-v3-base-panx-wikiann-en
14
- results: []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  ---
16
 
17
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -19,7 +41,7 @@ should probably proofread and complete it, then remove this comment. -->
19
 
20
  # mdeberta-v3-base-panx-wikiann-en
21
 
22
- This model is a fine-tuned version of [microsoft/mdeberta-v3-base](https://huggingface.co/microsoft/mdeberta-v3-base) on an unknown dataset.
23
  It achieves the following results on the evaluation set:
24
  - Loss: 0.2520
25
  - Precision: 0.8285
 
4
  base_model: microsoft/mdeberta-v3-base
5
  tags:
6
  - generated_from_trainer
7
+ datasets:
8
+ - google/xtreme
9
  metrics:
10
  - precision
11
  - recall
 
13
  - accuracy
14
  model-index:
15
  - name: mdeberta-v3-base-panx-wikiann-en
16
+ results:
17
+ - task:
18
+ name: Token Classification
19
+ type: token-classification
20
+ dataset:
21
+ name: google/xtreme PAN-X.en
22
+ type: google/xtreme
23
+ args: PAN-X.en
24
+ metrics:
25
+ - name: Precision
26
+ type: precision
27
+ value: 0.8285338502007477
28
+ - name: Recall
29
+ type: recall
30
+ value: 0.8461049059804892
31
+ - name: F1
32
+ type: f1
33
+ value: 0.8372271964185787
34
+ - name: Accuracy
35
+ type: accuracy
36
+ value: 0.9318317274262442
37
  ---
38
 
39
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
41
 
42
  # mdeberta-v3-base-panx-wikiann-en
43
 
44
+ This model is a fine-tuned version of [microsoft/mdeberta-v3-base](https://huggingface.co/microsoft/mdeberta-v3-base) on the google/xtreme PAN-X.en dataset.
45
  It achieves the following results on the evaluation set:
46
  - Loss: 0.2520
47
  - Precision: 0.8285
all_results.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 5.0,
3
+ "eval_accuracy": 0.9318317274262442,
4
+ "eval_f1": 0.8372271964185787,
5
+ "eval_loss": 0.2519814968109131,
6
+ "eval_precision": 0.8285338502007477,
7
+ "eval_recall": 0.8461049059804892,
8
+ "eval_runtime": 38.6807,
9
+ "eval_samples": 10000,
10
+ "eval_samples_per_second": 258.527,
11
+ "eval_steps_per_second": 32.316,
12
+ "predict_accuracy": 0.9317655553619003,
13
+ "predict_f1": 0.8329969906178084,
14
+ "predict_loss": 0.2560366988182068,
15
+ "predict_precision": 0.8234058934695877,
16
+ "predict_recall": 0.8428141567559823,
17
+ "predict_runtime": 37.6178,
18
+ "predict_samples_per_second": 265.832,
19
+ "predict_steps_per_second": 33.229,
20
+ "total_flos": 1384127485887552.0,
21
+ "train_loss": 0.23445223083496095,
22
+ "train_runtime": 2722.1951,
23
+ "train_samples": 20000,
24
+ "train_samples_per_second": 36.735,
25
+ "train_steps_per_second": 1.148
26
+ }
eval_results.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 5.0,
3
+ "eval_accuracy": 0.9318317274262442,
4
+ "eval_f1": 0.8372271964185787,
5
+ "eval_loss": 0.2519814968109131,
6
+ "eval_precision": 0.8285338502007477,
7
+ "eval_recall": 0.8461049059804892,
8
+ "eval_runtime": 38.6807,
9
+ "eval_samples": 10000,
10
+ "eval_samples_per_second": 258.527,
11
+ "eval_steps_per_second": 32.316
12
+ }
predict_results.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "predict_accuracy": 0.9317655553619003,
3
+ "predict_f1": 0.8329969906178084,
4
+ "predict_loss": 0.2560366988182068,
5
+ "predict_precision": 0.8234058934695877,
6
+ "predict_recall": 0.8428141567559823,
7
+ "predict_runtime": 37.6178,
8
+ "predict_samples_per_second": 265.832,
9
+ "predict_steps_per_second": 33.229
10
+ }
predictions.txt ADDED
The diff for this file is too large to render. See raw diff
 
runs/Oct22_08-33-43_b2ebde4835e6/events.out.tfevents.1729588845.b2ebde4835e6.7315.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c72b528d4f39e0010b83db4ff23dd9ffed2f4a37fe88ca913454ea2f87fe854c
3
+ size 560
train_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 5.0,
3
+ "total_flos": 1384127485887552.0,
4
+ "train_loss": 0.23445223083496095,
5
+ "train_runtime": 2722.1951,
6
+ "train_samples": 20000,
7
+ "train_samples_per_second": 36.735,
8
+ "train_steps_per_second": 1.148
9
+ }
trainer_state.json ADDED
@@ -0,0 +1,144 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 5.0,
5
+ "eval_steps": 500,
6
+ "global_step": 3125,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.8,
13
+ "grad_norm": 2.823965072631836,
14
+ "learning_rate": 1.68064e-05,
15
+ "loss": 0.4565,
16
+ "step": 500
17
+ },
18
+ {
19
+ "epoch": 1.0,
20
+ "eval_accuracy": 0.9214636932551902,
21
+ "eval_f1": 0.806775887856969,
22
+ "eval_loss": 0.26512816548347473,
23
+ "eval_precision": 0.7941518865986441,
24
+ "eval_recall": 0.8198077194966775,
25
+ "eval_runtime": 37.78,
26
+ "eval_samples_per_second": 264.691,
27
+ "eval_steps_per_second": 33.086,
28
+ "step": 625
29
+ },
30
+ {
31
+ "epoch": 1.6,
32
+ "grad_norm": 5.351424217224121,
33
+ "learning_rate": 1.3606400000000002e-05,
34
+ "loss": 0.2612,
35
+ "step": 1000
36
+ },
37
+ {
38
+ "epoch": 2.0,
39
+ "eval_accuracy": 0.9257350749975166,
40
+ "eval_f1": 0.8162128281913782,
41
+ "eval_loss": 0.2490304708480835,
42
+ "eval_precision": 0.8042821850123525,
43
+ "eval_recall": 0.8285027569630992,
44
+ "eval_runtime": 38.4394,
45
+ "eval_samples_per_second": 260.15,
46
+ "eval_steps_per_second": 32.519,
47
+ "step": 1250
48
+ },
49
+ {
50
+ "epoch": 2.4,
51
+ "grad_norm": 3.4635002613067627,
52
+ "learning_rate": 1.0406400000000001e-05,
53
+ "loss": 0.2184,
54
+ "step": 1500
55
+ },
56
+ {
57
+ "epoch": 3.0,
58
+ "eval_accuracy": 0.9293856163703189,
59
+ "eval_f1": 0.8262937062937062,
60
+ "eval_loss": 0.2470722198486328,
61
+ "eval_precision": 0.8174899681748997,
62
+ "eval_recall": 0.8352891276685989,
63
+ "eval_runtime": 38.5739,
64
+ "eval_samples_per_second": 259.242,
65
+ "eval_steps_per_second": 32.405,
66
+ "step": 1875
67
+ },
68
+ {
69
+ "epoch": 3.2,
70
+ "grad_norm": 2.8742923736572266,
71
+ "learning_rate": 7.2064e-06,
72
+ "loss": 0.1874,
73
+ "step": 2000
74
+ },
75
+ {
76
+ "epoch": 4.0,
77
+ "grad_norm": 0.9541894197463989,
78
+ "learning_rate": 4.012800000000001e-06,
79
+ "loss": 0.1636,
80
+ "step": 2500
81
+ },
82
+ {
83
+ "epoch": 4.0,
84
+ "eval_accuracy": 0.9307638819906625,
85
+ "eval_f1": 0.8312837484758753,
86
+ "eval_loss": 0.2493128478527069,
87
+ "eval_precision": 0.819493097053369,
88
+ "eval_recall": 0.8434186342428955,
89
+ "eval_runtime": 36.2649,
90
+ "eval_samples_per_second": 275.749,
91
+ "eval_steps_per_second": 34.469,
92
+ "step": 2500
93
+ },
94
+ {
95
+ "epoch": 4.8,
96
+ "grad_norm": 3.6088337898254395,
97
+ "learning_rate": 8.128000000000001e-07,
98
+ "loss": 0.1408,
99
+ "step": 3000
100
+ },
101
+ {
102
+ "epoch": 5.0,
103
+ "eval_accuracy": 0.9318317274262442,
104
+ "eval_f1": 0.8372271964185787,
105
+ "eval_loss": 0.2519814968109131,
106
+ "eval_precision": 0.8285338502007477,
107
+ "eval_recall": 0.8461049059804892,
108
+ "eval_runtime": 37.7598,
109
+ "eval_samples_per_second": 264.832,
110
+ "eval_steps_per_second": 33.104,
111
+ "step": 3125
112
+ },
113
+ {
114
+ "epoch": 5.0,
115
+ "step": 3125,
116
+ "total_flos": 1384127485887552.0,
117
+ "train_loss": 0.23445223083496095,
118
+ "train_runtime": 2722.1951,
119
+ "train_samples_per_second": 36.735,
120
+ "train_steps_per_second": 1.148
121
+ }
122
+ ],
123
+ "logging_steps": 500,
124
+ "max_steps": 3125,
125
+ "num_input_tokens_seen": 0,
126
+ "num_train_epochs": 5,
127
+ "save_steps": 500,
128
+ "stateful_callbacks": {
129
+ "TrainerControl": {
130
+ "args": {
131
+ "should_epoch_stop": false,
132
+ "should_evaluate": false,
133
+ "should_log": false,
134
+ "should_save": false,
135
+ "should_training_stop": false
136
+ },
137
+ "attributes": {}
138
+ }
139
+ },
140
+ "total_flos": 1384127485887552.0,
141
+ "train_batch_size": 4,
142
+ "trial_name": null,
143
+ "trial_params": null
144
+ }