Harshkmr committed
Commit b435234
1 Parent(s): 5072249

Model save

Files changed (6)
  1. README.md +85 -0
  2. all_results.json +37 -0
  3. eval_results.json +31 -0
  4. model.safetensors +1 -1
  5. train_results.json +9 -0
  6. trainer_state.json +1287 -0
README.md ADDED
@@ -0,0 +1,85 @@
+ ---
+ license: mit
+ base_model: microsoft/deberta-v3-base
+ tags:
+ - generated_from_trainer
+ model-index:
+ - name: deberta-v3-base_finetuned_bluegennx_run2.19_5e
+   results: []
+ ---
+
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
+ should probably proofread and complete it, then remove this comment. -->
+
+ # deberta-v3-base_finetuned_bluegennx_run2.19_5e
+
+ This model is a fine-tuned version of [microsoft/deberta-v3-base](https://huggingface.co/microsoft/deberta-v3-base) on an unknown dataset.
+ It achieves the following results on the evaluation set:
+ - Loss: 0.0196
+ - Overall Precision: 0.9773
+ - Overall Recall: 0.9870
+ - Overall F1: 0.9822
+ - Overall Accuracy: 0.9957
+ - Aadhar Card F1: 0.9908
+ - Age F1: 0.9708
+ - City F1: 0.9879
+ - Country F1: 0.9825
+ - Creditcardcvv F1: 0.9915
+ - Creditcardnumber F1: 0.9428
+ - Date F1: 0.9626
+ - Dateofbirth F1: 0.9056
+ - Email F1: 0.9928
+ - Expirydate F1: 0.9898
+ - Organization F1: 0.9925
+ - Pan Card F1: 0.9866
+ - Person F1: 0.9887
+ - Phonenumber F1: 0.9880
+ - Pincode F1: 0.9897
+ - Secondaryaddress F1: 0.9891
+ - State F1: 0.9912
+ - Time F1: 0.9831
+ - Url F1: 0.9955
+
+ ## Model description
44
+
45
+ More information needed
46
+
47
+ ## Intended uses & limitations
48
+
49
+ More information needed
50
+
51
+ ## Training and evaluation data
52
+
53
+ More information needed
54
+
55
+ ## Training procedure
56
+
57
+ ### Training hyperparameters
58
+
59
+ The following hyperparameters were used during training:
60
+ - learning_rate: 5e-05
61
+ - train_batch_size: 4
62
+ - eval_batch_size: 4
63
+ - seed: 42
64
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
65
+ - lr_scheduler_type: cosine_with_restarts
66
+ - lr_scheduler_warmup_ratio: 0.2
67
+ - num_epochs: 5
68
+
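A hedged reconstruction of roughly equivalent `TrainingArguments` for the hyperparameters listed above; dataset preparation and the `Trainer` call are omitted, and the evaluation/save strategies are assumptions inferred from the per-epoch eval blocks and the best-checkpoint entry in `trainer_state.json`.

```python
# Sketch of TrainingArguments matching the hyperparameters above (not the author's exact script).
from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir="./deberta-v3-base_finetuned_bluegennx_run2.19_5e",
    learning_rate=5e-5,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    seed=42,
    num_train_epochs=5,
    lr_scheduler_type="cosine_with_restarts",
    warmup_ratio=0.2,
    adam_beta1=0.9,
    adam_beta2=0.999,
    adam_epsilon=1e-8,
    evaluation_strategy="epoch",  # assumed: trainer_state logs one eval block per epoch
    save_strategy="epoch",        # assumed
    load_best_model_at_end=True,  # assumed: trainer_state records a best_model_checkpoint
)
```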
+ ### Training results
+
+ | Training Loss | Epoch | Step | Validation Loss | Overall Precision | Overall Recall | Overall F1 | Overall Accuracy | Aadhar Card F1 | Age F1 | City F1 | Country F1 | Creditcardcvv F1 | Creditcardnumber F1 | Date F1 | Dateofbirth F1 | Email F1 | Expirydate F1 | Organization F1 | Pan Card F1 | Person F1 | Phonenumber F1 | Pincode F1 | Secondaryaddress F1 | State F1 | Time F1 | Url F1 |
+ |:-------------:|:-----:|:-----:|:---------------:|:-----------------:|:--------------:|:----------:|:----------------:|:--------------:|:------:|:-------:|:----------:|:----------------:|:-------------------:|:-------:|:--------------:|:--------:|:-------------:|:---------------:|:-----------:|:---------:|:--------------:|:----------:|:-------------------:|:--------:|:-------:|:------:|
+ | 0.0356 | 1.0 | 15321 | 0.0383 | 0.9535 | 0.9675 | 0.9604 | 0.9915 | 0.9542 | 0.9221 | 0.9617 | 0.9816 | 0.9243 | 0.9195 | 0.9235 | 0.8262 | 0.9826 | 0.9477 | 0.9882 | 0.9529 | 0.9785 | 0.9684 | 0.9187 | 0.9734 | 0.9665 | 0.9723 | 0.9888 |
+ | 0.0231 | 2.0 | 30642 | 0.0265 | 0.9607 | 0.9814 | 0.9709 | 0.9937 | 0.9586 | 0.9437 | 0.9808 | 0.9821 | 0.9799 | 0.9006 | 0.9488 | 0.8788 | 0.9864 | 0.9768 | 0.9843 | 0.9837 | 0.9824 | 0.9809 | 0.9840 | 0.9820 | 0.9906 | 0.9749 | 0.9784 |
+ | 0.0182 | 3.0 | 45963 | 0.0219 | 0.9726 | 0.9854 | 0.9789 | 0.9951 | 0.9842 | 0.9631 | 0.9856 | 0.9843 | 0.9854 | 0.9424 | 0.9553 | 0.8962 | 0.9890 | 0.9878 | 0.9921 | 0.9869 | 0.9859 | 0.9815 | 0.9867 | 0.9884 | 0.9917 | 0.9767 | 0.9962 |
+ | 0.0106 | 4.0 | 61284 | 0.0196 | 0.9773 | 0.9870 | 0.9822 | 0.9957 | 0.9908 | 0.9708 | 0.9879 | 0.9825 | 0.9915 | 0.9428 | 0.9626 | 0.9056 | 0.9928 | 0.9898 | 0.9925 | 0.9866 | 0.9887 | 0.9880 | 0.9897 | 0.9891 | 0.9912 | 0.9831 | 0.9955 |
+ | 0.0044 | 5.0 | 76605 | 0.0214 | 0.9787 | 0.9876 | 0.9831 | 0.9959 | 0.9934 | 0.9710 | 0.9885 | 0.9846 | 0.9915 | 0.9453 | 0.9646 | 0.9125 | 0.9931 | 0.9898 | 0.9937 | 0.9875 | 0.9886 | 0.9893 | 0.9907 | 0.9903 | 0.9924 | 0.9837 | 0.9958 |
+
+
+ ### Framework versions
+
+ - Transformers 4.39.3
+ - Pytorch 2.1.2
+ - Datasets 2.18.0
+ - Tokenizers 0.15.2
all_results.json ADDED
@@ -0,0 +1,37 @@
+ {
+ "epoch": 5.0,
+ "eval_AADHAR_CARD_f1": 0.9907578558225508,
+ "eval_AGE_f1": 0.9708328161846841,
+ "eval_CITY_f1": 0.9878752404047161,
+ "eval_COUNTRY_f1": 0.9825406381697773,
+ "eval_CREDITCARDCVV_f1": 0.9914874551971327,
+ "eval_CREDITCARDNUMBER_f1": 0.9427984036763817,
+ "eval_DATEOFBIRTH_f1": 0.905574984571076,
+ "eval_DATE_f1": 0.9626126791783657,
+ "eval_EMAIL_f1": 0.9927686973749381,
+ "eval_EXPIRYDATE_f1": 0.9897909577053962,
+ "eval_ORGANIZATION_f1": 0.9925087907047851,
+ "eval_PAN_CARD_f1": 0.986565752128666,
+ "eval_PERSON_f1": 0.988700182691657,
+ "eval_PHONENUMBER_f1": 0.9879518072289156,
+ "eval_PINCODE_f1": 0.9896975739448322,
+ "eval_SECONDARYADDRESS_f1": 0.9890931125025247,
+ "eval_STATE_f1": 0.9911617778916357,
+ "eval_TIME_f1": 0.9831231454005935,
+ "eval_URL_f1": 0.9955277280858676,
+ "eval_loss": 0.019619259983301163,
+ "eval_overall_accuracy": 0.9957009162002273,
+ "eval_overall_f1": 0.9821630214328783,
+ "eval_overall_precision": 0.9773414062073856,
+ "eval_overall_recall": 0.9870324464275877,
+ "eval_runtime": 250.2495,
+ "eval_samples": 15321,
+ "eval_samples_per_second": 61.223,
+ "eval_steps_per_second": 15.309,
+ "total_flos": 2.977427926260837e+16,
+ "train_loss": 0.04493269212492903,
+ "train_runtime": 13569.8925,
+ "train_samples": 61281,
+ "train_samples_per_second": 22.58,
+ "train_steps_per_second": 5.645
+ }
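A short sketch of inspecting the flat metrics file above, assuming a local copy of `all_results.json`; the per-entity keys follow the `eval_<ENTITY>_f1` pattern shown in the JSON.

```python
# Load all_results.json and print the headline metrics plus per-entity F1 scores.
import json
from pathlib import Path

results = json.loads(Path("all_results.json").read_text())

print(f"overall F1:        {results['eval_overall_f1']:.4f}")
print(f"overall precision: {results['eval_overall_precision']:.4f}")
print(f"overall recall:    {results['eval_overall_recall']:.4f}")

# Collect the per-entity F1 entries (eval_<ENTITY>_f1), lowest first.
per_entity = {
    key.removeprefix("eval_").removesuffix("_f1"): value
    for key, value in results.items()
    if key.startswith("eval_") and key.endswith("_f1") and "overall" not in key
}
for entity, f1 in sorted(per_entity.items(), key=lambda item: item[1]):
    print(f"{entity:<20} {f1:.4f}")
```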
eval_results.json ADDED
@@ -0,0 +1,31 @@
+ {
+ "epoch": 5.0,
+ "eval_AADHAR_CARD_f1": 0.9907578558225508,
+ "eval_AGE_f1": 0.9708328161846841,
+ "eval_CITY_f1": 0.9878752404047161,
+ "eval_COUNTRY_f1": 0.9825406381697773,
+ "eval_CREDITCARDCVV_f1": 0.9914874551971327,
+ "eval_CREDITCARDNUMBER_f1": 0.9427984036763817,
+ "eval_DATEOFBIRTH_f1": 0.905574984571076,
+ "eval_DATE_f1": 0.9626126791783657,
+ "eval_EMAIL_f1": 0.9927686973749381,
+ "eval_EXPIRYDATE_f1": 0.9897909577053962,
+ "eval_ORGANIZATION_f1": 0.9925087907047851,
+ "eval_PAN_CARD_f1": 0.986565752128666,
+ "eval_PERSON_f1": 0.988700182691657,
+ "eval_PHONENUMBER_f1": 0.9879518072289156,
+ "eval_PINCODE_f1": 0.9896975739448322,
+ "eval_SECONDARYADDRESS_f1": 0.9890931125025247,
+ "eval_STATE_f1": 0.9911617778916357,
+ "eval_TIME_f1": 0.9831231454005935,
+ "eval_URL_f1": 0.9955277280858676,
+ "eval_loss": 0.019619259983301163,
+ "eval_overall_accuracy": 0.9957009162002273,
+ "eval_overall_f1": 0.9821630214328783,
+ "eval_overall_precision": 0.9773414062073856,
+ "eval_overall_recall": 0.9870324464275877,
+ "eval_runtime": 250.2495,
+ "eval_samples": 15321,
+ "eval_samples_per_second": 61.223,
+ "eval_steps_per_second": 15.309
+ }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:0295ba9e338e084410e89dbee4abb39908d06b80aaa54444d4b25deff3865c89
+ oid sha256:ab06e76199ec4b084481c8456e759a2081dc2d5f4186bafe68804ad9d21eec70
  size 735470556
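The diff above only swaps the Git LFS pointer's `oid`; the weights themselves live in LFS storage. A small standard-library sketch for checking that a downloaded `model.safetensors` matches the new pointer:

```python
# Verify a downloaded model.safetensors against the sha256 oid in the LFS pointer above.
import hashlib

EXPECTED_OID = "ab06e76199ec4b084481c8456e759a2081dc2d5f4186bafe68804ad9d21eec70"

def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    digest = hashlib.sha256()
    with open(path, "rb") as fh:
        for chunk in iter(lambda: fh.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

actual = sha256_of("model.safetensors")
print("match" if actual == EXPECTED_OID else f"mismatch: {actual}")
```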
train_results.json ADDED
@@ -0,0 +1,9 @@
+ {
+ "epoch": 5.0,
+ "total_flos": 2.977427926260837e+16,
+ "train_loss": 0.04493269212492903,
+ "train_runtime": 13569.8925,
+ "train_samples": 61281,
+ "train_samples_per_second": 22.58,
+ "train_steps_per_second": 5.645
+ }
trainer_state.json ADDED
@@ -0,0 +1,1287 @@
1
+ {
2
+ "best_metric": 0.019619259983301163,
3
+ "best_model_checkpoint": "./deberta-v3-base_finetuned_bluegennx_run2.19_5e/checkpoint-61284",
4
+ "epoch": 5.0,
5
+ "eval_steps": 500,
6
+ "global_step": 76605,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.03,
13
+ "grad_norm": 3.245230197906494,
14
+ "learning_rate": 1.6317472749820508e-06,
15
+ "loss": 2.448,
16
+ "step": 500
17
+ },
18
+ {
19
+ "epoch": 0.07,
20
+ "grad_norm": 2.250610589981079,
21
+ "learning_rate": 3.2634945499641016e-06,
22
+ "loss": 0.6227,
23
+ "step": 1000
24
+ },
25
+ {
26
+ "epoch": 0.1,
27
+ "grad_norm": 1.1213871240615845,
28
+ "learning_rate": 4.895241824946153e-06,
29
+ "loss": 0.2667,
30
+ "step": 1500
31
+ },
32
+ {
33
+ "epoch": 0.13,
34
+ "grad_norm": 1.0988842248916626,
35
+ "learning_rate": 6.526989099928203e-06,
36
+ "loss": 0.1493,
37
+ "step": 2000
38
+ },
39
+ {
40
+ "epoch": 0.16,
41
+ "grad_norm": 0.4658643901348114,
42
+ "learning_rate": 8.158736374910254e-06,
43
+ "loss": 0.1199,
44
+ "step": 2500
45
+ },
46
+ {
47
+ "epoch": 0.2,
48
+ "grad_norm": 3.3459713459014893,
49
+ "learning_rate": 9.790483649892306e-06,
50
+ "loss": 0.0827,
51
+ "step": 3000
52
+ },
53
+ {
54
+ "epoch": 0.23,
55
+ "grad_norm": 0.9581575989723206,
56
+ "learning_rate": 1.1422230924874356e-05,
57
+ "loss": 0.0722,
58
+ "step": 3500
59
+ },
60
+ {
61
+ "epoch": 0.26,
62
+ "grad_norm": 0.7753175497055054,
63
+ "learning_rate": 1.3053978199856406e-05,
64
+ "loss": 0.0662,
65
+ "step": 4000
66
+ },
67
+ {
68
+ "epoch": 0.29,
69
+ "grad_norm": 1.246222972869873,
70
+ "learning_rate": 1.4685725474838458e-05,
71
+ "loss": 0.0581,
72
+ "step": 4500
73
+ },
74
+ {
75
+ "epoch": 0.33,
76
+ "grad_norm": 0.37093016505241394,
77
+ "learning_rate": 1.6317472749820508e-05,
78
+ "loss": 0.0613,
79
+ "step": 5000
80
+ },
81
+ {
82
+ "epoch": 0.36,
83
+ "grad_norm": 0.08493436872959137,
84
+ "learning_rate": 1.794922002480256e-05,
85
+ "loss": 0.0476,
86
+ "step": 5500
87
+ },
88
+ {
89
+ "epoch": 0.39,
90
+ "grad_norm": 2.2330055236816406,
91
+ "learning_rate": 1.9580967299784612e-05,
92
+ "loss": 0.0513,
93
+ "step": 6000
94
+ },
95
+ {
96
+ "epoch": 0.42,
97
+ "grad_norm": 0.4164097309112549,
98
+ "learning_rate": 2.1212714574766664e-05,
99
+ "loss": 0.0479,
100
+ "step": 6500
101
+ },
102
+ {
103
+ "epoch": 0.46,
104
+ "grad_norm": 3.0428450107574463,
105
+ "learning_rate": 2.2844461849748712e-05,
106
+ "loss": 0.0485,
107
+ "step": 7000
108
+ },
109
+ {
110
+ "epoch": 0.49,
111
+ "grad_norm": 0.09525569528341293,
112
+ "learning_rate": 2.447620912473076e-05,
113
+ "loss": 0.0453,
114
+ "step": 7500
115
+ },
116
+ {
117
+ "epoch": 0.52,
118
+ "grad_norm": 0.07735457271337509,
119
+ "learning_rate": 2.6107956399712813e-05,
120
+ "loss": 0.0498,
121
+ "step": 8000
122
+ },
123
+ {
124
+ "epoch": 0.55,
125
+ "grad_norm": 0.946909487247467,
126
+ "learning_rate": 2.7739703674694868e-05,
127
+ "loss": 0.0473,
128
+ "step": 8500
129
+ },
130
+ {
131
+ "epoch": 0.59,
132
+ "grad_norm": 0.9449179172515869,
133
+ "learning_rate": 2.9371450949676916e-05,
134
+ "loss": 0.0474,
135
+ "step": 9000
136
+ },
137
+ {
138
+ "epoch": 0.62,
139
+ "grad_norm": 0.2430952489376068,
140
+ "learning_rate": 3.1003198224658965e-05,
141
+ "loss": 0.049,
142
+ "step": 9500
143
+ },
144
+ {
145
+ "epoch": 0.65,
146
+ "grad_norm": 0.028986895456910133,
147
+ "learning_rate": 3.2634945499641017e-05,
148
+ "loss": 0.0463,
149
+ "step": 10000
150
+ },
151
+ {
152
+ "epoch": 0.69,
153
+ "grad_norm": 1.1573065519332886,
154
+ "learning_rate": 3.426669277462307e-05,
155
+ "loss": 0.0464,
156
+ "step": 10500
157
+ },
158
+ {
159
+ "epoch": 0.72,
160
+ "grad_norm": 0.5098071694374084,
161
+ "learning_rate": 3.589844004960512e-05,
162
+ "loss": 0.0328,
163
+ "step": 11000
164
+ },
165
+ {
166
+ "epoch": 0.75,
167
+ "grad_norm": 2.0675878524780273,
168
+ "learning_rate": 3.7530187324587165e-05,
169
+ "loss": 0.0475,
170
+ "step": 11500
171
+ },
172
+ {
173
+ "epoch": 0.78,
174
+ "grad_norm": 1.0651479959487915,
175
+ "learning_rate": 3.9161934599569224e-05,
176
+ "loss": 0.0401,
177
+ "step": 12000
178
+ },
179
+ {
180
+ "epoch": 0.82,
181
+ "grad_norm": 1.4569103717803955,
182
+ "learning_rate": 4.079368187455127e-05,
183
+ "loss": 0.041,
184
+ "step": 12500
185
+ },
186
+ {
187
+ "epoch": 0.85,
188
+ "grad_norm": 0.0765555128455162,
189
+ "learning_rate": 4.242542914953333e-05,
190
+ "loss": 0.0418,
191
+ "step": 13000
192
+ },
193
+ {
194
+ "epoch": 0.88,
195
+ "grad_norm": 0.020283367484807968,
196
+ "learning_rate": 4.405717642451537e-05,
197
+ "loss": 0.042,
198
+ "step": 13500
199
+ },
200
+ {
201
+ "epoch": 0.91,
202
+ "grad_norm": 1.3270998001098633,
203
+ "learning_rate": 4.5688923699497425e-05,
204
+ "loss": 0.0379,
205
+ "step": 14000
206
+ },
207
+ {
208
+ "epoch": 0.95,
209
+ "grad_norm": 0.03816336765885353,
210
+ "learning_rate": 4.7320670974479476e-05,
211
+ "loss": 0.0389,
212
+ "step": 14500
213
+ },
214
+ {
215
+ "epoch": 0.98,
216
+ "grad_norm": 0.19806864857673645,
217
+ "learning_rate": 4.895241824946152e-05,
218
+ "loss": 0.0356,
219
+ "step": 15000
220
+ },
221
+ {
222
+ "epoch": 1.0,
223
+ "eval_AADHAR_CARD_f1": 0.9541764859910972,
224
+ "eval_AGE_f1": 0.9221036963532623,
225
+ "eval_CITY_f1": 0.961690662625242,
226
+ "eval_COUNTRY_f1": 0.9815987933634993,
227
+ "eval_CREDITCARDCVV_f1": 0.9242819843342036,
228
+ "eval_CREDITCARDNUMBER_f1": 0.9194598504943332,
229
+ "eval_DATEOFBIRTH_f1": 0.8261840929401251,
230
+ "eval_DATE_f1": 0.9235115474256657,
231
+ "eval_EMAIL_f1": 0.9826121319897253,
232
+ "eval_EXPIRYDATE_f1": 0.9477218225419664,
233
+ "eval_ORGANIZATION_f1": 0.9882065880439204,
234
+ "eval_PAN_CARD_f1": 0.952861952861953,
235
+ "eval_PERSON_f1": 0.9784977775772208,
236
+ "eval_PHONENUMBER_f1": 0.9684334165320805,
237
+ "eval_PINCODE_f1": 0.9187232642316552,
238
+ "eval_SECONDARYADDRESS_f1": 0.9734371879368884,
239
+ "eval_STATE_f1": 0.9665119779254986,
240
+ "eval_TIME_f1": 0.9722814498933902,
241
+ "eval_URL_f1": 0.9888343010272443,
242
+ "eval_loss": 0.03825685754418373,
243
+ "eval_overall_accuracy": 0.9914659328103238,
244
+ "eval_overall_f1": 0.9604457982205258,
245
+ "eval_overall_precision": 0.953495189906404,
246
+ "eval_overall_recall": 0.9674984850988817,
247
+ "eval_runtime": 283.0168,
248
+ "eval_samples_per_second": 54.135,
249
+ "eval_steps_per_second": 13.536,
250
+ "step": 15321
251
+ },
252
+ {
253
+ "epoch": 1.01,
254
+ "grad_norm": 0.31435033679008484,
255
+ "learning_rate": 4.999894750857725e-05,
256
+ "loss": 0.0413,
257
+ "step": 15500
258
+ },
259
+ {
260
+ "epoch": 1.04,
261
+ "grad_norm": 0.7508180737495422,
262
+ "learning_rate": 4.998485702341302e-05,
263
+ "loss": 0.0385,
264
+ "step": 16000
265
+ },
266
+ {
267
+ "epoch": 1.08,
268
+ "grad_norm": 0.05294118449091911,
269
+ "learning_rate": 4.995435313505215e-05,
270
+ "loss": 0.0341,
271
+ "step": 16500
272
+ },
273
+ {
274
+ "epoch": 1.11,
275
+ "grad_norm": 1.2048356533050537,
276
+ "learning_rate": 4.990745588253741e-05,
277
+ "loss": 0.0348,
278
+ "step": 17000
279
+ },
280
+ {
281
+ "epoch": 1.14,
282
+ "grad_norm": 0.943110466003418,
283
+ "learning_rate": 4.9844196074270636e-05,
284
+ "loss": 0.0324,
285
+ "step": 17500
286
+ },
287
+ {
288
+ "epoch": 1.17,
289
+ "grad_norm": 0.6435505151748657,
290
+ "learning_rate": 4.976461526777355e-05,
291
+ "loss": 0.033,
292
+ "step": 18000
293
+ },
294
+ {
295
+ "epoch": 1.21,
296
+ "grad_norm": 1.040231466293335,
297
+ "learning_rate": 4.9668765742387324e-05,
298
+ "loss": 0.0311,
299
+ "step": 18500
300
+ },
301
+ {
302
+ "epoch": 1.24,
303
+ "grad_norm": 0.9562444686889648,
304
+ "learning_rate": 4.95567104649284e-05,
305
+ "loss": 0.0387,
306
+ "step": 19000
307
+ },
308
+ {
309
+ "epoch": 1.27,
310
+ "grad_norm": 0.04553946480154991,
311
+ "learning_rate": 4.942852304832349e-05,
312
+ "loss": 0.0344,
313
+ "step": 19500
314
+ },
315
+ {
316
+ "epoch": 1.31,
317
+ "grad_norm": 0.9238176941871643,
318
+ "learning_rate": 4.9284287703250774e-05,
319
+ "loss": 0.0317,
320
+ "step": 20000
321
+ },
322
+ {
323
+ "epoch": 1.34,
324
+ "grad_norm": 1.8379842042922974,
325
+ "learning_rate": 4.912409918281895e-05,
326
+ "loss": 0.0324,
327
+ "step": 20500
328
+ },
329
+ {
330
+ "epoch": 1.37,
331
+ "grad_norm": 0.031245272606611252,
332
+ "learning_rate": 4.8948062720320765e-05,
333
+ "loss": 0.0297,
334
+ "step": 21000
335
+ },
336
+ {
337
+ "epoch": 1.4,
338
+ "grad_norm": 1.3249530792236328,
339
+ "learning_rate": 4.875629396010166e-05,
340
+ "loss": 0.0326,
341
+ "step": 21500
342
+ },
343
+ {
344
+ "epoch": 1.44,
345
+ "grad_norm": 0.7412520051002502,
346
+ "learning_rate": 4.8548918881589053e-05,
347
+ "loss": 0.0255,
348
+ "step": 22000
349
+ },
350
+ {
351
+ "epoch": 1.47,
352
+ "grad_norm": 0.0375283844769001,
353
+ "learning_rate": 4.832607371653218e-05,
354
+ "loss": 0.0298,
355
+ "step": 22500
356
+ },
357
+ {
358
+ "epoch": 1.5,
359
+ "grad_norm": 4.709392547607422,
360
+ "learning_rate": 4.80879048595068e-05,
361
+ "loss": 0.0289,
362
+ "step": 23000
363
+ },
364
+ {
365
+ "epoch": 1.53,
366
+ "grad_norm": 0.03978699445724487,
367
+ "learning_rate": 4.783456877174363e-05,
368
+ "loss": 0.0292,
369
+ "step": 23500
370
+ },
371
+ {
372
+ "epoch": 1.57,
373
+ "grad_norm": 1.0908358097076416,
374
+ "learning_rate": 4.756623187834363e-05,
375
+ "loss": 0.0282,
376
+ "step": 24000
377
+ },
378
+ {
379
+ "epoch": 1.6,
380
+ "grad_norm": 0.3756016492843628,
381
+ "learning_rate": 4.728307045894771e-05,
382
+ "loss": 0.029,
383
+ "step": 24500
384
+ },
385
+ {
386
+ "epoch": 1.63,
387
+ "grad_norm": 0.9585224390029907,
388
+ "learning_rate": 4.69852705319326e-05,
389
+ "loss": 0.0338,
390
+ "step": 25000
391
+ },
392
+ {
393
+ "epoch": 1.66,
394
+ "grad_norm": 0.8365939855575562,
395
+ "learning_rate": 4.667302773220904e-05,
396
+ "loss": 0.0263,
397
+ "step": 25500
398
+ },
399
+ {
400
+ "epoch": 1.7,
401
+ "grad_norm": 0.04708540067076683,
402
+ "learning_rate": 4.634654718270259e-05,
403
+ "loss": 0.0251,
404
+ "step": 26000
405
+ },
406
+ {
407
+ "epoch": 1.73,
408
+ "grad_norm": 7.851020812988281,
409
+ "learning_rate": 4.600604335960137e-05,
410
+ "loss": 0.0275,
411
+ "step": 26500
412
+ },
413
+ {
414
+ "epoch": 1.76,
415
+ "grad_norm": 0.26771125197410583,
416
+ "learning_rate": 4.5651739951459325e-05,
417
+ "loss": 0.0249,
418
+ "step": 27000
419
+ },
420
+ {
421
+ "epoch": 1.79,
422
+ "grad_norm": 0.9391166567802429,
423
+ "learning_rate": 4.5283869712247654e-05,
424
+ "loss": 0.0258,
425
+ "step": 27500
426
+ },
427
+ {
428
+ "epoch": 1.83,
429
+ "grad_norm": 0.2116001844406128,
430
+ "learning_rate": 4.490267430845077e-05,
431
+ "loss": 0.0283,
432
+ "step": 28000
433
+ },
434
+ {
435
+ "epoch": 1.86,
436
+ "grad_norm": 1.2127752304077148,
437
+ "learning_rate": 4.4508404160307335e-05,
438
+ "loss": 0.0241,
439
+ "step": 28500
440
+ },
441
+ {
442
+ "epoch": 1.89,
443
+ "grad_norm": 0.6359685659408569,
444
+ "learning_rate": 4.410131827730075e-05,
445
+ "loss": 0.0256,
446
+ "step": 29000
447
+ },
448
+ {
449
+ "epoch": 1.93,
450
+ "grad_norm": 0.8148425817489624,
451
+ "learning_rate": 4.368168408800692e-05,
452
+ "loss": 0.027,
453
+ "step": 29500
454
+ },
455
+ {
456
+ "epoch": 1.96,
457
+ "grad_norm": 0.36649465560913086,
458
+ "learning_rate": 4.324977726441138e-05,
459
+ "loss": 0.027,
460
+ "step": 30000
461
+ },
462
+ {
463
+ "epoch": 1.99,
464
+ "grad_norm": 0.3655473291873932,
465
+ "learning_rate": 4.2805881540810985e-05,
466
+ "loss": 0.0231,
467
+ "step": 30500
468
+ },
469
+ {
470
+ "epoch": 2.0,
471
+ "eval_AADHAR_CARD_f1": 0.9585547290116896,
472
+ "eval_AGE_f1": 0.9436792916871618,
473
+ "eval_CITY_f1": 0.9808301383563929,
474
+ "eval_COUNTRY_f1": 0.9821049438883834,
475
+ "eval_CREDITCARDCVV_f1": 0.9798747763864043,
476
+ "eval_CREDITCARDNUMBER_f1": 0.9006162464985994,
477
+ "eval_DATEOFBIRTH_f1": 0.8788368336025848,
478
+ "eval_DATE_f1": 0.9488200589970501,
479
+ "eval_EMAIL_f1": 0.9863582443653618,
480
+ "eval_EXPIRYDATE_f1": 0.97678916827853,
481
+ "eval_ORGANIZATION_f1": 0.9842724080012216,
482
+ "eval_PAN_CARD_f1": 0.9836812144212523,
483
+ "eval_PERSON_f1": 0.9824277196535044,
484
+ "eval_PHONENUMBER_f1": 0.9809048178613397,
485
+ "eval_PINCODE_f1": 0.9840319361277445,
486
+ "eval_SECONDARYADDRESS_f1": 0.9819501865483513,
487
+ "eval_STATE_f1": 0.9906398256186691,
488
+ "eval_TIME_f1": 0.9748799408939787,
489
+ "eval_URL_f1": 0.9783627035467322,
490
+ "eval_loss": 0.026541395112872124,
491
+ "eval_overall_accuracy": 0.9937106918238994,
492
+ "eval_overall_f1": 0.9709243706638617,
493
+ "eval_overall_precision": 0.9606781562088824,
494
+ "eval_overall_recall": 0.981391505536275,
495
+ "eval_runtime": 250.4032,
496
+ "eval_samples_per_second": 61.185,
497
+ "eval_steps_per_second": 15.299,
498
+ "step": 30642
499
+ },
500
+ {
501
+ "epoch": 2.02,
502
+ "grad_norm": 2.719433307647705,
503
+ "learning_rate": 4.235028852741913e-05,
504
+ "loss": 0.0246,
505
+ "step": 31000
506
+ },
507
+ {
508
+ "epoch": 2.06,
509
+ "grad_norm": 0.7214369177818298,
510
+ "learning_rate": 4.188329751879714e-05,
511
+ "loss": 0.0169,
512
+ "step": 31500
513
+ },
514
+ {
515
+ "epoch": 2.09,
516
+ "grad_norm": 0.23133157193660736,
517
+ "learning_rate": 4.140521529723744e-05,
518
+ "loss": 0.0203,
519
+ "step": 32000
520
+ },
521
+ {
522
+ "epoch": 2.12,
523
+ "grad_norm": 0.07947224378585815,
524
+ "learning_rate": 4.091635593122785e-05,
525
+ "loss": 0.0228,
526
+ "step": 32500
527
+ },
528
+ {
529
+ "epoch": 2.15,
530
+ "grad_norm": 0.12643003463745117,
531
+ "learning_rate": 4.041704056912928e-05,
532
+ "loss": 0.0207,
533
+ "step": 33000
534
+ },
535
+ {
536
+ "epoch": 2.19,
537
+ "grad_norm": 0.0956706777215004,
538
+ "learning_rate": 3.990759722820246e-05,
539
+ "loss": 0.0169,
540
+ "step": 33500
541
+ },
542
+ {
543
+ "epoch": 2.22,
544
+ "grad_norm": 1.9679731130599976,
545
+ "learning_rate": 3.938836057912222e-05,
546
+ "loss": 0.0182,
547
+ "step": 34000
548
+ },
549
+ {
550
+ "epoch": 2.25,
551
+ "grad_norm": 0.5308653712272644,
552
+ "learning_rate": 3.8859671726120916e-05,
553
+ "loss": 0.0226,
554
+ "step": 34500
555
+ },
556
+ {
557
+ "epoch": 2.28,
558
+ "grad_norm": 0.40330055356025696,
559
+ "learning_rate": 3.832187798290548e-05,
560
+ "loss": 0.0202,
561
+ "step": 35000
562
+ },
563
+ {
564
+ "epoch": 2.32,
565
+ "grad_norm": 0.1867927610874176,
566
+ "learning_rate": 3.777533264449522e-05,
567
+ "loss": 0.0218,
568
+ "step": 35500
569
+ },
570
+ {
571
+ "epoch": 2.35,
572
+ "grad_norm": 0.533061683177948,
573
+ "learning_rate": 3.722039475513024e-05,
574
+ "loss": 0.015,
575
+ "step": 36000
576
+ },
577
+ {
578
+ "epoch": 2.38,
579
+ "grad_norm": 0.012933309189975262,
580
+ "learning_rate": 3.665742887240311e-05,
581
+ "loss": 0.0196,
582
+ "step": 36500
583
+ },
584
+ {
585
+ "epoch": 2.41,
586
+ "grad_norm": 0.02278105542063713,
587
+ "learning_rate": 3.6086804827768564e-05,
588
+ "loss": 0.0178,
589
+ "step": 37000
590
+ },
591
+ {
592
+ "epoch": 2.45,
593
+ "grad_norm": 0.017285320907831192,
594
+ "learning_rate": 3.5508897483588626e-05,
595
+ "loss": 0.02,
596
+ "step": 37500
597
+ },
598
+ {
599
+ "epoch": 2.48,
600
+ "grad_norm": 0.5036013126373291,
601
+ "learning_rate": 3.4924086486872724e-05,
602
+ "loss": 0.0212,
603
+ "step": 38000
604
+ },
605
+ {
606
+ "epoch": 2.51,
607
+ "grad_norm": 0.2554037570953369,
608
+ "learning_rate": 3.433275601987469e-05,
609
+ "loss": 0.0247,
610
+ "step": 38500
611
+ },
612
+ {
613
+ "epoch": 2.55,
614
+ "grad_norm": 1.2537500858306885,
615
+ "learning_rate": 3.3735294547710354e-05,
616
+ "loss": 0.0192,
617
+ "step": 39000
618
+ },
619
+ {
620
+ "epoch": 2.58,
621
+ "grad_norm": 0.11956170201301575,
622
+ "learning_rate": 3.313209456316154e-05,
623
+ "loss": 0.0169,
624
+ "step": 39500
625
+ },
626
+ {
627
+ "epoch": 2.61,
628
+ "grad_norm": 0.7699991464614868,
629
+ "learning_rate": 3.2523552328834274e-05,
630
+ "loss": 0.0208,
631
+ "step": 40000
632
+ },
633
+ {
634
+ "epoch": 2.64,
635
+ "grad_norm": 0.00504554295912385,
636
+ "learning_rate": 3.1910067616840286e-05,
637
+ "loss": 0.0179,
638
+ "step": 40500
639
+ },
640
+ {
641
+ "epoch": 2.68,
642
+ "grad_norm": 0.020942695438861847,
643
+ "learning_rate": 3.1292043446173205e-05,
644
+ "loss": 0.0175,
645
+ "step": 41000
646
+ },
647
+ {
648
+ "epoch": 2.71,
649
+ "grad_norm": 0.04590239003300667,
650
+ "learning_rate": 3.066988581795156e-05,
651
+ "loss": 0.0183,
652
+ "step": 41500
653
+ },
654
+ {
655
+ "epoch": 2.74,
656
+ "grad_norm": 0.05620180070400238,
657
+ "learning_rate": 3.0044003448702918e-05,
658
+ "loss": 0.0185,
659
+ "step": 42000
660
+ },
661
+ {
662
+ "epoch": 2.77,
663
+ "grad_norm": 0.01712827943265438,
664
+ "learning_rate": 2.9414807501864007e-05,
665
+ "loss": 0.0156,
666
+ "step": 42500
667
+ },
668
+ {
669
+ "epoch": 2.81,
670
+ "grad_norm": 0.07057254761457443,
671
+ "learning_rate": 2.87827113176735e-05,
672
+ "loss": 0.0173,
673
+ "step": 43000
674
+ },
675
+ {
676
+ "epoch": 2.84,
677
+ "grad_norm": 0.02034137025475502,
678
+ "learning_rate": 2.814813014163472e-05,
679
+ "loss": 0.0164,
680
+ "step": 43500
681
+ },
682
+ {
683
+ "epoch": 2.87,
684
+ "grad_norm": 0.12140174210071564,
685
+ "learning_rate": 2.7511480851726734e-05,
686
+ "loss": 0.0176,
687
+ "step": 44000
688
+ },
689
+ {
690
+ "epoch": 2.9,
691
+ "grad_norm": 0.3486383259296417,
692
+ "learning_rate": 2.6873181684543036e-05,
693
+ "loss": 0.0151,
694
+ "step": 44500
695
+ },
696
+ {
697
+ "epoch": 2.94,
698
+ "grad_norm": 0.015888692811131477,
699
+ "learning_rate": 2.6233651960537648e-05,
700
+ "loss": 0.0151,
701
+ "step": 45000
702
+ },
703
+ {
704
+ "epoch": 2.97,
705
+ "grad_norm": 0.348768413066864,
706
+ "learning_rate": 2.559331180855928e-05,
707
+ "loss": 0.0182,
708
+ "step": 45500
709
+ },
710
+ {
711
+ "epoch": 3.0,
712
+ "eval_AADHAR_CARD_f1": 0.9842354177614293,
713
+ "eval_AGE_f1": 0.9630551063565119,
714
+ "eval_CITY_f1": 0.985635543677969,
715
+ "eval_COUNTRY_f1": 0.9843467790487658,
716
+ "eval_CREDITCARDCVV_f1": 0.9854488471009626,
717
+ "eval_CREDITCARDNUMBER_f1": 0.9423696456270134,
718
+ "eval_DATEOFBIRTH_f1": 0.8962226640159046,
719
+ "eval_DATE_f1": 0.9552639427378468,
720
+ "eval_EMAIL_f1": 0.989039202132912,
721
+ "eval_EXPIRYDATE_f1": 0.9878463782207098,
722
+ "eval_ORGANIZATION_f1": 0.9921492659053834,
723
+ "eval_PAN_CARD_f1": 0.9869194312796208,
724
+ "eval_PERSON_f1": 0.9858850543661781,
725
+ "eval_PHONENUMBER_f1": 0.9814869233029679,
726
+ "eval_PINCODE_f1": 0.986728599867286,
727
+ "eval_SECONDARYADDRESS_f1": 0.9883943889393481,
728
+ "eval_STATE_f1": 0.9916826615483045,
729
+ "eval_TIME_f1": 0.9767226189372159,
730
+ "eval_URL_f1": 0.9961994187346301,
731
+ "eval_loss": 0.021940715610980988,
732
+ "eval_overall_accuracy": 0.9950686247786047,
733
+ "eval_overall_f1": 0.9789248441664341,
734
+ "eval_overall_precision": 0.9725538809508275,
735
+ "eval_overall_recall": 0.9853798270258359,
736
+ "eval_runtime": 250.1917,
737
+ "eval_samples_per_second": 61.237,
738
+ "eval_steps_per_second": 15.312,
739
+ "step": 45963
740
+ },
741
+ {
742
+ "epoch": 3.0,
743
+ "grad_norm": 0.023316623643040657,
744
+ "learning_rate": 2.495258188985437e-05,
745
+ "loss": 0.0157,
746
+ "step": 46000
747
+ },
748
+ {
749
+ "epoch": 3.04,
750
+ "grad_norm": 0.5379074811935425,
751
+ "learning_rate": 2.431188312172038e-05,
752
+ "loss": 0.0122,
753
+ "step": 46500
754
+ },
755
+ {
756
+ "epoch": 3.07,
757
+ "grad_norm": 0.021363502368330956,
758
+ "learning_rate": 2.367163640099091e-05,
759
+ "loss": 0.0093,
760
+ "step": 47000
761
+ },
762
+ {
763
+ "epoch": 3.1,
764
+ "grad_norm": 0.006295538041740656,
765
+ "learning_rate": 2.3032262327534213e-05,
766
+ "loss": 0.0116,
767
+ "step": 47500
768
+ },
769
+ {
770
+ "epoch": 3.13,
771
+ "grad_norm": 0.04374461993575096,
772
+ "learning_rate": 2.2394180927946856e-05,
773
+ "loss": 0.0118,
774
+ "step": 48000
775
+ },
776
+ {
777
+ "epoch": 3.17,
778
+ "grad_norm": 0.06676093488931656,
779
+ "learning_rate": 2.1757811379623928e-05,
780
+ "loss": 0.0116,
781
+ "step": 48500
782
+ },
783
+ {
784
+ "epoch": 3.2,
785
+ "grad_norm": 0.009012291207909584,
786
+ "learning_rate": 2.1123571735387153e-05,
787
+ "loss": 0.0089,
788
+ "step": 49000
789
+ },
790
+ {
791
+ "epoch": 3.23,
792
+ "grad_norm": 7.529526710510254,
793
+ "learning_rate": 2.049187864885171e-05,
794
+ "loss": 0.0138,
795
+ "step": 49500
796
+ },
797
+ {
798
+ "epoch": 3.26,
799
+ "grad_norm": 0.0050605605356395245,
800
+ "learning_rate": 1.9863147100712365e-05,
801
+ "loss": 0.0115,
802
+ "step": 50000
803
+ },
804
+ {
805
+ "epoch": 3.3,
806
+ "grad_norm": 0.40215590596199036,
807
+ "learning_rate": 1.9237790126128464e-05,
808
+ "loss": 0.0132,
809
+ "step": 50500
810
+ },
811
+ {
812
+ "epoch": 3.33,
813
+ "grad_norm": 0.006608816795051098,
814
+ "learning_rate": 1.8616218543387105e-05,
815
+ "loss": 0.0123,
816
+ "step": 51000
817
+ },
818
+ {
819
+ "epoch": 3.36,
820
+ "grad_norm": 0.02073794975876808,
821
+ "learning_rate": 1.7998840684022602e-05,
822
+ "loss": 0.0149,
823
+ "step": 51500
824
+ },
825
+ {
826
+ "epoch": 3.39,
827
+ "grad_norm": 0.21203799545764923,
828
+ "learning_rate": 1.7386062124569624e-05,
829
+ "loss": 0.0128,
830
+ "step": 52000
831
+ },
832
+ {
833
+ "epoch": 3.43,
834
+ "grad_norm": 0.0049758171662688255,
835
+ "learning_rate": 1.6778285420126123e-05,
836
+ "loss": 0.0105,
837
+ "step": 52500
838
+ },
839
+ {
840
+ "epoch": 3.46,
841
+ "grad_norm": 0.012410460039973259,
842
+ "learning_rate": 1.6175909839901208e-05,
843
+ "loss": 0.0137,
844
+ "step": 53000
845
+ },
846
+ {
847
+ "epoch": 3.49,
848
+ "grad_norm": 0.00482690054923296,
849
+ "learning_rate": 1.557933110492161e-05,
850
+ "loss": 0.0094,
851
+ "step": 53500
852
+ },
853
+ {
854
+ "epoch": 3.52,
855
+ "grad_norm": 0.02019447833299637,
856
+ "learning_rate": 1.4988941128069068e-05,
857
+ "loss": 0.0129,
858
+ "step": 54000
859
+ },
860
+ {
861
+ "epoch": 3.56,
862
+ "grad_norm": 0.001453628996387124,
863
+ "learning_rate": 1.4405127756619403e-05,
864
+ "loss": 0.0092,
865
+ "step": 54500
866
+ },
867
+ {
868
+ "epoch": 3.59,
869
+ "grad_norm": 0.003288328880444169,
870
+ "learning_rate": 1.382827451745244e-05,
871
+ "loss": 0.0119,
872
+ "step": 55000
873
+ },
874
+ {
875
+ "epoch": 3.62,
876
+ "grad_norm": 0.02008083090186119,
877
+ "learning_rate": 1.3258760365100162e-05,
878
+ "loss": 0.0072,
879
+ "step": 55500
880
+ },
881
+ {
882
+ "epoch": 3.66,
883
+ "grad_norm": 0.818644106388092,
884
+ "learning_rate": 1.2696959432798547e-05,
885
+ "loss": 0.0093,
886
+ "step": 56000
887
+ },
888
+ {
889
+ "epoch": 3.69,
890
+ "grad_norm": 0.06376053392887115,
891
+ "learning_rate": 1.214324078670674e-05,
892
+ "loss": 0.0111,
893
+ "step": 56500
894
+ },
895
+ {
896
+ "epoch": 3.72,
897
+ "grad_norm": 0.08838366717100143,
898
+ "learning_rate": 1.1597968183454874e-05,
899
+ "loss": 0.0123,
900
+ "step": 57000
901
+ },
902
+ {
903
+ "epoch": 3.75,
904
+ "grad_norm": 0.3981121778488159,
905
+ "learning_rate": 1.106149983118004e-05,
906
+ "loss": 0.009,
907
+ "step": 57500
908
+ },
909
+ {
910
+ "epoch": 3.79,
911
+ "grad_norm": 0.22205697000026703,
912
+ "learning_rate": 1.0534188154207075e-05,
913
+ "loss": 0.0091,
914
+ "step": 58000
915
+ },
916
+ {
917
+ "epoch": 3.82,
918
+ "grad_norm": 0.47372546792030334,
919
+ "learning_rate": 1.0016379561529099e-05,
920
+ "loss": 0.0134,
921
+ "step": 58500
922
+ },
923
+ {
924
+ "epoch": 3.85,
925
+ "grad_norm": 1.0051251649856567,
926
+ "learning_rate": 9.508414219239608e-06,
927
+ "loss": 0.0118,
928
+ "step": 59000
929
+ },
930
+ {
931
+ "epoch": 3.88,
932
+ "grad_norm": 0.03393113985657692,
933
+ "learning_rate": 9.010625827065807e-06,
934
+ "loss": 0.0137,
935
+ "step": 59500
936
+ },
937
+ {
938
+ "epoch": 3.92,
939
+ "grad_norm": 0.08359457552433014,
940
+ "learning_rate": 8.523341399149901e-06,
941
+ "loss": 0.0118,
942
+ "step": 60000
943
+ },
944
+ {
945
+ "epoch": 3.95,
946
+ "grad_norm": 0.2319985330104828,
947
+ "learning_rate": 8.046881049222332e-06,
948
+ "loss": 0.0079,
949
+ "step": 60500
950
+ },
951
+ {
952
+ "epoch": 3.98,
953
+ "grad_norm": 0.02699044905602932,
954
+ "learning_rate": 7.581557780308232e-06,
955
+ "loss": 0.0106,
956
+ "step": 61000
957
+ },
958
+ {
959
+ "epoch": 4.0,
960
+ "eval_AADHAR_CARD_f1": 0.9907578558225508,
961
+ "eval_AGE_f1": 0.9708328161846841,
962
+ "eval_CITY_f1": 0.9878752404047161,
963
+ "eval_COUNTRY_f1": 0.9825406381697773,
964
+ "eval_CREDITCARDCVV_f1": 0.9914874551971327,
965
+ "eval_CREDITCARDNUMBER_f1": 0.9427984036763817,
966
+ "eval_DATEOFBIRTH_f1": 0.905574984571076,
967
+ "eval_DATE_f1": 0.9626126791783657,
968
+ "eval_EMAIL_f1": 0.9927686973749381,
969
+ "eval_EXPIRYDATE_f1": 0.9897909577053962,
970
+ "eval_ORGANIZATION_f1": 0.9925087907047851,
971
+ "eval_PAN_CARD_f1": 0.986565752128666,
972
+ "eval_PERSON_f1": 0.988700182691657,
973
+ "eval_PHONENUMBER_f1": 0.9879518072289156,
974
+ "eval_PINCODE_f1": 0.9896975739448322,
975
+ "eval_SECONDARYADDRESS_f1": 0.9890931125025247,
976
+ "eval_STATE_f1": 0.9911617778916357,
977
+ "eval_TIME_f1": 0.9831231454005935,
978
+ "eval_URL_f1": 0.9955277280858676,
979
+ "eval_loss": 0.019619259983301163,
980
+ "eval_overall_accuracy": 0.9957009162002273,
981
+ "eval_overall_f1": 0.9821630214328783,
982
+ "eval_overall_precision": 0.9773414062073856,
983
+ "eval_overall_recall": 0.9870324464275877,
984
+ "eval_runtime": 250.3524,
985
+ "eval_samples_per_second": 61.198,
986
+ "eval_steps_per_second": 15.302,
987
+ "step": 61284
988
+ },
989
+ {
990
+ "epoch": 4.01,
991
+ "grad_norm": 0.21583209931850433,
992
+ "learning_rate": 7.127677279105039e-06,
993
+ "loss": 0.01,
994
+ "step": 61500
995
+ },
996
+ {
997
+ "epoch": 4.05,
998
+ "grad_norm": 0.03639248013496399,
999
+ "learning_rate": 6.685537715166509e-06,
1000
+ "loss": 0.0069,
1001
+ "step": 62000
1002
+ },
1003
+ {
1004
+ "epoch": 4.08,
1005
+ "grad_norm": 0.022935694083571434,
1006
+ "learning_rate": 6.255429545024985e-06,
1007
+ "loss": 0.008,
1008
+ "step": 62500
1009
+ },
1010
+ {
1011
+ "epoch": 4.11,
1012
+ "grad_norm": 0.2646021246910095,
1013
+ "learning_rate": 5.837635321380621e-06,
1014
+ "loss": 0.0054,
1015
+ "step": 63000
1016
+ },
1017
+ {
1018
+ "epoch": 4.14,
1019
+ "grad_norm": 0.042872123420238495,
1020
+ "learning_rate": 5.432429507482867e-06,
1021
+ "loss": 0.0059,
1022
+ "step": 63500
1023
+ },
1024
+ {
1025
+ "epoch": 4.18,
1026
+ "grad_norm": 0.3728538155555725,
1027
+ "learning_rate": 5.040078296826237e-06,
1028
+ "loss": 0.01,
1029
+ "step": 64000
1030
+ },
1031
+ {
1032
+ "epoch": 4.21,
1033
+ "grad_norm": 0.0075051430612802505,
1034
+ "learning_rate": 4.660839438278744e-06,
1035
+ "loss": 0.0098,
1036
+ "step": 64500
1037
+ },
1038
+ {
1039
+ "epoch": 4.24,
1040
+ "grad_norm": 1.2772583961486816,
1041
+ "learning_rate": 4.294962066757888e-06,
1042
+ "loss": 0.0064,
1043
+ "step": 65000
1044
+ },
1045
+ {
1046
+ "epoch": 4.28,
1047
+ "grad_norm": 0.9278016686439514,
1048
+ "learning_rate": 3.94268653956544e-06,
1049
+ "loss": 0.0039,
1050
+ "step": 65500
1051
+ },
1052
+ {
1053
+ "epoch": 4.31,
1054
+ "grad_norm": 0.8312927484512329,
1055
+ "learning_rate": 3.6042442784885526e-06,
1056
+ "loss": 0.006,
1057
+ "step": 66000
1058
+ },
1059
+ {
1060
+ "epoch": 4.34,
1061
+ "grad_norm": 0.02065468765795231,
1062
+ "learning_rate": 3.2798576177709097e-06,
1063
+ "loss": 0.0061,
1064
+ "step": 66500
1065
+ },
1066
+ {
1067
+ "epoch": 4.37,
1068
+ "grad_norm": 0.958082377910614,
1069
+ "learning_rate": 2.9697396580537965e-06,
1070
+ "loss": 0.0049,
1071
+ "step": 67000
1072
+ },
1073
+ {
1074
+ "epoch": 4.41,
1075
+ "grad_norm": 0.0006930904928594828,
1076
+ "learning_rate": 2.674094126383009e-06,
1077
+ "loss": 0.0056,
1078
+ "step": 67500
1079
+ },
1080
+ {
1081
+ "epoch": 4.44,
1082
+ "grad_norm": 0.016762958839535713,
1083
+ "learning_rate": 2.393115242373664e-06,
1084
+ "loss": 0.0058,
1085
+ "step": 68000
1086
+ },
1087
+ {
1088
+ "epoch": 4.47,
1089
+ "grad_norm": 0.02655264548957348,
1090
+ "learning_rate": 2.1269875906206852e-06,
1091
+ "loss": 0.0062,
1092
+ "step": 68500
1093
+ },
1094
+ {
1095
+ "epoch": 4.5,
1096
+ "grad_norm": 0.2882128059864044,
1097
+ "learning_rate": 1.8758859994389228e-06,
1098
+ "loss": 0.0069,
1099
+ "step": 69000
1100
+ },
1101
+ {
1102
+ "epoch": 4.54,
1103
+ "grad_norm": 0.15941615402698517,
1104
+ "learning_rate": 1.6399754260124522e-06,
1105
+ "loss": 0.0056,
1106
+ "step": 69500
1107
+ },
1108
+ {
1109
+ "epoch": 4.57,
1110
+ "grad_norm": 0.023537589237093925,
1111
+ "learning_rate": 1.4194108480286211e-06,
1112
+ "loss": 0.0069,
1113
+ "step": 70000
1114
+ },
1115
+ {
1116
+ "epoch": 4.6,
1117
+ "grad_norm": 0.008936550468206406,
1118
+ "learning_rate": 1.214337161867901e-06,
1119
+ "loss": 0.0046,
1120
+ "step": 70500
1121
+ },
1122
+ {
1123
+ "epoch": 4.63,
1124
+ "grad_norm": 0.006893637590110302,
1125
+ "learning_rate": 1.0248890874165306e-06,
1126
+ "loss": 0.0056,
1127
+ "step": 71000
1128
+ },
1129
+ {
1130
+ "epoch": 4.67,
1131
+ "grad_norm": 0.010946184396743774,
1132
+ "learning_rate": 8.511910795644451e-07,
1133
+ "loss": 0.0048,
1134
+ "step": 71500
1135
+ },
1136
+ {
1137
+ "epoch": 4.7,
1138
+ "grad_norm": 0.034461166709661484,
1139
+ "learning_rate": 6.933572464466104e-07,
1140
+ "loss": 0.0054,
1141
+ "step": 72000
1142
+ },
1143
+ {
1144
+ "epoch": 4.73,
1145
+ "grad_norm": 0.27507975697517395,
1146
+ "learning_rate": 5.514912744815053e-07,
1147
+ "loss": 0.0072,
1148
+ "step": 72500
1149
+ },
1150
+ {
1151
+ "epoch": 4.76,
1152
+ "grad_norm": 0.02132672443985939,
1153
+ "learning_rate": 4.256863602559946e-07,
1154
+ "loss": 0.0062,
1155
+ "step": 73000
1156
+ },
1157
+ {
1158
+ "epoch": 4.8,
1159
+ "grad_norm": 0.785845160484314,
1160
+ "learning_rate": 3.160251493012967e-07,
1161
+ "loss": 0.0074,
1162
+ "step": 73500
1163
+ },
1164
+ {
1165
+ "epoch": 4.83,
1166
+ "grad_norm": 0.004894765559583902,
1167
+ "learning_rate": 2.2257968180034728e-07,
1168
+ "loss": 0.0052,
1169
+ "step": 74000
1170
+ },
1171
+ {
1172
+ "epoch": 4.86,
1173
+ "grad_norm": 0.7538247108459473,
1174
+ "learning_rate": 1.4541134526212807e-07,
1175
+ "loss": 0.0053,
1176
+ "step": 74500
1177
+ },
1178
+ {
1179
+ "epoch": 4.9,
1180
+ "grad_norm": 1.4640376567840576,
1181
+ "learning_rate": 8.45708341941337e-08,
1182
+ "loss": 0.0056,
1183
+ "step": 75000
1184
+ },
1185
+ {
1186
+ "epoch": 4.93,
1187
+ "grad_norm": 0.056750182062387466,
1188
+ "learning_rate": 4.009811679941966e-08,
1189
+ "loss": 0.0066,
1190
+ "step": 75500
1191
+ },
1192
+ {
1193
+ "epoch": 4.96,
1194
+ "grad_norm": 0.013171417638659477,
1195
+ "learning_rate": 1.2022408720130384e-08,
1196
+ "loss": 0.007,
1197
+ "step": 76000
1198
+ },
1199
+ {
1200
+ "epoch": 4.99,
1201
+ "grad_norm": 0.001819392666220665,
1202
+ "learning_rate": 3.6215384475180027e-10,
1203
+ "loss": 0.0044,
1204
+ "step": 76500
1205
+ },
1206
+ {
1207
+ "epoch": 5.0,
1208
+ "eval_AADHAR_CARD_f1": 0.993405433922448,
1209
+ "eval_AGE_f1": 0.9710379117464264,
1210
+ "eval_CITY_f1": 0.9885153826808618,
1211
+ "eval_COUNTRY_f1": 0.9846153846153847,
1212
+ "eval_CREDITCARDCVV_f1": 0.9914874551971327,
1213
+ "eval_CREDITCARDNUMBER_f1": 0.9452593917710196,
1214
+ "eval_DATEOFBIRTH_f1": 0.9125102207686018,
1215
+ "eval_DATE_f1": 0.9646096654275093,
1216
+ "eval_EMAIL_f1": 0.9930651872399444,
1217
+ "eval_EXPIRYDATE_f1": 0.9898107714701602,
1218
+ "eval_ORGANIZATION_f1": 0.9936721779955092,
1219
+ "eval_PAN_CARD_f1": 0.9874999999999999,
1220
+ "eval_PERSON_f1": 0.988620456084895,
1221
+ "eval_PHONENUMBER_f1": 0.9892568064753495,
1222
+ "eval_PINCODE_f1": 0.9907161803713527,
1223
+ "eval_SECONDARYADDRESS_f1": 0.9903128153380424,
1224
+ "eval_STATE_f1": 0.9924407431133888,
1225
+ "eval_TIME_f1": 0.9837375708577271,
1226
+ "eval_URL_f1": 0.9957522915269394,
1227
+ "eval_loss": 0.021437210962176323,
1228
+ "eval_overall_accuracy": 0.9958975737683697,
1229
+ "eval_overall_f1": 0.983098622444503,
1230
+ "eval_overall_precision": 0.9786869315514211,
1231
+ "eval_overall_recall": 0.9875502671734699,
1232
+ "eval_runtime": 250.4213,
1233
+ "eval_samples_per_second": 61.181,
1234
+ "eval_steps_per_second": 15.298,
1235
+ "step": 76605
1236
+ },
1237
+ {
1238
+ "epoch": 5.0,
1239
+ "step": 76605,
1240
+ "total_flos": 2.977427926260837e+16,
1241
+ "train_loss": 0.04493269212492903,
1242
+ "train_runtime": 13569.8925,
1243
+ "train_samples_per_second": 22.58,
1244
+ "train_steps_per_second": 5.645
1245
+ },
1246
+ {
1247
+ "epoch": 5.0,
1248
+ "eval_AADHAR_CARD_f1": 0.9907578558225508,
1249
+ "eval_AGE_f1": 0.9708328161846841,
1250
+ "eval_CITY_f1": 0.9878752404047161,
1251
+ "eval_COUNTRY_f1": 0.9825406381697773,
1252
+ "eval_CREDITCARDCVV_f1": 0.9914874551971327,
1253
+ "eval_CREDITCARDNUMBER_f1": 0.9427984036763817,
1254
+ "eval_DATEOFBIRTH_f1": 0.905574984571076,
1255
+ "eval_DATE_f1": 0.9626126791783657,
1256
+ "eval_EMAIL_f1": 0.9927686973749381,
1257
+ "eval_EXPIRYDATE_f1": 0.9897909577053962,
1258
+ "eval_ORGANIZATION_f1": 0.9925087907047851,
1259
+ "eval_PAN_CARD_f1": 0.986565752128666,
1260
+ "eval_PERSON_f1": 0.988700182691657,
1261
+ "eval_PHONENUMBER_f1": 0.9879518072289156,
1262
+ "eval_PINCODE_f1": 0.9896975739448322,
1263
+ "eval_SECONDARYADDRESS_f1": 0.9890931125025247,
1264
+ "eval_STATE_f1": 0.9911617778916357,
1265
+ "eval_TIME_f1": 0.9831231454005935,
1266
+ "eval_URL_f1": 0.9955277280858676,
1267
+ "eval_loss": 0.019619259983301163,
1268
+ "eval_overall_accuracy": 0.9957009162002273,
1269
+ "eval_overall_f1": 0.9821630214328783,
1270
+ "eval_overall_precision": 0.9773414062073856,
1271
+ "eval_overall_recall": 0.9870324464275877,
1272
+ "eval_runtime": 250.2495,
1273
+ "eval_samples_per_second": 61.223,
1274
+ "eval_steps_per_second": 15.309,
1275
+ "step": 76605
1276
+ }
1277
+ ],
1278
+ "logging_steps": 500,
1279
+ "max_steps": 76605,
1280
+ "num_input_tokens_seen": 0,
1281
+ "num_train_epochs": 5,
1282
+ "save_steps": 500,
1283
+ "total_flos": 2.977427926260837e+16,
1284
+ "train_batch_size": 4,
1285
+ "trial_name": null,
1286
+ "trial_params": null
1287
+ }
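The `log_history` in `trainer_state.json` above interleaves per-500-step training losses with per-epoch evaluation blocks. A hedged sketch for separating the two from a local copy of the file:

```python
# Split trainer_state.json's log_history into a training-loss curve and per-epoch eval points.
import json
from pathlib import Path

state = json.loads(Path("trainer_state.json").read_text())

train_curve = [(entry["step"], entry["loss"])
               for entry in state["log_history"] if "loss" in entry]
eval_points = [(entry["step"], entry["eval_loss"], entry["eval_overall_f1"])
               for entry in state["log_history"] if "eval_loss" in entry]

print(f"{len(train_curve)} training-loss points, {len(eval_points)} evaluation points")
for step, eval_loss, overall_f1 in eval_points:
    print(f"step {step:>6}: eval_loss={eval_loss:.4f}  overall_f1={overall_f1:.4f}")
```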