akahana committed on
Commit
5366f78
1 Parent(s): 4b648b6

End of training

Browse files
README.md CHANGED
@@ -1,9 +1,24 @@
1
  ---
2
  tags:
3
  - generated_from_trainer
 
 
 
 
4
  model-index:
5
  - name: roberta-javanese
6
- results: []
 
 
 
 
 
 
 
 
 
 
 
7
  ---
8
 
9
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -11,7 +26,10 @@ should probably proofread and complete it, then remove this comment. -->
11
 
12
  # roberta-javanese
13
 
14
- This model is a fine-tuned version of [](https://huggingface.co/) on an unknown dataset.
 
 
 
15
 
16
  ## Model description
17
 
 
1
  ---
2
  tags:
3
  - generated_from_trainer
4
+ datasets:
5
+ - akahana/GlotCC-V1-jav-Latn
6
+ metrics:
7
+ - accuracy
8
  model-index:
9
  - name: roberta-javanese
10
+ results:
11
+ - task:
12
+ name: Masked Language Modeling
13
+ type: fill-mask
14
+ dataset:
15
+ name: akahana/GlotCC-V1-jav-Latn default
16
+ type: akahana/GlotCC-V1-jav-Latn
17
+ args: default
18
+ metrics:
19
+ - name: Accuracy
20
+ type: accuracy
21
+ value: 0.4158020342790051
22
  ---
23
 
24
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
26
 
27
  # roberta-javanese
28
 
29
+ This model is a fine-tuned version of [](https://huggingface.co/) on the akahana/GlotCC-V1-jav-Latn default dataset.
30
+ It achieves the following results on the evaluation set:
31
+ - Loss: 3.8064
32
+ - Accuracy: 0.4158
33
 
34
  ## Model description
35
 
all_results.json CHANGED
@@ -1,16 +1,16 @@
1
  {
2
- "epoch": 11.0,
3
- "eval_accuracy": 0.2780392959476054,
4
- "eval_loss": 5.024305820465088,
5
- "eval_runtime": 29.6763,
6
  "eval_samples": 4053,
7
- "eval_samples_per_second": 136.574,
8
- "eval_steps_per_second": 34.169,
9
- "perplexity": 152.0646592908706,
10
- "total_flos": 5.807695355039002e+16,
11
- "train_loss": 1.5156397336923944,
12
- "train_runtime": 4860.501,
13
  "train_samples": 80219,
14
- "train_samples_per_second": 181.547,
15
- "train_steps_per_second": 11.347
16
  }
 
1
  {
2
+ "epoch": 15.0,
3
+ "eval_accuracy": 0.4158020342790051,
4
+ "eval_loss": 3.806352376937866,
5
+ "eval_runtime": 29.338,
6
  "eval_samples": 4053,
7
+ "eval_samples_per_second": 138.149,
8
+ "eval_steps_per_second": 34.563,
9
+ "perplexity": 44.98604708614023,
10
+ "total_flos": 7.919584575053184e+16,
11
+ "train_loss": 1.1771604976443562,
12
+ "train_runtime": 4937.567,
13
  "train_samples": 80219,
14
+ "train_samples_per_second": 243.7,
15
+ "train_steps_per_second": 15.232
16
  }
eval_results.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "epoch": 11.0,
3
- "eval_accuracy": 0.2780392959476054,
4
- "eval_loss": 5.024305820465088,
5
- "eval_runtime": 29.6763,
6
  "eval_samples": 4053,
7
- "eval_samples_per_second": 136.574,
8
- "eval_steps_per_second": 34.169,
9
- "perplexity": 152.0646592908706
10
  }
 
1
  {
2
+ "epoch": 15.0,
3
+ "eval_accuracy": 0.4158020342790051,
4
+ "eval_loss": 3.806352376937866,
5
+ "eval_runtime": 29.338,
6
  "eval_samples": 4053,
7
+ "eval_samples_per_second": 138.149,
8
+ "eval_steps_per_second": 34.563,
9
+ "perplexity": 44.98604708614023
10
  }
runs/Jul13_01-17-08_798ef0aa6e31/events.out.tfevents.1720838484.798ef0aa6e31.2600.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b5f46e05ab7f3a3dbad4431e2d8e8721723ce382c15c25c78db758fd08a7df44
3
+ size 417
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 11.0,
3
- "total_flos": 5.807695355039002e+16,
4
- "train_loss": 1.5156397336923944,
5
- "train_runtime": 4860.501,
6
  "train_samples": 80219,
7
- "train_samples_per_second": 181.547,
8
- "train_steps_per_second": 11.347
9
  }
 
1
  {
2
+ "epoch": 15.0,
3
+ "total_flos": 7.919584575053184e+16,
4
+ "train_loss": 1.1771604976443562,
5
+ "train_runtime": 4937.567,
6
  "train_samples": 80219,
7
+ "train_samples_per_second": 243.7,
8
+ "train_steps_per_second": 15.232
9
  }
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 11.0,
5
  "eval_steps": 500,
6
- "global_step": 55154,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -795,12 +795,301 @@
795
  "train_runtime": 4860.501,
796
  "train_samples_per_second": 181.547,
797
  "train_steps_per_second": 11.347
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
798
  }
799
  ],
800
  "logging_steps": 500,
801
- "max_steps": 55154,
802
  "num_input_tokens_seen": 0,
803
- "num_train_epochs": 11,
804
  "save_steps": 500,
805
  "stateful_callbacks": {
806
  "TrainerControl": {
@@ -814,7 +1103,7 @@
814
  "attributes": {}
815
  }
816
  },
817
- "total_flos": 5.807695355039002e+16,
818
  "train_batch_size": 16,
819
  "trial_name": null,
820
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 15.0,
5
  "eval_steps": 500,
6
+ "global_step": 75210,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
795
  "train_runtime": 4860.501,
796
  "train_samples_per_second": 181.547,
797
  "train_steps_per_second": 11.347
798
+ },
799
+ {
800
+ "epoch": 11.069006781013163,
801
+ "grad_norm": 7.099039077758789,
802
+ "learning_rate": 4.976997739662279e-05,
803
+ "loss": 5.2813,
804
+ "step": 55500
805
+ },
806
+ {
807
+ "epoch": 11.168727562824092,
808
+ "grad_norm": 6.935009479522705,
809
+ "learning_rate": 4.943757479058636e-05,
810
+ "loss": 5.2781,
811
+ "step": 56000
812
+ },
813
+ {
814
+ "epoch": 11.268448344635022,
815
+ "grad_norm": 8.239794731140137,
816
+ "learning_rate": 4.910517218454993e-05,
817
+ "loss": 5.2531,
818
+ "step": 56500
819
+ },
820
+ {
821
+ "epoch": 11.36816912644595,
822
+ "grad_norm": 6.757853031158447,
823
+ "learning_rate": 4.87727695785135e-05,
824
+ "loss": 5.1861,
825
+ "step": 57000
826
+ },
827
+ {
828
+ "epoch": 11.46788990825688,
829
+ "grad_norm": 7.666926383972168,
830
+ "learning_rate": 4.844036697247707e-05,
831
+ "loss": 5.1783,
832
+ "step": 57500
833
+ },
834
+ {
835
+ "epoch": 11.56761069006781,
836
+ "grad_norm": 7.166041374206543,
837
+ "learning_rate": 4.810796436644063e-05,
838
+ "loss": 5.1202,
839
+ "step": 58000
840
+ },
841
+ {
842
+ "epoch": 11.66733147187874,
843
+ "grad_norm": 7.543915748596191,
844
+ "learning_rate": 4.77755617604042e-05,
845
+ "loss": 5.0482,
846
+ "step": 58500
847
+ },
848
+ {
849
+ "epoch": 11.76705225368967,
850
+ "grad_norm": 8.00036907196045,
851
+ "learning_rate": 4.744315915436777e-05,
852
+ "loss": 5.0167,
853
+ "step": 59000
854
+ },
855
+ {
856
+ "epoch": 11.866773035500598,
857
+ "grad_norm": 6.7936272621154785,
858
+ "learning_rate": 4.711075654833134e-05,
859
+ "loss": 4.9823,
860
+ "step": 59500
861
+ },
862
+ {
863
+ "epoch": 11.966493817311529,
864
+ "grad_norm": 7.003523826599121,
865
+ "learning_rate": 4.677835394229491e-05,
866
+ "loss": 4.9457,
867
+ "step": 60000
868
+ },
869
+ {
870
+ "epoch": 12.066214599122457,
871
+ "grad_norm": 7.01780891418457,
872
+ "learning_rate": 4.644595133625848e-05,
873
+ "loss": 4.825,
874
+ "step": 60500
875
+ },
876
+ {
877
+ "epoch": 12.165935380933387,
878
+ "grad_norm": 7.654853820800781,
879
+ "learning_rate": 4.6113548730222045e-05,
880
+ "loss": 4.7741,
881
+ "step": 61000
882
+ },
883
+ {
884
+ "epoch": 12.265656162744316,
885
+ "grad_norm": 7.968235492706299,
886
+ "learning_rate": 4.578181092939769e-05,
887
+ "loss": 4.7404,
888
+ "step": 61500
889
+ },
890
+ {
891
+ "epoch": 12.365376944555246,
892
+ "grad_norm": 7.112838268280029,
893
+ "learning_rate": 4.544940832336126e-05,
894
+ "loss": 4.6502,
895
+ "step": 62000
896
+ },
897
+ {
898
+ "epoch": 12.465097726366174,
899
+ "grad_norm": 6.567187786102295,
900
+ "learning_rate": 4.511700571732483e-05,
901
+ "loss": 4.6277,
902
+ "step": 62500
903
+ },
904
+ {
905
+ "epoch": 12.564818508177105,
906
+ "grad_norm": 6.989046096801758,
907
+ "learning_rate": 4.478460311128839e-05,
908
+ "loss": 4.5757,
909
+ "step": 63000
910
+ },
911
+ {
912
+ "epoch": 12.664539289988033,
913
+ "grad_norm": 6.270955562591553,
914
+ "learning_rate": 4.445220050525196e-05,
915
+ "loss": 4.5394,
916
+ "step": 63500
917
+ },
918
+ {
919
+ "epoch": 12.764260071798963,
920
+ "grad_norm": 6.227508544921875,
921
+ "learning_rate": 4.412046270442761e-05,
922
+ "loss": 4.4651,
923
+ "step": 64000
924
+ },
925
+ {
926
+ "epoch": 12.863980853609892,
927
+ "grad_norm": 6.464995861053467,
928
+ "learning_rate": 4.378806009839118e-05,
929
+ "loss": 4.423,
930
+ "step": 64500
931
+ },
932
+ {
933
+ "epoch": 12.963701635420822,
934
+ "grad_norm": 6.102914810180664,
935
+ "learning_rate": 4.345565749235474e-05,
936
+ "loss": 4.3969,
937
+ "step": 65000
938
+ },
939
+ {
940
+ "epoch": 13.06342241723175,
941
+ "grad_norm": 6.3487067222595215,
942
+ "learning_rate": 4.312325488631831e-05,
943
+ "loss": 4.2689,
944
+ "step": 65500
945
+ },
946
+ {
947
+ "epoch": 13.16314319904268,
948
+ "grad_norm": 6.235875129699707,
949
+ "learning_rate": 4.279085228028188e-05,
950
+ "loss": 4.2232,
951
+ "step": 66000
952
+ },
953
+ {
954
+ "epoch": 13.26286398085361,
955
+ "grad_norm": 5.931600570678711,
956
+ "learning_rate": 4.245844967424545e-05,
957
+ "loss": 4.222,
958
+ "step": 66500
959
+ },
960
+ {
961
+ "epoch": 13.36258476266454,
962
+ "grad_norm": 5.873235702514648,
963
+ "learning_rate": 4.212604706820902e-05,
964
+ "loss": 4.1722,
965
+ "step": 67000
966
+ },
967
+ {
968
+ "epoch": 13.462305544475468,
969
+ "grad_norm": 6.30717134475708,
970
+ "learning_rate": 4.179364446217259e-05,
971
+ "loss": 4.1255,
972
+ "step": 67500
973
+ },
974
+ {
975
+ "epoch": 13.562026326286398,
976
+ "grad_norm": 5.893185138702393,
977
+ "learning_rate": 4.146190666134823e-05,
978
+ "loss": 4.0975,
979
+ "step": 68000
980
+ },
981
+ {
982
+ "epoch": 13.661747108097327,
983
+ "grad_norm": 6.775746822357178,
984
+ "learning_rate": 4.113016886052387e-05,
985
+ "loss": 4.0787,
986
+ "step": 68500
987
+ },
988
+ {
989
+ "epoch": 13.761467889908257,
990
+ "grad_norm": 5.948095798492432,
991
+ "learning_rate": 4.0797766254487435e-05,
992
+ "loss": 4.0581,
993
+ "step": 69000
994
+ },
995
+ {
996
+ "epoch": 13.861188671719185,
997
+ "grad_norm": 5.961909770965576,
998
+ "learning_rate": 4.0465363648451005e-05,
999
+ "loss": 4.0097,
1000
+ "step": 69500
1001
+ },
1002
+ {
1003
+ "epoch": 13.960909453530116,
1004
+ "grad_norm": 5.72122859954834,
1005
+ "learning_rate": 4.0132961042414575e-05,
1006
+ "loss": 3.9751,
1007
+ "step": 70000
1008
+ },
1009
+ {
1010
+ "epoch": 14.060630235341046,
1011
+ "grad_norm": 6.1757378578186035,
1012
+ "learning_rate": 3.980122324159022e-05,
1013
+ "loss": 3.9707,
1014
+ "step": 70500
1015
+ },
1016
+ {
1017
+ "epoch": 14.160351017151974,
1018
+ "grad_norm": 5.7611236572265625,
1019
+ "learning_rate": 3.946882063555378e-05,
1020
+ "loss": 3.9126,
1021
+ "step": 71000
1022
+ },
1023
+ {
1024
+ "epoch": 14.260071798962905,
1025
+ "grad_norm": 6.233034133911133,
1026
+ "learning_rate": 3.913641802951735e-05,
1027
+ "loss": 3.9005,
1028
+ "step": 71500
1029
+ },
1030
+ {
1031
+ "epoch": 14.359792580773833,
1032
+ "grad_norm": 6.282217979431152,
1033
+ "learning_rate": 3.880401542348092e-05,
1034
+ "loss": 3.8648,
1035
+ "step": 72000
1036
+ },
1037
+ {
1038
+ "epoch": 14.459513362584763,
1039
+ "grad_norm": 6.495648384094238,
1040
+ "learning_rate": 3.847161281744449e-05,
1041
+ "loss": 3.8567,
1042
+ "step": 72500
1043
+ },
1044
+ {
1045
+ "epoch": 14.559234144395692,
1046
+ "grad_norm": 6.3030195236206055,
1047
+ "learning_rate": 3.813921021140806e-05,
1048
+ "loss": 3.839,
1049
+ "step": 73000
1050
+ },
1051
+ {
1052
+ "epoch": 14.658954926206622,
1053
+ "grad_norm": 5.807531833648682,
1054
+ "learning_rate": 3.78074724105837e-05,
1055
+ "loss": 3.8156,
1056
+ "step": 73500
1057
+ },
1058
+ {
1059
+ "epoch": 14.75867570801755,
1060
+ "grad_norm": 5.283077716827393,
1061
+ "learning_rate": 3.747506980454727e-05,
1062
+ "loss": 3.8142,
1063
+ "step": 74000
1064
+ },
1065
+ {
1066
+ "epoch": 14.85839648982848,
1067
+ "grad_norm": 5.933303356170654,
1068
+ "learning_rate": 3.714266719851084e-05,
1069
+ "loss": 3.8109,
1070
+ "step": 74500
1071
+ },
1072
+ {
1073
+ "epoch": 14.95811727163941,
1074
+ "grad_norm": 6.217842102050781,
1075
+ "learning_rate": 3.681026459247441e-05,
1076
+ "loss": 3.7937,
1077
+ "step": 75000
1078
+ },
1079
+ {
1080
+ "epoch": 15.0,
1081
+ "step": 75210,
1082
+ "total_flos": 7.919584575053184e+16,
1083
+ "train_loss": 1.1771604976443562,
1084
+ "train_runtime": 4937.567,
1085
+ "train_samples_per_second": 243.7,
1086
+ "train_steps_per_second": 15.232
1087
  }
1088
  ],
1089
  "logging_steps": 500,
1090
+ "max_steps": 75210,
1091
  "num_input_tokens_seen": 0,
1092
+ "num_train_epochs": 15,
1093
  "save_steps": 500,
1094
  "stateful_callbacks": {
1095
  "TrainerControl": {
 
1103
  "attributes": {}
1104
  }
1105
  },
1106
+ "total_flos": 7.919584575053184e+16,
1107
  "train_batch_size": 16,
1108
  "trial_name": null,
1109
  "trial_params": null