baohuynhbk14 committed on
Commit
ca16a96
1 Parent(s): 75a3d18

Model save

Browse files
Files changed (2) hide show
  1. README.md +58 -0
  2. trainer_state.json +126 -0
README.md ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: openbmb/MiniCPM-V-2_6
3
+ library_name: peft
4
+ tags:
5
+ - generated_from_trainer
6
+ model-index:
7
+ - name: miniCPM_finetune_lora_viet_vqa
8
+ results: []
9
+ ---
10
+
11
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
12
+ should probably proofread and complete it, then remove this comment. -->
13
+
14
+ # miniCPM_finetune_lora_viet_vqa
15
+
16
+ This model is a fine-tuned version of [openbmb/MiniCPM-V-2_6](https://huggingface.co/openbmb/MiniCPM-V-2_6) on an unknown dataset.
17
+
18
+ ## Model description
19
+
20
+ More information needed
21
+
22
+ ## Intended uses & limitations
23
+
24
+ More information needed
25
+
26
+ ## Training and evaluation data
27
+
28
+ More information needed
29
+
30
+ ## Training procedure
31
+
32
+ ### Training hyperparameters
33
+
34
+ The following hyperparameters were used during training:
35
+ - learning_rate: 1e-06
36
+ - train_batch_size: 8
37
+ - eval_batch_size: 4
38
+ - seed: 42
39
+ - distributed_type: multi-GPU
40
+ - num_devices: 2
41
+ - total_train_batch_size: 16
42
+ - total_eval_batch_size: 8
43
+ - optimizer: Adam with betas=(0.9,0.95) and epsilon=1e-08
44
+ - lr_scheduler_type: cosine
45
+ - lr_scheduler_warmup_ratio: 0.01
46
+ - num_epochs: 1.0
47
+
48
+ ### Training results
49
+
50
+
51
+
52
+ ### Framework versions
53
+
54
+ - PEFT 0.12.0
55
+ - Transformers 4.44.0
56
+ - PyTorch 2.1.2
57
+ - Datasets 2.20.0
58
+ - Tokenizers 0.19.1
trainer_state.json ADDED
@@ -0,0 +1,126 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "eval_steps": 1000,
6
+ "global_step": 60,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.08333333333333333,
13
+ "grad_norm": 1.0525362491607666,
14
+ "learning_rate": 1e-06,
15
+ "loss": 1.5648,
16
+ "step": 5
17
+ },
18
+ {
19
+ "epoch": 0.16666666666666666,
20
+ "grad_norm": 1.0406826734542847,
21
+ "learning_rate": 1e-06,
22
+ "loss": 1.4231,
23
+ "step": 10
24
+ },
25
+ {
26
+ "epoch": 0.25,
27
+ "grad_norm": 1.1155447959899902,
28
+ "learning_rate": 1e-06,
29
+ "loss": 1.4725,
30
+ "step": 15
31
+ },
32
+ {
33
+ "epoch": 0.3333333333333333,
34
+ "grad_norm": 1.1421337127685547,
35
+ "learning_rate": 1e-06,
36
+ "loss": 1.4378,
37
+ "step": 20
38
+ },
39
+ {
40
+ "epoch": 0.4166666666666667,
41
+ "grad_norm": 1.1258127689361572,
42
+ "learning_rate": 1e-06,
43
+ "loss": 1.4757,
44
+ "step": 25
45
+ },
46
+ {
47
+ "epoch": 0.5,
48
+ "grad_norm": 0.9541631937026978,
49
+ "learning_rate": 1e-06,
50
+ "loss": 1.4635,
51
+ "step": 30
52
+ },
53
+ {
54
+ "epoch": 0.5833333333333334,
55
+ "grad_norm": 0.9896816611289978,
56
+ "learning_rate": 1e-06,
57
+ "loss": 1.4231,
58
+ "step": 35
59
+ },
60
+ {
61
+ "epoch": 0.6666666666666666,
62
+ "grad_norm": 0.9481335282325745,
63
+ "learning_rate": 1e-06,
64
+ "loss": 1.3768,
65
+ "step": 40
66
+ },
67
+ {
68
+ "epoch": 0.75,
69
+ "grad_norm": 1.1283329725265503,
70
+ "learning_rate": 1e-06,
71
+ "loss": 1.3612,
72
+ "step": 45
73
+ },
74
+ {
75
+ "epoch": 0.8333333333333334,
76
+ "grad_norm": 0.891376256942749,
77
+ "learning_rate": 1e-06,
78
+ "loss": 1.4153,
79
+ "step": 50
80
+ },
81
+ {
82
+ "epoch": 0.9166666666666666,
83
+ "grad_norm": 1.1683685779571533,
84
+ "learning_rate": 1e-06,
85
+ "loss": 1.4396,
86
+ "step": 55
87
+ },
88
+ {
89
+ "epoch": 1.0,
90
+ "grad_norm": 0.9238619208335876,
91
+ "learning_rate": 1e-06,
92
+ "loss": 1.4343,
93
+ "step": 60
94
+ },
95
+ {
96
+ "epoch": 1.0,
97
+ "step": 60,
98
+ "total_flos": 4.573495361431142e+16,
99
+ "train_loss": 1.440619428952535,
100
+ "train_runtime": 599.9581,
101
+ "train_samples_per_second": 1.593,
102
+ "train_steps_per_second": 0.1
103
+ }
104
+ ],
105
+ "logging_steps": 5,
106
+ "max_steps": 60,
107
+ "num_input_tokens_seen": 0,
108
+ "num_train_epochs": 1,
109
+ "save_steps": 200,
110
+ "stateful_callbacks": {
111
+ "TrainerControl": {
112
+ "args": {
113
+ "should_epoch_stop": false,
114
+ "should_evaluate": false,
115
+ "should_log": false,
116
+ "should_save": true,
117
+ "should_training_stop": true
118
+ },
119
+ "attributes": {}
120
+ }
121
+ },
122
+ "total_flos": 4.573495361431142e+16,
123
+ "train_batch_size": 8,
124
+ "trial_name": null,
125
+ "trial_params": null
126
+ }