Erick03 committed on
Commit
6bc03d4
1 Parent(s): 63de8ff

Training in progress, step 100

Browse files
README.md CHANGED
@@ -1,6 +1,6 @@
1
  ---
2
  base_model: HuggingFaceTB/SmolLM2-135M-Instruct
3
- library_name: transformers
4
  model_name: HFTB-SmolLM2-135M-Instruct-OTCMedicinePHv2
5
  tags:
6
  - generated_from_trainer
@@ -33,6 +33,7 @@ This model was trained with SFT.
33
 
34
  ### Framework versions
35
 
 
36
  - TRL: 0.12.0
37
  - Transformers: 4.46.2
38
  - Pytorch: 2.3.0+cu121
 
1
  ---
2
  base_model: HuggingFaceTB/SmolLM2-135M-Instruct
3
+ library_name: peft
4
  model_name: HFTB-SmolLM2-135M-Instruct-OTCMedicinePHv2
5
  tags:
6
  - generated_from_trainer
 
33
 
34
  ### Framework versions
35
 
36
+ - PEFT 0.13.2
37
  - TRL: 0.12.0
38
  - Transformers: 4.46.2
39
  - Pytorch: 2.3.0+cu121
adapter_config.json CHANGED
@@ -22,11 +22,11 @@
22
  "target_modules": [
23
  "k_proj",
24
  "o_proj",
25
- "q_proj",
26
- "v_proj",
27
- "gate_proj",
28
  "down_proj",
29
- "up_proj"
 
 
 
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
 
22
  "target_modules": [
23
  "k_proj",
24
  "o_proj",
 
 
 
25
  "down_proj",
26
+ "v_proj",
27
+ "q_proj",
28
+ "up_proj",
29
+ "gate_proj"
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b1e9903521addfcb8cd9ec62491477719e3d904888a2c0c0ce2d6a165a40ed71
3
  size 39131224
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4dbd99f50b057c2ca64df5b045da5907f6ca1180b620f814eaffc3df4f28fe17
3
  size 39131224
all_results.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 3.883495145631068,
3
+ "eval_loss": 0.12919174134731293,
4
+ "eval_runtime": 14.8882,
5
+ "eval_samples_per_second": 35.599,
6
+ "eval_steps_per_second": 4.5,
7
+ "total_flos": 255110322096000.0,
8
+ "train_loss": 0.48706390380859377,
9
+ "train_runtime": 923.0797,
10
+ "train_samples_per_second": 5.2,
11
+ "train_steps_per_second": 0.108
12
+ }
eval_results.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 3.883495145631068,
3
+ "eval_loss": 0.12919174134731293,
4
+ "eval_runtime": 14.8882,
5
+ "eval_samples_per_second": 35.599,
6
+ "eval_steps_per_second": 4.5
7
+ }
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 3.883495145631068,
3
+ "total_flos": 255110322096000.0,
4
+ "train_loss": 0.48706390380859377,
5
+ "train_runtime": 923.0797,
6
+ "train_samples_per_second": 5.2,
7
+ "train_steps_per_second": 0.108
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.12923255562782288,
3
+ "best_model_checkpoint": "./HFTB-SmolLM2-135M-Instruct-OTCMedicinePHv2\\checkpoint-100",
4
+ "epoch": 3.883495145631068,
5
+ "eval_steps": 100,
6
+ "global_step": 100,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 3.883495145631068,
13
+ "grad_norm": 0.22856882214546204,
14
+ "learning_rate": 0.0005,
15
+ "loss": 0.4871,
16
+ "step": 100
17
+ },
18
+ {
19
+ "epoch": 3.883495145631068,
20
+ "eval_loss": 0.12923255562782288,
21
+ "eval_runtime": 14.6836,
22
+ "eval_samples_per_second": 36.095,
23
+ "eval_steps_per_second": 4.563,
24
+ "step": 100
25
+ },
26
+ {
27
+ "epoch": 3.883495145631068,
28
+ "step": 100,
29
+ "total_flos": 255110322096000.0,
30
+ "train_loss": 0.48706390380859377,
31
+ "train_runtime": 923.0797,
32
+ "train_samples_per_second": 5.2,
33
+ "train_steps_per_second": 0.108
34
+ }
35
+ ],
36
+ "logging_steps": 100,
37
+ "max_steps": 100,
38
+ "num_input_tokens_seen": 0,
39
+ "num_train_epochs": 4,
40
+ "save_steps": 100,
41
+ "stateful_callbacks": {
42
+ "EarlyStoppingCallback": {
43
+ "args": {
44
+ "early_stopping_patience": 3,
45
+ "early_stopping_threshold": 0.01
46
+ },
47
+ "attributes": {
48
+ "early_stopping_patience_counter": 0
49
+ }
50
+ },
51
+ "TrainerControl": {
52
+ "args": {
53
+ "should_epoch_stop": false,
54
+ "should_evaluate": false,
55
+ "should_log": false,
56
+ "should_save": true,
57
+ "should_training_stop": true
58
+ },
59
+ "attributes": {}
60
+ }
61
+ },
62
+ "total_flos": 255110322096000.0,
63
+ "train_batch_size": 4,
64
+ "trial_name": null,
65
+ "trial_params": null
66
+ }