pkarypis committed on
Commit
1fc19cf
1 Parent(s): 9153f52

Model save

Browse files
README.md CHANGED
@@ -19,7 +19,7 @@ should probably proofread and complete it, then remove this comment. -->
19
 
20
  This model is a fine-tuned version of [facebook/opt-6.7b](https://huggingface.co/facebook/opt-6.7b) on the generator dataset.
21
  It achieves the following results on the evaluation set:
22
- - Loss: 1.4492
23
 
24
  ## Model description
25
 
@@ -48,14 +48,14 @@ The following hyperparameters were used during training:
48
  - total_eval_batch_size: 512
49
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
50
  - lr_scheduler_type: cosine
51
- - training_steps: 20
52
  - mixed_precision_training: Native AMP
53
 
54
  ### Training results
55
 
56
  | Training Loss | Epoch | Step | Validation Loss |
57
  |:-------------:|:-----:|:----:|:---------------:|
58
- | 1.8817 | 0.08 | 20 | 1.4492 |
59
 
60
 
61
  ### Framework versions
 
19
 
20
  This model is a fine-tuned version of [facebook/opt-6.7b](https://huggingface.co/facebook/opt-6.7b) on the generator dataset.
21
  It achieves the following results on the evaluation set:
22
+ - Loss: 1.2324
23
 
24
  ## Model description
25
 
 
48
  - total_eval_batch_size: 512
49
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
50
  - lr_scheduler_type: cosine
51
+ - num_epochs: 1.0
52
  - mixed_precision_training: Native AMP
53
 
54
  ### Training results
55
 
56
  | Training Loss | Epoch | Step | Validation Loss |
57
  |:-------------:|:-----:|:----:|:---------------:|
58
+ | 1.8817 | 1.0 | 253 | 1.2324 |
59
 
60
 
61
  ### Framework versions
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
- "epoch": 0.08,
3
- "eval_loss": 1.44921875,
4
- "eval_runtime": 79.3699,
5
  "eval_samples": 23110,
6
- "eval_samples_per_second": 179.904,
7
- "eval_steps_per_second": 0.353,
8
- "train_loss": 1.8745559692382812,
9
- "train_runtime": 266.5177,
10
  "train_samples": 207865,
11
- "train_samples_per_second": 38.421,
12
- "train_steps_per_second": 0.075
13
  }
 
1
  {
2
+ "epoch": 1.0,
3
+ "eval_loss": 1.232421875,
4
+ "eval_runtime": 77.6546,
5
  "eval_samples": 23110,
6
+ "eval_samples_per_second": 183.878,
7
+ "eval_steps_per_second": 0.361,
8
+ "train_loss": 1.3124533189615242,
9
+ "train_runtime": 2403.4192,
10
  "train_samples": 207865,
11
+ "train_samples_per_second": 53.713,
12
+ "train_steps_per_second": 0.105
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 0.08,
3
- "eval_loss": 1.44921875,
4
- "eval_runtime": 79.3699,
5
  "eval_samples": 23110,
6
- "eval_samples_per_second": 179.904,
7
- "eval_steps_per_second": 0.353
8
  }
 
1
  {
2
+ "epoch": 1.0,
3
+ "eval_loss": 1.232421875,
4
+ "eval_runtime": 77.6546,
5
  "eval_samples": 23110,
6
+ "eval_samples_per_second": 183.878,
7
+ "eval_steps_per_second": 0.361
8
  }
model-00001-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:65a238c8eb52abdad746179f691016350d45345a2bd1bf5c1a065ab1f2a0c603
3
  size 4993283928
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c4289f04d4edc6c9315e9815ea32582470bc39aa823b7d35e28ae54a35cdba4
3
  size 4993283928
model-00002-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6e2ae75313cd9c65c7b2a39d4c0919924c95434c0f304d74177ea54327f9e9db
3
  size 4967389600
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e52c3e8e8c71be2c0f021aa4333e23add6b2d48f1876364ed2706d848d099ddd
3
  size 4967389600
model-00003-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bf36073a57afed545b43c124b7287ba64a9b93035a5d3baba829fece44d33404
3
  size 3356335328
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca916dd5bca30e94bb8358e1838ad53fefa61680ba94e8a2ac4e02ff262e18b7
3
  size 3356335328
runs/Jan03_19-09-43_aga39/events.out.tfevents.1704330610.aga39.146871.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4434bcdf73670fc87228b21722263b172730b7ef996306524302b7d9f6f8cfed
3
+ size 5261
runs/Jan03_19-09-43_aga39/events.out.tfevents.1704333092.aga39.146871.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3dac238c4d2f4e5beef93d30f9d9a18f2ed6034e3a005b680ec561eecb56121f
3
+ size 359
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 0.08,
3
- "train_loss": 1.8745559692382812,
4
- "train_runtime": 266.5177,
5
  "train_samples": 207865,
6
- "train_samples_per_second": 38.421,
7
- "train_steps_per_second": 0.075
8
  }
 
1
  {
2
+ "epoch": 1.0,
3
+ "train_loss": 1.3124533189615242,
4
+ "train_runtime": 2403.4192,
5
  "train_samples": 207865,
6
+ "train_samples_per_second": 53.713,
7
+ "train_steps_per_second": 0.105
8
  }
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.07905138339920949,
5
  "eval_steps": 500,
6
- "global_step": 20,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -15,29 +15,29 @@
15
  "step": 1
16
  },
17
  {
18
- "epoch": 0.08,
19
- "eval_loss": 1.44921875,
20
- "eval_runtime": 79.4742,
21
- "eval_samples_per_second": 179.668,
22
- "eval_steps_per_second": 0.352,
23
- "step": 20
24
  },
25
  {
26
- "epoch": 0.08,
27
- "step": 20,
28
- "total_flos": 32315333935104.0,
29
- "train_loss": 1.8745559692382812,
30
- "train_runtime": 266.5177,
31
- "train_samples_per_second": 38.421,
32
- "train_steps_per_second": 0.075
33
  }
34
  ],
35
  "logging_steps": 500,
36
- "max_steps": 20,
37
  "num_input_tokens_seen": 0,
38
  "num_train_epochs": 1,
39
  "save_steps": 500,
40
- "total_flos": 32315333935104.0,
41
  "train_batch_size": 16,
42
  "trial_name": null,
43
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
  "eval_steps": 500,
6
+ "global_step": 253,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
15
  "step": 1
16
  },
17
  {
18
+ "epoch": 1.0,
19
+ "eval_loss": 1.232421875,
20
+ "eval_runtime": 77.7676,
21
+ "eval_samples_per_second": 183.611,
22
+ "eval_steps_per_second": 0.36,
23
+ "step": 253
24
  },
25
  {
26
+ "epoch": 1.0,
27
+ "step": 253,
28
+ "total_flos": 2542139602894848.0,
29
+ "train_loss": 1.3124533189615242,
30
+ "train_runtime": 2403.4192,
31
+ "train_samples_per_second": 53.713,
32
+ "train_steps_per_second": 0.105
33
  }
34
  ],
35
  "logging_steps": 500,
36
+ "max_steps": 253,
37
  "num_input_tokens_seen": 0,
38
  "num_train_epochs": 1,
39
  "save_steps": 500,
40
+ "total_flos": 2542139602894848.0,
41
  "train_batch_size": 16,
42
  "trial_name": null,
43
  "trial_params": null
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:402b64c314fb1d2ccb1804f480a62b3e5ef5e1207665ac66ee405e3f0121a314
3
  size 5307
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:078ba5ed03c0d0b0273bc2394994d489301db1cb89ad2153078ce344f565055e
3
  size 5307