File size: 2,424 Bytes
68017bb
 
 
99f4d33
68017bb
b140801
68017bb
 
 
 
 
b140801
 
 
 
68017bb
 
 
b140801
 
 
 
68017bb
 
 
 
b140801
 
 
 
 
68017bb
 
b140801
 
 
 
 
68017bb
 
 
b140801
 
 
 
 
68017bb
 
b140801
 
 
 
 
68017bb
 
b140801
 
 
 
 
68017bb
 
 
b140801
 
 
 
 
68017bb
 
99f4d33
b140801
 
 
 
 
 
68017bb
 
 
b140801
68017bb
99f4d33
68017bb
 
 
 
 
 
 
 
 
 
 
 
 
b140801
68017bb
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 21,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.14285714285714285,
      "grad_norm": 99.75653578103409,
      "learning_rate": 6.666666666666667e-06,
      "loss": 1.7828,
      "step": 1
    },
    {
      "epoch": 0.7142857142857143,
      "grad_norm": 421.35114809358015,
      "learning_rate": 1.9396926207859085e-05,
      "loss": 1.8602,
      "step": 5
    },
    {
      "epoch": 1.0,
      "eval_loss": 1.3279948234558105,
      "eval_runtime": 2.2021,
      "eval_samples_per_second": 39.054,
      "eval_steps_per_second": 0.908,
      "step": 7
    },
    {
      "epoch": 1.4285714285714286,
      "grad_norm": 60.4301410351058,
      "learning_rate": 1.342020143325669e-05,
      "loss": 2.9102,
      "step": 10
    },
    {
      "epoch": 2.0,
      "eval_loss": 0.6256123781204224,
      "eval_runtime": 2.0607,
      "eval_samples_per_second": 41.733,
      "eval_steps_per_second": 0.971,
      "step": 14
    },
    {
      "epoch": 2.142857142857143,
      "grad_norm": 9.507888146594869,
      "learning_rate": 5.000000000000003e-06,
      "loss": 0.686,
      "step": 15
    },
    {
      "epoch": 2.857142857142857,
      "grad_norm": 1.2620367321960468,
      "learning_rate": 1.519224698779198e-07,
      "loss": 0.5329,
      "step": 20
    },
    {
      "epoch": 3.0,
      "eval_loss": 0.5474353432655334,
      "eval_runtime": 2.0545,
      "eval_samples_per_second": 41.86,
      "eval_steps_per_second": 0.973,
      "step": 21
    },
    {
      "epoch": 3.0,
      "step": 21,
      "total_flos": 8793945538560.0,
      "train_loss": 1.4474536804925828,
      "train_runtime": 170.8306,
      "train_samples_per_second": 13.821,
      "train_steps_per_second": 0.123
    }
  ],
  "logging_steps": 5,
  "max_steps": 21,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": false,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 8793945538560.0,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}