File size: 2,375 Bytes
48de83f
 
 
 
 
9a012c2
48de83f
 
 
 
 
9a012c2
d1ea928
 
 
48de83f
 
 
9a012c2
d1ea928
 
 
48de83f
 
 
9a012c2
d1ea928
 
 
48de83f
 
 
9a012c2
d1ea928
 
 
48de83f
 
 
9a012c2
d1ea928
 
 
48de83f
 
 
9a012c2
d1ea928
 
 
48de83f
 
6303895
9a012c2
d1ea928
 
 
6303895
 
 
9a012c2
d1ea928
 
 
6303895
 
48de83f
 
9a012c2
d1ea928
 
 
 
 
48de83f
 
 
9a012c2
48de83f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d1ea928
48de83f
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 4.0,
  "eval_steps": 100,
  "global_step": 84,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.47619047619047616,
      "grad_norm": 9.25047492980957,
      "learning_rate": 0.0001785714285714286,
      "loss": 0.8938,
      "step": 10
    },
    {
      "epoch": 0.9523809523809523,
      "grad_norm": 13.209284782409668,
      "learning_rate": 0.00015476190476190478,
      "loss": 0.4135,
      "step": 20
    },
    {
      "epoch": 1.4285714285714286,
      "grad_norm": 0.18857085704803467,
      "learning_rate": 0.00013095238095238096,
      "loss": 0.1841,
      "step": 30
    },
    {
      "epoch": 1.9047619047619047,
      "grad_norm": 3.675966262817383,
      "learning_rate": 0.00010714285714285715,
      "loss": 0.2689,
      "step": 40
    },
    {
      "epoch": 2.380952380952381,
      "grad_norm": 0.023952171206474304,
      "learning_rate": 8.333333333333334e-05,
      "loss": 0.276,
      "step": 50
    },
    {
      "epoch": 2.857142857142857,
      "grad_norm": 0.010658332146704197,
      "learning_rate": 5.9523809523809524e-05,
      "loss": 0.1114,
      "step": 60
    },
    {
      "epoch": 3.3333333333333335,
      "grad_norm": 0.013588939793407917,
      "learning_rate": 3.571428571428572e-05,
      "loss": 0.0018,
      "step": 70
    },
    {
      "epoch": 3.8095238095238093,
      "grad_norm": 0.011217363178730011,
      "learning_rate": 1.1904761904761905e-05,
      "loss": 0.0004,
      "step": 80
    },
    {
      "epoch": 4.0,
      "step": 84,
      "total_flos": 2.6605778719997952e+17,
      "train_loss": 0.25595739838622866,
      "train_runtime": 81.3097,
      "train_samples_per_second": 4.034,
      "train_steps_per_second": 1.033
    }
  ],
  "logging_steps": 10,
  "max_steps": 84,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 4,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2.6605778719997952e+17,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}