File size: 2,274 Bytes
1894b93
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 1713,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.2,
      "learning_rate": 1.9883720930232557e-05,
      "loss": 0.8354,
      "step": 114
    },
    {
      "epoch": 0.4,
      "learning_rate": 2.890979883192732e-05,
      "loss": 0.5722,
      "step": 228
    },
    {
      "epoch": 0.6,
      "learning_rate": 2.6690460739779364e-05,
      "loss": 0.4889,
      "step": 342
    },
    {
      "epoch": 0.8,
      "learning_rate": 2.4471122647631407e-05,
      "loss": 0.457,
      "step": 456
    },
    {
      "epoch": 1.0,
      "learning_rate": 2.2251784555483453e-05,
      "loss": 0.3767,
      "step": 570
    },
    {
      "epoch": 1.2,
      "learning_rate": 2.0032446463335497e-05,
      "loss": 0.3405,
      "step": 684
    },
    {
      "epoch": 1.4,
      "learning_rate": 1.781310837118754e-05,
      "loss": 0.324,
      "step": 798
    },
    {
      "epoch": 1.6,
      "learning_rate": 1.5593770279039583e-05,
      "loss": 0.3289,
      "step": 912
    },
    {
      "epoch": 1.8,
      "learning_rate": 1.3374432186891629e-05,
      "loss": 0.3386,
      "step": 1026
    },
    {
      "epoch": 2.0,
      "learning_rate": 1.1155094094743672e-05,
      "loss": 0.3321,
      "step": 1140
    },
    {
      "epoch": 2.2,
      "learning_rate": 8.935756002595717e-06,
      "loss": 0.3055,
      "step": 1254
    },
    {
      "epoch": 2.4,
      "learning_rate": 6.716417910447762e-06,
      "loss": 0.304,
      "step": 1368
    },
    {
      "epoch": 2.6,
      "learning_rate": 4.4970798182998056e-06,
      "loss": 0.2991,
      "step": 1482
    },
    {
      "epoch": 2.8,
      "learning_rate": 2.2777417261518495e-06,
      "loss": 0.3181,
      "step": 1596
    },
    {
      "epoch": 2.99,
      "learning_rate": 5.840363400389358e-08,
      "loss": 0.3057,
      "step": 1710
    }
  ],
  "logging_steps": 114,
  "max_steps": 1713,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "total_flos": 1.3925820288466944e+17,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}