File size: 2,378 Bytes
48de83f
 
 
 
 
9a012c2
48de83f
 
 
 
 
9a012c2
552831d
d1ea928
552831d
48de83f
 
 
9a012c2
552831d
d1ea928
552831d
48de83f
 
 
9a012c2
552831d
d1ea928
552831d
48de83f
 
 
9a012c2
552831d
d1ea928
552831d
48de83f
 
 
9a012c2
552831d
d1ea928
552831d
48de83f
 
 
9a012c2
552831d
d1ea928
552831d
48de83f
 
6303895
9a012c2
552831d
d1ea928
552831d
6303895
 
 
9a012c2
552831d
d1ea928
552831d
6303895
 
48de83f
 
9a012c2
552831d
 
 
 
 
48de83f
 
 
9a012c2
48de83f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
552831d
48de83f
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 4.0,
  "eval_steps": 100,
  "global_step": 84,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.47619047619047616,
      "grad_norm": 6.1757893562316895,
      "learning_rate": 0.0001785714285714286,
      "loss": 0.5829,
      "step": 10
    },
    {
      "epoch": 0.9523809523809523,
      "grad_norm": 3.337554931640625,
      "learning_rate": 0.00015476190476190478,
      "loss": 0.7423,
      "step": 20
    },
    {
      "epoch": 1.4285714285714286,
      "grad_norm": 0.07775535434484482,
      "learning_rate": 0.00013095238095238096,
      "loss": 0.0198,
      "step": 30
    },
    {
      "epoch": 1.9047619047619047,
      "grad_norm": 0.0923432782292366,
      "learning_rate": 0.00010714285714285715,
      "loss": 0.2508,
      "step": 40
    },
    {
      "epoch": 2.380952380952381,
      "grad_norm": 0.010310073383152485,
      "learning_rate": 8.333333333333334e-05,
      "loss": 0.0006,
      "step": 50
    },
    {
      "epoch": 2.857142857142857,
      "grad_norm": 0.022837914526462555,
      "learning_rate": 5.9523809523809524e-05,
      "loss": 0.0011,
      "step": 60
    },
    {
      "epoch": 3.3333333333333335,
      "grad_norm": 0.0032586820889264345,
      "learning_rate": 3.571428571428572e-05,
      "loss": 0.0002,
      "step": 70
    },
    {
      "epoch": 3.8095238095238093,
      "grad_norm": 0.00261130603030324,
      "learning_rate": 1.1904761904761905e-05,
      "loss": 0.0001,
      "step": 80
    },
    {
      "epoch": 4.0,
      "step": 84,
      "total_flos": 2.5417372593586176e+16,
      "train_loss": 0.1902264037302562,
      "train_runtime": 30.4122,
      "train_samples_per_second": 10.785,
      "train_steps_per_second": 2.762
    }
  ],
  "logging_steps": 10,
  "max_steps": 84,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 4,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2.5417372593586176e+16,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}