File size: 2,373 Bytes
48de83f
 
 
 
 
9a012c2
48de83f
 
 
 
 
9a012c2
4b3e94a
9a012c2
4b3e94a
48de83f
 
 
9a012c2
4b3e94a
9a012c2
4b3e94a
48de83f
 
 
9a012c2
4b3e94a
9a012c2
4b3e94a
48de83f
 
 
9a012c2
4b3e94a
9a012c2
4b3e94a
48de83f
 
 
9a012c2
4b3e94a
9a012c2
4b3e94a
48de83f
 
 
9a012c2
4b3e94a
9a012c2
4b3e94a
48de83f
 
6303895
9a012c2
4b3e94a
9a012c2
4b3e94a
6303895
 
 
9a012c2
4b3e94a
9a012c2
4b3e94a
6303895
 
48de83f
 
9a012c2
 
4b3e94a
 
 
 
48de83f
 
 
9a012c2
48de83f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9a012c2
48de83f
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 4.0,
  "eval_steps": 100,
  "global_step": 84,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.47619047619047616,
      "grad_norm": 2.757856845855713,
      "learning_rate": 0.0001761904761904762,
      "loss": 0.5843,
      "step": 10
    },
    {
      "epoch": 0.9523809523809523,
      "grad_norm": 3.4689693450927734,
      "learning_rate": 0.00015238095238095237,
      "loss": 0.5473,
      "step": 20
    },
    {
      "epoch": 1.4285714285714286,
      "grad_norm": 0.5666776299476624,
      "learning_rate": 0.00012857142857142858,
      "loss": 0.1833,
      "step": 30
    },
    {
      "epoch": 1.9047619047619047,
      "grad_norm": 0.2773081362247467,
      "learning_rate": 0.00010476190476190477,
      "loss": 0.1073,
      "step": 40
    },
    {
      "epoch": 2.380952380952381,
      "grad_norm": 0.14523948729038239,
      "learning_rate": 8.095238095238096e-05,
      "loss": 0.0217,
      "step": 50
    },
    {
      "epoch": 2.857142857142857,
      "grad_norm": 0.10562175512313843,
      "learning_rate": 5.714285714285714e-05,
      "loss": 0.0158,
      "step": 60
    },
    {
      "epoch": 3.3333333333333335,
      "grad_norm": 0.09110253304243088,
      "learning_rate": 3.3333333333333335e-05,
      "loss": 0.0125,
      "step": 70
    },
    {
      "epoch": 3.8095238095238093,
      "grad_norm": 0.09965213388204575,
      "learning_rate": 9.523809523809523e-06,
      "loss": 0.0116,
      "step": 80
    },
    {
      "epoch": 4.0,
      "step": 84,
      "total_flos": 2.5417372593586176e+16,
      "train_loss": 0.17717325119745164,
      "train_runtime": 30.5569,
      "train_samples_per_second": 10.734,
      "train_steps_per_second": 2.749
    }
  ],
  "logging_steps": 10,
  "max_steps": 84,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 4,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2.5417372593586176e+16,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}