File size: 2,807 Bytes
0e555b6
0e1a7a3
0e555b6
0e1a7a3
0e555b6
 
 
 
 
 
 
0e1a7a3
0e555b6
0e1a7a3
0e555b6
 
 
0e1a7a3
0e555b6
0e1a7a3
0e555b6
 
 
0e1a7a3
 
 
 
 
 
0e555b6
 
 
0e1a7a3
0e555b6
0e1a7a3
0e555b6
 
 
0e1a7a3
 
 
 
 
 
0e555b6
 
 
0e1a7a3
0e555b6
0e1a7a3
0e555b6
 
 
0e1a7a3
 
 
 
 
 
0e555b6
 
 
0e1a7a3
0e555b6
0e1a7a3
0e555b6
 
 
0e1a7a3
 
 
 
 
 
0e555b6
 
 
0e1a7a3
0e555b6
0e1a7a3
0e555b6
 
 
0e1a7a3
 
 
 
 
 
0e555b6
 
 
0e1a7a3
0e555b6
0e1a7a3
0e555b6
 
 
0e1a7a3
 
 
 
 
 
0e555b6
 
 
 
 
0e1a7a3
0e555b6
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
{
  "best_metric": 1.0993762016296387,
  "best_model_checkpoint": "/kaggle/output/checkpoint-6000",
  "epoch": 0.24445893089960888,
  "eval_steps": 1000,
  "global_step": 6000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0,
      "learning_rate": 2.7777777777777777e-11,
      "loss": 1.1383,
      "step": 1
    },
    {
      "epoch": 0.04,
      "learning_rate": 2.7750000000000004e-08,
      "loss": 1.1424,
      "step": 1000
    },
    {
      "epoch": 0.04,
      "eval_accuracy": 0.32375249500998005,
      "eval_loss": 1.1077626943588257,
      "eval_runtime": 54.8633,
      "eval_samples_per_second": 91.318,
      "eval_steps_per_second": 11.428,
      "step": 1000
    },
    {
      "epoch": 0.08,
      "learning_rate": 5.5527777777777784e-08,
      "loss": 1.1244,
      "step": 2000
    },
    {
      "epoch": 0.08,
      "eval_accuracy": 0.33652694610778444,
      "eval_loss": 1.1080161333084106,
      "eval_runtime": 54.7384,
      "eval_samples_per_second": 91.526,
      "eval_steps_per_second": 11.454,
      "step": 2000
    },
    {
      "epoch": 0.12,
      "learning_rate": 8.327777777777778e-08,
      "loss": 1.1228,
      "step": 3000
    },
    {
      "epoch": 0.12,
      "eval_accuracy": 0.34331337325349304,
      "eval_loss": 1.1084064245224,
      "eval_runtime": 54.7948,
      "eval_samples_per_second": 91.432,
      "eval_steps_per_second": 11.443,
      "step": 3000
    },
    {
      "epoch": 0.16,
      "learning_rate": 1.1105555555555557e-07,
      "loss": 1.1216,
      "step": 4000
    },
    {
      "epoch": 0.16,
      "eval_accuracy": 0.3385229540918164,
      "eval_loss": 1.1014840602874756,
      "eval_runtime": 54.8508,
      "eval_samples_per_second": 91.339,
      "eval_steps_per_second": 11.431,
      "step": 4000
    },
    {
      "epoch": 0.2,
      "learning_rate": 1.3880555555555558e-07,
      "loss": 1.1181,
      "step": 5000
    },
    {
      "epoch": 0.2,
      "eval_accuracy": 0.33073852295409184,
      "eval_loss": 1.1008135080337524,
      "eval_runtime": 54.8304,
      "eval_samples_per_second": 91.373,
      "eval_steps_per_second": 11.435,
      "step": 5000
    },
    {
      "epoch": 0.24,
      "learning_rate": 1.6658333333333335e-07,
      "loss": 1.1132,
      "step": 6000
    },
    {
      "epoch": 0.24,
      "eval_accuracy": 0.3520958083832335,
      "eval_loss": 1.0993762016296387,
      "eval_runtime": 54.8804,
      "eval_samples_per_second": 91.289,
      "eval_steps_per_second": 11.425,
      "step": 6000
    }
  ],
  "logging_steps": 1000,
  "max_steps": 10000000,
  "num_train_epochs": 408,
  "save_steps": 1000,
  "total_flos": 1.2542130782208e+16,
  "trial_name": null,
  "trial_params": null
}