File size: 2,344 Bytes
58f5030
 
 
 
 
 
 
 
 
 
 
 
b2763b2
 
 
 
 
58f5030
 
 
 
b2763b2
6df7778
b2763b2
58f5030
 
 
 
b2763b2
 
 
 
 
58f5030
 
 
 
b2763b2
 
 
 
 
58f5030
 
 
 
b2763b2
6df7778
b2763b2
58f5030
 
 
 
6df7778
b2763b2
 
 
 
58f5030
 
 
 
b2763b2
6df7778
b2763b2
58f5030
 
 
 
6df7778
58f5030
6df7778
58f5030
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b2763b2
6df7778
b2763b2
58f5030
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 4.716981132075472,
  "eval_steps": 500,
  "global_step": 1500,
  "is_hyper_param_search": true,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.0,
      "eval_accuracy": 0.5887096774193549,
      "eval_loss": 0.19886387884616852,
      "eval_runtime": 5.1582,
      "eval_samples_per_second": 600.982,
      "eval_steps_per_second": 12.601,
      "step": 318
    },
    {
      "epoch": 1.5723270440251573,
      "grad_norm": 0.5294517874717712,
      "learning_rate": 1.650593990216632e-05,
      "loss": 0.3203,
      "step": 500
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.8193548387096774,
      "eval_loss": 0.09618715196847916,
      "eval_runtime": 5.4445,
      "eval_samples_per_second": 569.379,
      "eval_steps_per_second": 11.939,
      "step": 636
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.8783870967741936,
      "eval_loss": 0.06421981006860733,
      "eval_runtime": 5.3341,
      "eval_samples_per_second": 581.17,
      "eval_steps_per_second": 12.186,
      "step": 954
    },
    {
      "epoch": 3.1446540880503147,
      "grad_norm": 0.44660821557044983,
      "learning_rate": 1.3011879804332637e-05,
      "loss": 0.1116,
      "step": 1000
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.8996774193548387,
      "eval_loss": 0.04962550476193428,
      "eval_runtime": 5.1177,
      "eval_samples_per_second": 605.746,
      "eval_steps_per_second": 12.701,
      "step": 1272
    },
    {
      "epoch": 4.716981132075472,
      "grad_norm": 0.3033629357814789,
      "learning_rate": 9.517819706498952e-06,
      "loss": 0.0713,
      "step": 1500
    }
  ],
  "logging_steps": 500,
  "max_steps": 2862,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 9,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 391368939443328.0,
  "train_batch_size": 48,
  "trial_name": null,
  "trial_params": {
    "alpha": 0.38078945785669316,
    "num_train_epochs": 9,
    "temperature": 11
  }
}