ayjays132 committed on
Commit
86b2f20
1 Parent(s): 84fff15

Upload 7 files

Browse files
config.json CHANGED
@@ -1,23 +1,23 @@
1
  {
2
- "model_type": "gpt2",
3
- "architectures": ["GPT2LMHeadModel"],
4
- "tokenizer_config": {
5
- "bos_token_id": 50256,
6
- "eos_token_id": 50256,
7
- "n_positions": 2048
8
- },
9
  "config": {
10
  "activation_function": "gelu_new",
 
 
 
 
11
  "n_ctx": 2048,
12
  "n_embd": 2048,
13
  "n_head": 16,
14
  "n_layer": 24,
15
  "n_positions": 2048,
16
  "n_special": 0,
17
- "attn_pdrop": 0.1,
18
- "embd_pdrop": 0.1,
19
- "initializer_range": 0.02,
20
- "layer_norm_epsilon": 1e-05,
21
  "resid_pdrop": 0.1,
22
  "summary_activation": null,
23
  "summary_first_dropout": 0.1,
@@ -25,53 +25,105 @@
25
  "summary_type": "cls_index",
26
  "summary_use_proj": true
27
  },
28
- "task_specific_params": {
29
- "conversational": {
30
- "max_length": 1024,
31
- "min_length": 20,
32
- "length_penalty": 1.5,
33
- "num_beams": 5,
34
- "early_stopping": true,
35
- "no_repeat_ngram_size": 3,
36
- "temperature": 0.7,
37
- "top_k": 50,
38
- "top_p": 0.9
39
- }
40
- },
41
- "transformers_version": "4.34.0",
42
- "language": ["en"],
43
- "tags": ["conversational"],
44
- "metrics": ["perplexity", "accuracy"],
45
- "pipeline_tag": "conversational",
46
- "library_name": "transformers",
47
- "datasets": ["vicgalle/alpaca-gpt4"],
48
- "license": "apache-2.0",
49
  "custom_params": {
 
 
 
50
  "adaptation_rate": 0.05,
 
51
  "desired_improvement_rate": 0.02,
52
  "ecosystem_dynamics": {
53
  "environmental_volatility": 0.1,
54
  "resource_pool": 1
55
  },
 
 
 
56
  "growth_improvement_threshold": 0.01,
57
  "hidden_dim": 2048,
58
- "initial_neuron_count": 5000,
59
  "innovative_growth_net": {
60
  "adaptation_rate": 0.05,
61
  "initial_capacity": 250000,
62
  "input_size": 2048
63
  },
64
- "input_dimension": 768,
 
65
  "low_stability_threshold": 0.01,
66
- "max_complexity": 10000,
67
- "max_neurons": 250000,
68
- "max_sequence_length": 1024,
69
  "min_epochs_before_growth": 5,
70
  "model_filename": "pytorch_model.bin",
 
71
  "num_embeddings": 25000,
 
 
72
  "pruning_improvement_threshold": 0.005,
 
73
  "some_adaptation_rate": 0.05,
 
74
  "stability_threshold": 0.02,
75
- "start_token_index": 2
76
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
77
  }
 
1
  {
2
+ "_name_or_path": "ayjays132/Phillnet2",
3
+ "activation_function": "gelu_new",
4
+ "architectures": [
5
+ "GPT2LMHeadModel"
6
+ ],
7
+ "attn_pdrop": 0.1,
8
+ "bos_token_id": 50256,
9
  "config": {
10
  "activation_function": "gelu_new",
11
+ "attn_pdrop": 0.1,
12
+ "embd_pdrop": 0.1,
13
+ "initializer_range": 0.02,
14
+ "layer_norm_epsilon": 1e-05,
15
  "n_ctx": 2048,
16
  "n_embd": 2048,
17
  "n_head": 16,
18
  "n_layer": 24,
19
  "n_positions": 2048,
20
  "n_special": 0,
 
 
 
 
21
  "resid_pdrop": 0.1,
22
  "summary_activation": null,
23
  "summary_first_dropout": 0.1,
 
25
  "summary_type": "cls_index",
26
  "summary_use_proj": true
27
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  "custom_params": {
29
+ "DEFAULT_ADAPTATION_RATE": 0.05,
30
+ "DEFAULT_MAX_NEURONS": 242798,
31
+ "actual_vocab_size": 25000,
32
  "adaptation_rate": 0.05,
33
+ "complexity_metric": null,
34
  "desired_improvement_rate": 0.02,
35
  "ecosystem_dynamics": {
36
  "environmental_volatility": 0.1,
37
  "resource_pool": 1
38
  },
39
+ "embedding_dim": 2048,
40
+ "eval_interval": 10,
41
+ "gamma": 0.99,
42
  "growth_improvement_threshold": 0.01,
43
  "hidden_dim": 2048,
44
+ "initial_neuron_count": 105000,
45
  "innovative_growth_net": {
46
  "adaptation_rate": 0.05,
47
  "initial_capacity": 250000,
48
  "input_size": 2048
49
  },
50
+ "input_dimension": 2048,
51
+ "learning_rate": 0.001,
52
  "low_stability_threshold": 0.01,
53
+ "max_complexity": 5000000,
54
+ "max_neurons": 800000,
55
+ "max_sequence_length": 2048,
56
  "min_epochs_before_growth": 5,
57
  "model_filename": "pytorch_model.bin",
58
+ "num_classes": 25000,
59
  "num_embeddings": 25000,
60
+ "num_heads": 64,
61
+ "num_words": 25000,
62
  "pruning_improvement_threshold": 0.005,
63
+ "sequence_length": 1024,
64
  "some_adaptation_rate": 0.05,
65
+ "some_intermediate_size": 2048,
66
  "stability_threshold": 0.02,
67
+ "start_token_index": 2,
68
+ "update_freq": 100,
69
+ "vocab_size": 25000,
70
+ "weight_decay": 0.005
71
+ },
72
+ "datasets": [
73
+ "vicgalle/alpaca-gpt4"
74
+ ],
75
+ "embd_pdrop": 0.1,
76
+ "eos_token_id": 50256,
77
+ "initializer_range": 0.02,
78
+ "language": [
79
+ "en"
80
+ ],
81
+ "layer_norm_epsilon": 1e-05,
82
+ "library_name": "transformers",
83
+ "license": "apache-2.0",
84
+ "metrics": [
85
+ "perplexity",
86
+ "accuracy"
87
+ ],
88
+ "model_type": "gpt2",
89
+ "n_embd": 768,
90
+ "n_head": 12,
91
+ "n_inner": null,
92
+ "n_layer": 12,
93
+ "n_positions": 1024,
94
+ "pipeline_tag": "conversational",
95
+ "reorder_and_upcast_attn": false,
96
+ "resid_pdrop": 0.1,
97
+ "scale_attn_by_inverse_layer_idx": false,
98
+ "scale_attn_weights": true,
99
+ "summary_activation": null,
100
+ "summary_first_dropout": 0.1,
101
+ "summary_proj_to_labels": true,
102
+ "summary_type": "cls_index",
103
+ "summary_use_proj": true,
104
+ "tags": [
105
+ "conversational"
106
+ ],
107
+ "task_specific_params": {
108
+ "conversational": {
109
+ "early_stopping": true,
110
+ "length_penalty": 1.5,
111
+ "max_length": 1024,
112
+ "min_length": 20,
113
+ "no_repeat_ngram_size": 3,
114
+ "num_beams": 5,
115
+ "temperature": 0.7,
116
+ "top_k": 50,
117
+ "top_p": 0.9
118
+ }
119
+ },
120
+ "tokenizer_config": {
121
+ "bos_token_id": 50256,
122
+ "eos_token_id": 50256,
123
+ "n_positions": 2048
124
+ },
125
+ "torch_dtype": "float32",
126
+ "transformers_version": "4.37.2",
127
+ "use_cache": true,
128
+ "vocab_size": 50257
129
  }
generation_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 50256,
4
+ "eos_token_id": 50256,
5
+ "transformers_version": "4.37.2"
6
+ }
optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f779615d12286a46511e2a045318ec6efc922e969ecdc4be9deb51882ba33ea7
3
+ size 995642298
rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4458df18d15a0c76ca43f3bdfe7dffc0fe842fd5b118301979cd74c1fa701768
3
+ size 14244
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b377bcf2fb431adbbcb1481ea1975ebb709f64959b03e92ca011ea3a0e14a18
3
+ size 1064
trainer_state.json ADDED
@@ -0,0 +1,141 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 2.0475020475020473,
5
+ "eval_steps": 500,
6
+ "global_step": 10000,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.1,
13
+ "learning_rate": 4.8293748293748295e-05,
14
+ "loss": 4.3223,
15
+ "step": 500
16
+ },
17
+ {
18
+ "epoch": 0.2,
19
+ "learning_rate": 4.658749658749659e-05,
20
+ "loss": 4.2808,
21
+ "step": 1000
22
+ },
23
+ {
24
+ "epoch": 0.31,
25
+ "learning_rate": 4.4881244881244886e-05,
26
+ "loss": 4.2576,
27
+ "step": 1500
28
+ },
29
+ {
30
+ "epoch": 0.41,
31
+ "learning_rate": 4.317499317499318e-05,
32
+ "loss": 4.1952,
33
+ "step": 2000
34
+ },
35
+ {
36
+ "epoch": 0.51,
37
+ "learning_rate": 4.146874146874147e-05,
38
+ "loss": 4.133,
39
+ "step": 2500
40
+ },
41
+ {
42
+ "epoch": 0.61,
43
+ "learning_rate": 3.976248976248976e-05,
44
+ "loss": 4.0789,
45
+ "step": 3000
46
+ },
47
+ {
48
+ "epoch": 0.72,
49
+ "learning_rate": 3.8056238056238055e-05,
50
+ "loss": 4.0146,
51
+ "step": 3500
52
+ },
53
+ {
54
+ "epoch": 0.82,
55
+ "learning_rate": 3.6349986349986354e-05,
56
+ "loss": 3.9639,
57
+ "step": 4000
58
+ },
59
+ {
60
+ "epoch": 0.92,
61
+ "learning_rate": 3.4643734643734647e-05,
62
+ "loss": 3.9444,
63
+ "step": 4500
64
+ },
65
+ {
66
+ "epoch": 1.02,
67
+ "learning_rate": 3.293748293748294e-05,
68
+ "loss": 3.8477,
69
+ "step": 5000
70
+ },
71
+ {
72
+ "epoch": 1.13,
73
+ "learning_rate": 3.123123123123123e-05,
74
+ "loss": 3.7137,
75
+ "step": 5500
76
+ },
77
+ {
78
+ "epoch": 1.23,
79
+ "learning_rate": 2.952497952497953e-05,
80
+ "loss": 3.6799,
81
+ "step": 6000
82
+ },
83
+ {
84
+ "epoch": 1.33,
85
+ "learning_rate": 2.781872781872782e-05,
86
+ "loss": 3.674,
87
+ "step": 6500
88
+ },
89
+ {
90
+ "epoch": 1.43,
91
+ "learning_rate": 2.6112476112476115e-05,
92
+ "loss": 3.6476,
93
+ "step": 7000
94
+ },
95
+ {
96
+ "epoch": 1.54,
97
+ "learning_rate": 2.4406224406224407e-05,
98
+ "loss": 3.6145,
99
+ "step": 7500
100
+ },
101
+ {
102
+ "epoch": 1.64,
103
+ "learning_rate": 2.2699972699972703e-05,
104
+ "loss": 3.5609,
105
+ "step": 8000
106
+ },
107
+ {
108
+ "epoch": 1.74,
109
+ "learning_rate": 2.0993720993720995e-05,
110
+ "loss": 3.5179,
111
+ "step": 8500
112
+ },
113
+ {
114
+ "epoch": 1.84,
115
+ "learning_rate": 1.928746928746929e-05,
116
+ "loss": 3.5089,
117
+ "step": 9000
118
+ },
119
+ {
120
+ "epoch": 1.95,
121
+ "learning_rate": 1.7581217581217583e-05,
122
+ "loss": 3.5192,
123
+ "step": 9500
124
+ },
125
+ {
126
+ "epoch": 2.05,
127
+ "learning_rate": 1.5874965874965875e-05,
128
+ "loss": 3.39,
129
+ "step": 10000
130
+ }
131
+ ],
132
+ "logging_steps": 500,
133
+ "max_steps": 14652,
134
+ "num_input_tokens_seen": 0,
135
+ "num_train_epochs": 3,
136
+ "save_steps": 10000,
137
+ "total_flos": 1.0451158695936e+16,
138
+ "train_batch_size": 4,
139
+ "trial_name": null,
140
+ "trial_params": null
141
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d538e97fb781d4d52eb5049837204dc5502fe82b56c0a46d2972d514d5318480
3
+ size 4792