Cahya Wirawan committed on
Commit 10b22bf
1 Parent(s): 163eb96

trained another 12 epochs

Files changed (2)
  1. config.json +17 -5
  2. pytorch_model.bin +1 -1
config.json CHANGED
@@ -13,6 +13,7 @@
   "bos_token_id": null,
   "chunk_size_feed_forward": 0,
   "decoder_start_token_id": null,
+  "diversity_penalty": 0.0,
   "do_sample": false,
   "early_stopping": false,
   "eos_token_id": null,
@@ -41,16 +42,20 @@
   "model_type": "bert",
   "no_repeat_ngram_size": 0,
   "num_attention_heads": 12,
+  "num_beam_groups": 1,
   "num_beams": 1,
   "num_hidden_layers": 12,
   "num_return_sequences": 1,
   "output_attentions": false,
   "output_hidden_states": false,
+  "output_scores": false,
   "pad_token_id": 0,
+  "position_embedding_type": "absolute",
   "prefix": null,
   "pruned_heads": {},
   "repetition_penalty": 1.0,
-  "return_dict": false,
+  "return_dict": true,
+  "return_dict_in_generate": false,
   "sep_token_id": null,
   "task_specific_params": null,
   "temperature": 1.0,
@@ -60,6 +65,7 @@
   "top_k": 50,
   "top_p": 1.0,
   "torchscript": false,
+  "transformers_version": "4.2.2",
   "type_vocab_size": 2,
   "use_bfloat16": false,
   "use_cache": true,
@@ -79,6 +85,7 @@
   "bos_token_id": null,
   "chunk_size_feed_forward": 0,
   "decoder_start_token_id": null,
+  "diversity_penalty": 0.0,
   "do_sample": false,
   "early_stopping": false,
   "eos_token_id": null,
@@ -107,16 +114,20 @@
   "model_type": "bert",
   "no_repeat_ngram_size": 0,
   "num_attention_heads": 12,
+  "num_beam_groups": 1,
   "num_beams": 1,
   "num_hidden_layers": 12,
   "num_return_sequences": 1,
   "output_attentions": false,
   "output_hidden_states": false,
+  "output_scores": false,
   "pad_token_id": 0,
+  "position_embedding_type": "absolute",
   "prefix": null,
   "pruned_heads": {},
   "repetition_penalty": 1.0,
-  "return_dict": false,
+  "return_dict": true,
+  "return_dict_in_generate": false,
   "sep_token_id": null,
   "task_specific_params": null,
   "temperature": 1.0,
@@ -126,6 +137,7 @@
   "top_k": 50,
   "top_p": 1.0,
   "torchscript": false,
+  "transformers_version": "4.2.2",
   "type_vocab_size": 2,
   "use_bfloat16": false,
   "use_cache": true,
@@ -135,11 +147,11 @@
   "eos_token_id": 1,
   "is_encoder_decoder": true,
   "length_penalty": 2.0,
-  "max_length": 142,
-  "min_length": 56,
+  "max_length": 40,
+  "min_length": 20,
   "model_type": "encoder-decoder",
   "no_repeat_ngram_size": 3,
-  "num_beams": 4,
+  "num_beams": 10,
   "pad_token_id": 2,
   "vocab_size": 32000
 }
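For context, a minimal sketch (not part of this commit) of how the updated decoding defaults above are consumed: transformers' generate() falls back to the values stored in config.json, so after this change beam search runs with 10 beams and produces outputs between 20 and 40 tokens. The model id and input text below are placeholders, and the arguments are spelled out only to mirror the new config values.

# Sketch only: load the encoder-decoder checkpoint and generate with the
# defaults this commit writes into config.json.
from transformers import BertTokenizer, EncoderDecoderModel

MODEL_ID = "user/model"  # placeholder: replace with this repository's id

tokenizer = BertTokenizer.from_pretrained(MODEL_ID)
model = EncoderDecoderModel.from_pretrained(MODEL_ID)

text = "..."  # source document to summarize (placeholder)
inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)

# generate() would use these values from config.json even if they were omitted;
# they are passed explicitly here to show the new defaults.
output_ids = model.generate(
    inputs.input_ids,
    num_beams=10,
    min_length=20,
    max_length=40,
    no_repeat_ngram_size=3,
    length_penalty=2.0,
)
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))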
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fff5b143c3a27cb464c410ffbc8f61f720242c8fbafba10fa314ef9a29813bd4
+oid sha256:bce5258851b5aae797fd0ff45c1d52ce82105493144b1eaff6d0eb880b8d592c
 size 998778130
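For completeness, a small sketch (again not part of the commit) showing how a downloaded weights file can be checked against the updated Git LFS pointer above; the local file path is an assumption.

# Sketch only: verify a local pytorch_model.bin against the new LFS pointer.
import hashlib

EXPECTED_SHA256 = "bce5258851b5aae797fd0ff45c1d52ce82105493144b1eaff6d0eb880b8d592c"
EXPECTED_SIZE = 998778130  # bytes, unchanged by this commit

sha = hashlib.sha256()
size = 0
with open("pytorch_model.bin", "rb") as f:            # assumed local path
    for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
        sha.update(chunk)
        size += len(chunk)

assert size == EXPECTED_SIZE, f"unexpected size: {size}"
assert sha.hexdigest() == EXPECTED_SHA256, "checksum mismatch"
print("pytorch_model.bin matches the LFS pointer in this commit")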