IDEA-CCNL
/

Erlangshen-Longformer-330M

Inference Endpoints

Model card Files and versions Community

roygan committed on Jul 17, 2022

Commit

a50c01a

•

1 Parent(s): 05fd5be

Update config.json

Files changed (1) hide show

config.json +31 -30

config.json CHANGED Viewed

@@ -1,30 +1,31 @@
-{
-  "architectures": [
-    "LongformerForPreTraining"
-  ],
-  "attention_probs_dropout_prob": 0.1,
-  "attention_window": 128,
-  "use_sparse_attention":true,
-  "directionality": "bidi",
-  "gradient_checkpointing": false,
-  "hidden_act": "gelu",
-  "hidden_dropout_prob": 0.1,
-  "hidden_size": 1024,
-  "initializer_range": 0.02,
-  "intermediate_size": 4096,
-  "layer_norm_eps": 1e-12,
-  "max_position_embeddings": 4096,
-  "model_type": "longformer",
-  "num_attention_heads": 16,
-  "num_hidden_layers": 24,
-  "pad_token_id": 0,
-  "pooler_fc_size": 1024,
-  "pooler_num_attention_heads": 16,
-  "pooler_num_fc_layers": 3,
-  "pooler_size_per_head": 128,
-  "pooler_type": "first_token_transform",
-  "type_vocab_size": 4,
-  "vocab_size": 12000
-}

+{
+  "architectures": [
+    "LongformerForPreTraining"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "attention_window": 128,
+  "use_sparse_attention":true,
+  "directionality": "bidi",
+  "gradient_checkpointing": false,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 1024,
+  "initializer_range": 0.02,
+  "intermediate_size": 4096,
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 4096,
+  "model_type": "longformer",
+  "num_attention_heads": 16,
+  "num_hidden_layers": 24,
+  "pad_token_id": 0,
+  "pooler_fc_size": 1024,
+  "pooler_num_attention_heads": 16,
+  "pooler_num_fc_layers": 3,
+  "pooler_size_per_head": 128,
+  "pooler_type": "first_token_transform",
+  "tokenizer_class": "BertTokenizer",
+  "type_vocab_size": 4,
+  "vocab_size": 12000
+}