ymcui
committed on
Commit
•
982a35a
1
Parent(s):
c2a32bf
First version of the rbt6 model and tokenizer.
Browse files- README.md +54 -0
- added_tokens.json +1 -0
- config.json +27 -0
- pytorch_model.bin +3 -0
- special_tokens_map.json +1 -0
- tf_model.h5 +3 -0
- tokenizer_config.json +1 -0
- vocab.txt +0 -0
README.md
ADDED
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
language:
|
3 |
+
- zh
|
4 |
+
tags:
|
5 |
+
- bert
|
6 |
+
license: "apache-2.0"
|
7 |
+
---
|
8 |
+
## Chinese BERT with Whole Word Masking
|
9 |
+
To further accelerate Chinese natural language processing, we provide **Chinese pre-trained BERT with Whole Word Masking**.
|
10 |
+
|
11 |
+
**[Pre-Training with Whole Word Masking for Chinese BERT](https://arxiv.org/abs/1906.08101)**
|
12 |
+
Yiming Cui, Wanxiang Che, Ting Liu, Bing Qin, Ziqing Yang, Shijin Wang, Guoping Hu
|
13 |
+
|
14 |
+
This repository is developed based on: https://github.com/google-research/bert
|
15 |
+
|
16 |
+
You may also be interested in:
|
17 |
+
- Chinese BERT series: https://github.com/ymcui/Chinese-BERT-wwm
|
18 |
+
- Chinese MacBERT: https://github.com/ymcui/MacBERT
|
19 |
+
- Chinese ELECTRA: https://github.com/ymcui/Chinese-ELECTRA
|
20 |
+
- Chinese XLNet: https://github.com/ymcui/Chinese-XLNet
|
21 |
+
- Knowledge Distillation Toolkit - TextBrewer: https://github.com/airaria/TextBrewer
|
22 |
+
|
23 |
+
More resources by HFL: https://github.com/ymcui/HFL-Anthology
|
24 |
+
|
25 |
+
## Citation
|
26 |
+
If you find the technical reports or resources useful, please cite the following technical reports in your paper.
|
27 |
+
- Primary: https://arxiv.org/abs/2004.13922
|
28 |
+
```
|
29 |
+
@inproceedings{cui-etal-2020-revisiting,
|
30 |
+
title = "Revisiting Pre-Trained Models for {C}hinese Natural Language Processing",
|
31 |
+
author = "Cui, Yiming and
|
32 |
+
Che, Wanxiang and
|
33 |
+
Liu, Ting and
|
34 |
+
Qin, Bing and
|
35 |
+
Wang, Shijin and
|
36 |
+
Hu, Guoping",
|
37 |
+
booktitle = "Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing: Findings",
|
38 |
+
month = nov,
|
39 |
+
year = "2020",
|
40 |
+
address = "Online",
|
41 |
+
publisher = "Association for Computational Linguistics",
|
42 |
+
url = "https://www.aclweb.org/anthology/2020.findings-emnlp.58",
|
43 |
+
pages = "657--668",
|
44 |
+
}
|
45 |
+
```
|
46 |
+
- Secondary: https://arxiv.org/abs/1906.08101
|
47 |
+
```
|
48 |
+
@article{chinese-bert-wwm,
|
49 |
+
title={Pre-Training with Whole Word Masking for Chinese BERT},
|
50 |
+
author={Cui, Yiming and Che, Wanxiang and Liu, Ting and Qin, Bing and Yang, Ziqing and Wang, Shijin and Hu, Guoping},
|
51 |
+
journal={arXiv preprint arXiv:1906.08101},
|
52 |
+
year={2019}
|
53 |
+
}
|
54 |
+
```
|
added_tokens.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{}
|
config.json
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "temp/rbt6",
|
3 |
+
"attention_probs_dropout_prob": 0.1,
|
4 |
+
"directionality": "bidi",
|
5 |
+
"gradient_checkpointing": false,
|
6 |
+
"hidden_act": "gelu",
|
7 |
+
"hidden_dropout_prob": 0.1,
|
8 |
+
"hidden_size": 768,
|
9 |
+
"initializer_range": 0.02,
|
10 |
+
"intermediate_size": 3072,
|
11 |
+
"layer_norm_eps": 1e-12,
|
12 |
+
"max_position_embeddings": 512,
|
13 |
+
"model_type": "bert",
|
14 |
+
"num_attention_heads": 12,
|
15 |
+
"num_hidden_layers": 6,
|
16 |
+
"pad_token_id": 0,
|
17 |
+
"pooler_fc_size": 768,
|
18 |
+
"pooler_num_attention_heads": 12,
|
19 |
+
"pooler_num_fc_layers": 3,
|
20 |
+
"pooler_size_per_head": 128,
|
21 |
+
"pooler_type": "first_token_transform",
|
22 |
+
"position_embedding_type": "absolute",
|
23 |
+
"transformers_version": "4.2.2",
|
24 |
+
"type_vocab_size": 2,
|
25 |
+
"use_cache": true,
|
26 |
+
"vocab_size": 21128
|
27 |
+
}
|
pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b5341c17ef5f3b028d0b2aa74d13ea5851ef17e456864ddffae3e84e5abd4109
|
3 |
+
size 241470166
|
special_tokens_map.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}
|
tf_model.h5
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3f817eb6fa00515d7d2d12f60d81b5d08adb1339176c083168300da64643e577
|
3 |
+
size 308076040
|
tokenizer_config.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"init_inputs": []}
|
vocab.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|