Uploading the Model
Browse files
- README.md +39 -0
- config.json +26 -0
- pytorch_model.bin +3 -0
- vocab.txt +0 -0
README.md
ADDED
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
BioM-Transformers: Building Large Biomedical Language Models with
|
2 |
+
BERT, ALBERT and ELECTRA
|
3 |
+
|
4 |
+
Abstract
|
5 |
+
|
6 |
+
|
7 |
+
The impact of design choices on the performance
|
8 |
+
of biomedical language models recently
|
9 |
+
has been a subject for investigation. In
|
10 |
+
this paper, we empirically study biomedical
|
11 |
+
domain adaptation with large transformer models
|
12 |
+
using different design choices. We evaluate
|
13 |
+
the performance of our pretrained models
|
14 |
+
against other existing biomedical language
|
15 |
+
models in the literature. Our results show that
|
16 |
+
we achieve state-of-the-art results on several
|
17 |
+
biomedical domain tasks despite using similar
|
18 |
+
or less computational cost compared to other
|
19 |
+
models in the literature. Our findings highlight
|
20 |
+
the significant effect of design choices on
|
21 |
+
improving the performance of biomedical language
|
22 |
+
models.
|
23 |
+
|
24 |
+
This model was pre-trained on PubMed abstracts only, using a biomedical-domain vocabulary, for 500K steps with a batch size of 1024 on a TPUv3-32 unit.
|
25 |
+
```bibtex
|
26 |
+
@inproceedings{alrowili-shanker-2021-biom,
|
27 |
+
title = "{B}io{M}-Transformers: Building Large Biomedical Language Models with {BERT}, {ALBERT} and {ELECTRA}",
|
28 |
+
author = "Alrowili, Sultan and
|
29 |
+
Shanker, Vijay",
|
30 |
+
booktitle = "Proceedings of the 20th Workshop on Biomedical Language Processing",
|
31 |
+
month = jun,
|
32 |
+
year = "2021",
|
33 |
+
address = "Online",
|
34 |
+
publisher = "Association for Computational Linguistics",
|
35 |
+
url = "https://www.aclweb.org/anthology/2021.bionlp-1.24",
|
36 |
+
pages = "221--227",
|
37 |
+
abstract = "The impact of design choices on the performance of biomedical language models recently has been a subject for investigation. In this paper, we empirically study biomedical domain adaptation with large transformer models using different design choices. We evaluate the performance of our pretrained models against other existing biomedical language models in the literature. Our results show that we achieve state-of-the-art results on several biomedical domain tasks despite using similar or less computational cost compared to other models in the literature. Our findings highlight the significant effect of design choices on improving the performance of biomedical language models.",
|
38 |
+
}
|
39 |
+
```
|
config.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"architectures": [
|
3 |
+
"ElectraForPreTraining"
|
4 |
+
],
|
5 |
+
"attention_probs_dropout_prob": 0.1,
|
6 |
+
"embedding_size": 768,
|
7 |
+
"hidden_act": "gelu",
|
8 |
+
"hidden_dropout_prob": 0.1,
|
9 |
+
"hidden_size": 768,
|
10 |
+
"initializer_range": 0.02,
|
11 |
+
"intermediate_size": 3072,
|
12 |
+
"layer_norm_eps": 1e-12,
|
13 |
+
"max_position_embeddings": 512,
|
14 |
+
"model_type": "electra",
|
15 |
+
"num_attention_heads": 12,
|
16 |
+
"num_hidden_layers": 12,
|
17 |
+
"pad_token_id": 0,
|
18 |
+
"position_embedding_type": "absolute",
|
19 |
+
"summary_activation": "gelu",
|
20 |
+
"summary_last_dropout": 0.1,
|
21 |
+
"summary_type": "first",
|
22 |
+
"summary_use_proj": true,
|
23 |
+
"transformers_version": "4.6.0.dev0",
|
24 |
+
"type_vocab_size": 2,
|
25 |
+
"vocab_size": 28895
|
26 |
+
}
|
pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:377dc8761b4f56621e31e1ee7c80ca7cc0c16d11e55172bc21e5fef8be67e87c
|
3 |
+
size 433021769
|
vocab.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|