davda54 committed on
Commit
796464e
1 Parent(s): 75ff232

Upload README.md with huggingface_hub

Browse files
Files changed (1) hide show
  1. README.md +24 -4
README.md CHANGED
@@ -41,8 +41,8 @@ This model currently needs a custom wrapper from `modeling_ltgbert.py`, you shou
41
  import torch
42
  from transformers import AutoTokenizer, AutoModelForMaskedLM
43
 
44
- tokenizer = AutoTokenizer.from_pretrained("HPLT/hplt_bert_base_en")
45
- model = AutoModelForMaskedLM.from_pretrained("HPLT/hplt_bert_base_en", trust_remote_code=True)
46
 
47
  mask_id = tokenizer.convert_tokens_to_ids("[MASK]")
48
  input_text = tokenizer("It's a beautiful[MASK].", return_tensors="pt")
@@ -61,18 +61,38 @@ We are releasing 10 intermediate checkpoints for each model at intervals of ever
61
 
62
  You can load a specific model revision with `transformers` using the argument `revision`:
63
  ```python
64
- model = AutoModelForMaskedLM.from_pretrained("HPLT/hplt_bert_base_en", revision="step21875", trust_remote_code=True)
65
  ```
66
 
67
  You can access all the revisions for the models with the following code:
68
  ```python
69
  from huggingface_hub import list_repo_refs
70
- out = list_repo_refs("HPLT/hplt_bert_base_en")
71
  print([b.name for b in out.branches])
72
  ```
73
 
74
  ## Cite us
75
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76
  ```bibtex
77
  @inproceedings{de-gibert-etal-2024-new-massive,
78
  title = "A New Massive Multilingual Dataset for High-Performance Language Technologies",
 
41
  import torch
42
  from transformers import AutoTokenizer, AutoModelForMaskedLM
43
 
44
+ tokenizer = AutoTokenizer.from_pretrained("HPLT/hplt_bert_base_eu")
45
+ model = AutoModelForMaskedLM.from_pretrained("HPLT/hplt_bert_base_eu", trust_remote_code=True)
46
 
47
  mask_id = tokenizer.convert_tokens_to_ids("[MASK]")
48
  input_text = tokenizer("It's a beautiful[MASK].", return_tensors="pt")
 
61
 
62
  You can load a specific model revision with `transformers` using the argument `revision`:
63
  ```python
64
+ model = AutoModelForMaskedLM.from_pretrained("HPLT/hplt_bert_base_eu", revision="step21875", trust_remote_code=True)
65
  ```
66
 
67
  You can access all the revisions for the models with the following code:
68
  ```python
69
  from huggingface_hub import list_repo_refs
70
+ out = list_repo_refs("HPLT/hplt_bert_base_eu")
71
  print([b.name for b in out.branches])
72
  ```
73
 
74
  ## Cite us
75
 
76
+ ```bibtex
77
+ @inproceedings{samuel-etal-2023-trained,
78
+ title = "Trained on 100 million words and still in shape: {BERT} meets {B}ritish {N}ational {C}orpus",
79
+ author = "Samuel, David and
80
+ Kutuzov, Andrey and
81
+ {\O}vrelid, Lilja and
82
+ Velldal, Erik",
83
+ editor = "Vlachos, Andreas and
84
+ Augenstein, Isabelle",
85
+ booktitle = "Findings of the Association for Computational Linguistics: EACL 2023",
86
+ month = may,
87
+ year = "2023",
88
+ address = "Dubrovnik, Croatia",
89
+ publisher = "Association for Computational Linguistics",
90
+ url = "https://aclanthology.org/2023.findings-eacl.146",
91
+ doi = "10.18653/v1/2023.findings-eacl.146",
92
+ pages = "1954--1974"
93
+ }
94
+ ```
95
+
96
  ```bibtex
97
  @inproceedings{de-gibert-etal-2024-new-massive,
98
  title = "A New Massive Multilingual Dataset for High-Performance Language Technologies",