vitvit committed on
Commit
2ca3aaa
1 Parent(s): deaf77e

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +14 -2
README.md CHANGED
@@ -1,5 +1,7 @@
1
- ```python
2
 
 
 
3
  from transformers import RobertaTokenizerFast, AutoModelForTokenClassification
4
  from datasets import load_dataset
5
 
@@ -37,12 +39,22 @@ def tokenize_and_align_labels(examples):
37
  tokenized_inputs["labels"] = labels
38
  return tokenized_inputs
39
 
40
- model = AutoModelForTokenClassification.from_pretrained('HeTree/HeConE') # same as bert?? so no wories
41
  tokenizer = RobertaTokenizerFast.from_pretrained('HeTree/HeConE')
42
  raw_dataset = load_dataset('HeTree/MevakerConcSen')
43
  window_size = 5
44
  raw_dataset_window = raw_dataset.map(split_into_windows, batched=True, batch_size=window_size, remove_columns=raw_dataset['train'].column_names)
45
  tokenized_dataset = raw_dataset_window.map(tokenize_and_align_labels, batched=False)
 
46
 
 
47
 
 
 
 
 
 
 
 
 
48
  ```
 
1
+ ## Hebrew Conclusion Extraction Model (based on token classification)
2
 
3
+ ### How to use
4
+ ```python
5
  from transformers import RobertaTokenizerFast, AutoModelForTokenClassification
6
  from datasets import load_dataset
7
 
 
39
  tokenized_inputs["labels"] = labels
40
  return tokenized_inputs
41
 
42
+ model = AutoModelForTokenClassification.from_pretrained('HeTree/HeConE')
43
  tokenizer = RobertaTokenizerFast.from_pretrained('HeTree/HeConE')
44
  raw_dataset = load_dataset('HeTree/MevakerConcSen')
45
  window_size = 5
46
  raw_dataset_window = raw_dataset.map(split_into_windows, batched=True, batch_size=window_size, remove_columns=raw_dataset['train'].column_names)
47
  tokenized_dataset = raw_dataset_window.map(tokenize_and_align_labels, batched=False)
48
+ ```
49
 
50
+ ### Citing
51
 
52
+ If you use HeConE in your research, please cite [HeRo: RoBERTa and Longformer Hebrew Language Models](https://arxiv.org/abs/2304.11077).
53
+ ```
54
+ @article{shalumov2023hero,
55
+ title={HeRo: RoBERTa and Longformer Hebrew Language Models},
56
+ author={Vitaly Shalumov and Harel Haskey},
57
+ year={2023},
58
+ journal={arXiv:2304.11077},
59
+ }
60
  ```