fixes.
- .gitignore +1 -0
- README.md +5 -2
- test.py +1 -0
.gitignore
CHANGED
@@ -1,3 +1,4 @@
 ckpt/
 *.tar.gz
 *.swp
+pya0
README.md
CHANGED
@@ -26,8 +26,11 @@ Download your tokenizer, model checkpoints, and optionally the training logs (`e

 Optionally, test model using the MLM task:
 ```sh
-pip install pya0
-
+pip install pya0 # for math token preprocessing
+# testing local checkpoints:
+python test.py ./ckpt/math-tokenizer ./ckpt/2-2-0/encoder.ckpt
+# testing Model Hub checkpoints:
+python test.py approach0/coco-mae-220 approach0/coco-mae-220
 ```
 > **Note**
 > Modify the test examples in `test.txt` to play with it.
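For reference, the MLM test these commands invoke amounts to masked-token prediction with a BERT-style encoder. Below is a minimal sketch of that flow using Hugging Face `transformers`; it assumes the `approach0/coco-mae-220` Model Hub checkpoint named in the diff loads with a masked-LM head, and the example sentence is hypothetical. `test.py`'s actual loading logic may differ.

```python
# Minimal MLM smoke test, sketched with Hugging Face transformers.
# Assumes approach0/coco-mae-220 (from the README diff) loads as a
# masked-LM model; test.py's own loading logic may differ.
import torch
from transformers import AutoTokenizer, AutoModelForMaskedLM

name = 'approach0/coco-mae-220'
tokenizer = AutoTokenizer.from_pretrained(name)
model = AutoModelForMaskedLM.from_pretrained(name)

sentence = 'The solution of $x^2 = 4$ is [MASK].'  # hypothetical test example
inputs = tokenizer(sentence, padding=True, truncation=True, return_tensors='pt')

with torch.no_grad():
    logits = model(**inputs).logits

# Print the top prediction at each [MASK] position.
mask_pos = (inputs['input_ids'][0] == tokenizer.mask_token_id).nonzero(as_tuple=True)[0]
for pos in mask_pos:
    top_id = logits[0, pos].argmax().item()
    print(pos.item(), tokenizer.convert_ids_to_tokens(top_id))
```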
test.py
CHANGED
@@ -43,6 +43,7 @@ def test(tokenizer_name_or_path, model_name_or_path, test_file='test.txt'):
     for pos in filter(lambda x: x!=0, maskpos):
         tokens[pos-1] = '[MASK]'
     sentence = ' '.join(tokens)
+    sentence = sentence.replace('[mask]', '[MASK]')
     tokens = tokenizer(sentence,
         padding=True, truncation=True, return_tensors="pt")
     #print(tokenizer.decode(tokens['input_ids'][0]))
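The one-line fix guards against lowercased mask placeholders: Hugging Face special-token matching is case-sensitive, so a literal `[mask]` (e.g. left over after lowercasing a test sentence) gets split into ordinary word pieces instead of the mask token. A quick illustration, using `bert-base-uncased` as a stand-in for the repo's math tokenizer:

```python
# Demonstrates why '[mask]' must be normalized to '[MASK]'.
# bert-base-uncased is a stand-in here; the repo's ./ckpt/math-tokenizer
# should treat its special tokens the same way.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')

print(tokenizer.tokenize('x is [mask]'))  # ['x', 'is', '[', 'mask', ']']
print(tokenizer.tokenize('x is [MASK]'))  # ['x', 'is', '[MASK]']

# The committed fix: normalize before encoding.
sentence = 'x is [mask]'.replace('[mask]', '[MASK]')
assert tokenizer.mask_token_id in tokenizer(sentence)['input_ids']
```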