l3cube-pune committed on
Commit 057fbf8
1 Parent(s): 3e87890

Update model files

Files changed (5)
  1. config.json +1 -1
  2. pytorch_model.bin +1 -1
  3. tokenizer.json +0 -0
  4. tokenizer_config.json +3 -5
  5. vocab.txt +0 -0
config.json CHANGED
@@ -18,7 +18,7 @@
   "pad_token_id": 0,
   "position_embedding_type": "absolute",
   "torch_dtype": "float32",
-  "transformers_version": "4.21.2",
+  "transformers_version": "4.22.1",
   "type_vocab_size": 2,
   "use_cache": true,
   "vocab_size": 52000
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0b8c99dc5b9c45b178d9d496055dfd999158ab1fefd238327233970d19d4f1af
+oid sha256:e83d5b5b809071165dbcb97716149a605fb853022abb02754ab6d5450e1c15c1
 size 504192299
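Since pytorch_model.bin is stored via Git LFS, the "oid sha256:" line above is simply the SHA-256 of the file contents. A minimal sketch (not part of this commit) for checking that a locally downloaded copy matches the new object id; the local path is an assumption.

```python
import hashlib

EXPECTED_OID = "e83d5b5b809071165dbcb97716149a605fb853022abb02754ab6d5450e1c15c1"

def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    """Hash the file in chunks so a ~500 MB checkpoint never sits fully in memory."""
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

# "pytorch_model.bin" is a placeholder path for a locally downloaded copy.
print(sha256_of("pytorch_model.bin") == EXPECTED_OID)
```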
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
@@ -1,16 +1,14 @@
 {
   "cls_token": "[CLS]",
   "do_basic_tokenize": true,
-  "do_lower_case": false,
-  "lowercase": false,
+  "do_lower_case": true,
   "mask_token": "[MASK]",
-  "model_max_length": 512,
   "name_or_path": "l3cube-pune/hindi-bert-scratch",
   "never_split": null,
   "pad_token": "[PAD]",
   "sep_token": "[SEP]",
-  "special_tokens_map_file": "/ebs_ds_share/raviraj.j/temp/models/muril-base-cased/special_tokens_map.json",
-  "strip_accents": false,
+  "special_tokens_map_file": "/ebs_ds_share/raviraj.j/temp/tokenizer/hindi/special_tokens_map.json",
+  "strip_accents": null,
   "tokenize_chinese_chars": true,
   "tokenizer_class": "BertTokenizer",
   "unk_token": "[UNK]"
vocab.txt CHANGED
The diff for this file is too large to render. See raw diff