f77777 committed on
Commit
c9dabdc
1 Parent(s): e63d13d

Upload tokenizer

Browse files
Files changed (1) hide show
  1. vocab.json +57 -57
vocab.json CHANGED
@@ -1,64 +1,64 @@
1
  {
2
  "[PAD]": 61,
3
  "[UNK]": 60,
4
- "|": 27,
5
- "ँ": 11,
6
- "ं": 22,
7
- "ः": 29,
8
- "अ": 6,
9
- "आ": 44,
10
  "इ": 26,
11
- "ई": 34,
12
- "उ": 23,
13
- "ऊ": 19,
14
- "ए": 4,
15
- "ओ": 21,
16
- "औ": 43,
17
- "क": 58,
18
- "ख": 17,
19
  "ग": 12,
20
- "घ": 52,
21
- "ङ": 57,
22
- "च": 32,
23
- "छ": 33,
24
- "ज": 59,
25
- "झ": 2,
26
- "ञ": 31,
27
- "ट": 7,
28
- "ठ": 1,
29
- "ड": 30,
30
- "ढ": 36,
31
- "ण": 41,
32
- "त": 37,
33
- "थ": 47,
34
- "द": 10,
35
- "ध": 56,
36
- "न": 46,
37
- "प": 54,
38
- "फ": 28,
39
- "ब": 9,
40
- "भ": 35,
41
- "म": 48,
42
  "य": 49,
43
- "र": 3,
44
- "ल": 8,
45
- "व": 53,
46
- "श": 51,
47
- "ष": 0,
48
- "स": 38,
49
- "ह": 16,
50
- "ा": 15,
51
- "ि": 50,
52
- "ी": 45,
53
- "ु": 39,
54
- "ू": 25,
55
- "ृ": 20,
56
- "े": 42,
57
- "ै": 40,
58
- "ो": 18,
59
- "ौ": 5,
60
- "्": 14,
61
- "।": 13,
62
- "–": 55,
63
- "’": 24
64
  }
 
1
  {
2
  "[PAD]": 61,
3
  "[UNK]": 60,
4
+ "|": 47,
5
+ "ँ": 24,
6
+ "ं": 48,
7
+ "ः": 25,
8
+ "अ": 42,
9
+ "आ": 4,
10
  "इ": 26,
11
+ "ई": 14,
12
+ "उ": 10,
13
+ "ऊ": 13,
14
+ "ए": 27,
15
+ "ओ": 30,
16
+ "औ": 11,
17
+ "क": 40,
18
+ "ख": 39,
19
  "ग": 12,
20
+ "घ": 36,
21
+ "ङ": 41,
22
+ "च": 57,
23
+ "छ": 6,
24
+ "ज": 9,
25
+ "झ": 44,
26
+ "ञ": 1,
27
+ "ट": 20,
28
+ "ठ": 2,
29
+ "ड": 28,
30
+ "ढ": 50,
31
+ "ण": 5,
32
+ "त": 43,
33
+ "थ": 22,
34
+ "द": 32,
35
+ "ध": 31,
36
+ "न": 55,
37
+ "प": 46,
38
+ "फ": 45,
39
+ "ब": 53,
40
+ "भ": 15,
41
+ "म": 33,
42
  "य": 49,
43
+ "र": 21,
44
+ "ल": 59,
45
+ "व": 7,
46
+ "श": 8,
47
+ "ष": 38,
48
+ "स": 54,
49
+ "ह": 19,
50
+ "ा": 52,
51
+ "ि": 3,
52
+ "ी": 16,
53
+ "ु": 58,
54
+ "ू": 29,
55
+ "ृ": 56,
56
+ "े": 17,
57
+ "ै": 37,
58
+ "ो": 23,
59
+ "ौ": 18,
60
+ "्": 34,
61
+ "।": 0,
62
+ "–": 51,
63
+ "’": 35
64
  }