SharmilaAnanthasayanam committed on
Commit
ce64438
1 Parent(s): fb71551

Upload tokenizer

Browse files
Files changed (2) hide show
  1. tokenizer_config.json +0 -1
  2. vocab.json +100 -100
tokenizer_config.json CHANGED
@@ -39,7 +39,6 @@
39
  "eos_token": "</s>",
40
  "model_max_length": 1000000000000000019884624838656,
41
  "pad_token": "[PAD]",
42
- "processor_class": "Wav2Vec2Processor",
43
  "replace_word_delimiter_char": " ",
44
  "target_lang": null,
45
  "tokenizer_class": "Wav2Vec2CTCTokenizer",
 
39
  "eos_token": "</s>",
40
  "model_max_length": 1000000000000000019884624838656,
41
  "pad_token": "[PAD]",
 
42
  "replace_word_delimiter_char": " ",
43
  "target_lang": null,
44
  "tokenizer_class": "Wav2Vec2CTCTokenizer",
vocab.json CHANGED
@@ -1,104 +1,104 @@
1
  {
2
- "&": 23,
3
- "'": 56,
4
  "[PAD]": 101,
5
  "[UNK]": 100,
6
- "a": 39,
7
- "b": 82,
8
- "c": 3,
9
- "d": 43,
10
- "e": 5,
11
- "f": 79,
12
- "g": 60,
13
- "h": 93,
14
- "i": 16,
15
- "j": 65,
16
- "k": 66,
17
- "l": 27,
18
- "m": 87,
19
- "n": 55,
20
- "o": 52,
21
- "p": 22,
22
- "q": 12,
23
- "r": 77,
24
- "s": 35,
25
- "t": 99,
26
- "u": 68,
27
- "v": 28,
28
- "w": 72,
29
- "x": 62,
30
- "y": 10,
31
- "z": 67,
32
- "|": 26,
33
- "ँ": 90,
34
- "ं": 96,
35
- "ः": 21,
36
- "अ": 85,
37
- "आ": 73,
38
- "इ": 19,
39
- "ई": 17,
40
- "उ": 11,
41
- "ऊ": 51,
42
- "ऋ": 94,
43
- "ए": 1,
44
- "ऐ": 57,
45
- "ऑ": 98,
46
- "ओ": 4,
47
- "औ": 6,
48
- "क": 45,
49
- "ख": 76,
50
- "ग": 59,
51
- "घ": 14,
52
- "च": 89,
53
- "छ": 81,
54
- "ज": 88,
55
- "झ": 42,
56
- "ञ": 41,
57
- "ट": 32,
58
- "ठ": 33,
59
- "ड": 70,
60
- "ढ": 29,
61
- "ण": 36,
62
- "त": 50,
63
- "थ": 46,
64
- "द": 63,
65
- "ध": 71,
66
- "न": 53,
67
- "प": 86,
68
- "फ": 58,
69
- "ब": 25,
70
- "भ": 84,
71
- "म": 74,
72
- "य": 78,
73
- "र": 54,
74
- "ल": 47,
75
- "व": 64,
76
- "श": 38,
77
- "ष": 48,
78
- "स": 37,
79
- "ह": 9,
80
- "़": 44,
81
- "ा": 7,
82
- "ि": 95,
83
- "ी": 100,
84
- "ु": 2,
85
- "ू": 13,
86
- "ृ": 30,
87
- "ॅ": 75,
88
- "े": 49,
89
- "ै": 92,
90
- "ॉ": 18,
91
- "ो": 34,
92
- "ौ": 40,
93
- "्": 61,
94
- "क़": 83,
95
- "ख़": 8,
96
- "ग़": 91,
97
- "ज़": 0,
98
- "ड़": 24,
99
- "ढ़": 69,
100
- "फ़": 80,
101
- "।": 97,
102
- "–": 20,
103
- "’": 15
104
  }
 
1
  {
2
+ "&": 97,
3
+ "'": 15,
4
  "[PAD]": 101,
5
  "[UNK]": 100,
6
+ "a": 98,
7
+ "b": 40,
8
+ "c": 65,
9
+ "d": 72,
10
+ "e": 1,
11
+ "f": 50,
12
+ "g": 90,
13
+ "h": 95,
14
+ "i": 38,
15
+ "j": 12,
16
+ "k": 76,
17
+ "l": 58,
18
+ "m": 66,
19
+ "n": 22,
20
+ "o": 92,
21
+ "p": 69,
22
+ "q": 33,
23
+ "r": 88,
24
+ "s": 31,
25
+ "t": 18,
26
+ "u": 87,
27
+ "v": 61,
28
+ "w": 30,
29
+ "x": 100,
30
+ "y": 0,
31
+ "z": 14,
32
+ "|": 64,
33
+ "ँ": 9,
34
+ "ं": 13,
35
+ "ः": 11,
36
+ "अ": 28,
37
+ "आ": 54,
38
+ "इ": 68,
39
+ "ई": 63,
40
+ "उ": 2,
41
+ "ऊ": 29,
42
+ "ऋ": 48,
43
+ "ए": 20,
44
+ "ऐ": 77,
45
+ "ऑ": 70,
46
+ "ओ": 74,
47
+ "औ": 99,
48
+ "क": 55,
49
+ "ख": 6,
50
+ "ग": 10,
51
+ "घ": 26,
52
+ "च": 79,
53
+ "छ": 60,
54
+ "ज": 62,
55
+ "झ": 27,
56
+ "ञ": 35,
57
+ "ट": 3,
58
+ "ठ": 78,
59
+ "ड": 4,
60
+ "ढ": 21,
61
+ "ण": 57,
62
+ "त": 93,
63
+ "थ": 51,
64
+ "द": 34,
65
+ "ध": 94,
66
+ "न": 32,
67
+ "प": 73,
68
+ "फ": 17,
69
+ "ब": 53,
70
+ "भ": 80,
71
+ "म": 75,
72
+ "य": 96,
73
+ "र": 67,
74
+ "ल": 44,
75
+ "व": 23,
76
+ "श": 56,
77
+ "ष": 52,
78
+ "स": 46,
79
+ "ह": 82,
80
+ "़": 7,
81
+ "ा": 81,
82
+ "ि": 5,
83
+ "ी": 49,
84
+ "ु": 41,
85
+ "ू": 39,
86
+ "ृ": 47,
87
+ "ॅ": 42,
88
+ "े": 37,
89
+ "ै": 83,
90
+ "ॉ": 16,
91
+ "ो": 36,
92
+ "ौ": 24,
93
+ "्": 91,
94
+ "क़": 71,
95
+ "ख़": 89,
96
+ "ग़": 86,
97
+ "ज़": 84,
98
+ "ड़": 45,
99
+ "ढ़": 8,
100
+ "फ़": 19,
101
+ "।": 43,
102
+ "–": 59,
103
+ "’": 85
104
  }