manjugeorge committed on
Commit
4602135
1 Parent(s): 104adcd

Upload tokenizer

Browse files
Files changed (3) hide show
  1. added_tokens.json +2 -2
  2. tokenizer_config.json +4 -5
  3. vocab.json +76 -84
added_tokens.json CHANGED
@@ -1,4 +1,4 @@
1
  {
2
- "</s>": 86,
3
- "<s>": 85
4
  }
 
1
  {
2
+ "</s>": 78,
3
+ "<s>": 77
4
  }
tokenizer_config.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "added_tokens_decoder": {
3
- "83": {
4
  "content": "[UNK]",
5
  "lstrip": true,
6
  "normalized": false,
@@ -8,7 +8,7 @@
8
  "single_word": false,
9
  "special": false
10
  },
11
- "84": {
12
  "content": "[PAD]",
13
  "lstrip": true,
14
  "normalized": false,
@@ -16,7 +16,7 @@
16
  "single_word": false,
17
  "special": false
18
  },
19
- "85": {
20
  "content": "<s>",
21
  "lstrip": false,
22
  "normalized": false,
@@ -24,7 +24,7 @@
24
  "single_word": false,
25
  "special": true
26
  },
27
- "86": {
28
  "content": "</s>",
29
  "lstrip": false,
30
  "normalized": false,
@@ -39,7 +39,6 @@
39
  "eos_token": "</s>",
40
  "model_max_length": 1000000000000000019884624838656,
41
  "pad_token": "[PAD]",
42
- "processor_class": "Wav2Vec2Processor",
43
  "replace_word_delimiter_char": " ",
44
  "target_lang": null,
45
  "tokenizer_class": "Wav2Vec2CTCTokenizer",
 
1
  {
2
  "added_tokens_decoder": {
3
+ "75": {
4
  "content": "[UNK]",
5
  "lstrip": true,
6
  "normalized": false,
 
8
  "single_word": false,
9
  "special": false
10
  },
11
+ "76": {
12
  "content": "[PAD]",
13
  "lstrip": true,
14
  "normalized": false,
 
16
  "single_word": false,
17
  "special": false
18
  },
19
+ "77": {
20
  "content": "<s>",
21
  "lstrip": false,
22
  "normalized": false,
 
24
  "single_word": false,
25
  "special": true
26
  },
27
+ "78": {
28
  "content": "</s>",
29
  "lstrip": false,
30
  "normalized": false,
 
39
  "eos_token": "</s>",
40
  "model_max_length": 1000000000000000019884624838656,
41
  "pad_token": "[PAD]",
 
42
  "replace_word_delimiter_char": " ",
43
  "target_lang": null,
44
  "tokenizer_class": "Wav2Vec2CTCTokenizer",
vocab.json CHANGED
@@ -1,87 +1,79 @@
1
  {
2
- "!": 76,
3
- "\"": 77,
4
- "'": 71,
5
- ",": 7,
6
- "-": 54,
7
- ".": 67,
8
- ":": 55,
9
- ";": 40,
10
- "?": 10,
11
- "[PAD]": 84,
12
- "[UNK]": 83,
13
- "|": 12,
14
- "": 65,
15
- "": 4,
16
- "": 75,
17
- "": 24,
18
- "": 5,
19
- "": 35,
20
- "": 52,
21
- "": 79,
22
- "": 18,
23
- "": 33,
24
- "": 6,
25
- "": 46,
26
- "": 25,
27
- "": 1,
28
- "": 38,
29
- "": 15,
30
- "": 39,
31
- "": 29,
32
- "": 36,
33
- "": 58,
34
- "": 62,
35
- "": 14,
36
- "ഞ": 50,
37
- "ട": 63,
38
- "ഠ": 37,
39
- "ഡ": 59,
40
- "ഢ": 44,
41
- "ണ": 69,
42
- "ത": 70,
43
- "ഥ": 68,
44
  "ദ": 66,
45
- "ധ": 78,
46
- "ന": 31,
47
- "പ": 20,
48
- "ഫ": 56,
49
- "ബ": 47,
50
- "ഭ": 45,
51
- "മ": 3,
52
- "യ": 81,
53
- "ര": 23,
54
- "റ": 82,
55
- "ല": 74,
56
- "ള": 21,
57
- "ഴ": 57,
58
- "വ": 60,
59
- "ശ": 16,
60
- "ഷ": 32,
61
- "സ": 22,
62
- "ഹ": 26,
63
- "ാ": 80,
64
- "ി": 64,
65
- "ീ": 9,
66
- "ു": 11,
67
- "ൂ": 43,
68
- "ൃ": 53,
69
- "െ": 42,
70
- "േ": 49,
71
- "ൈ": 51,
72
- "ൊ": 27,
73
- "ോ": 0,
74
- "ൌ": 48,
75
- "്": 34,
76
- "ൗ": 72,
77
- "ൺ": 17,
78
- "ൻ": 73,
79
- "ർ": 19,
80
- "ൽ": 41,
81
- "ൾ": 13,
82
- "ൿ": 2,
83
- "‘": 28,
84
- "’": 30,
85
- "“": 61,
86
- "”": 8
87
  }
 
1
  {
2
+ "'": 5,
3
+ "[PAD]": 76,
4
+ "[UNK]": 75,
5
+ "|": 52,
6
+ "": 35,
7
+ "": 29,
8
+ "": 49,
9
+ "": 46,
10
+ "": 17,
11
+ "": 36,
12
+ "": 57,
13
+ "": 67,
14
+ "": 16,
15
+ "": 22,
16
+ "": 15,
17
+ "": 39,
18
+ "": 65,
19
+ "": 61,
20
+ "": 53,
21
+ "": 44,
22
+ "": 10,
23
+ "": 38,
24
+ "": 27,
25
+ "": 14,
26
+ "": 47,
27
+ "": 0,
28
+ "": 24,
29
+ "": 56,
30
+ "": 23,
31
+ "": 11,
32
+ "": 8,
33
+ "": 51,
34
+ "": 55,
35
+ "": 59,
 
 
 
 
 
 
 
 
36
  "ദ": 66,
37
+ "ധ": 28,
38
+ "ന": 64,
39
+ "പ": 42,
40
+ "ഫ": 40,
41
+ "ബ": 25,
42
+ "ഭ": 50,
43
+ "മ": 43,
44
+ "യ": 18,
45
+ "ര": 19,
46
+ "റ": 21,
47
+ "ല": 62,
48
+ "ള": 63,
49
+ "ഴ": 3,
50
+ "വ": 48,
51
+ "ശ": 30,
52
+ "ഷ": 74,
53
+ "സ": 9,
54
+ "ഹ": 69,
55
+ "ാ": 26,
56
+ "ി": 12,
57
+ "ീ": 37,
58
+ "ു": 68,
59
+ "ൂ": 33,
60
+ "ൃ": 1,
61
+ "െ": 7,
62
+ "േ": 58,
63
+ "ൈ": 54,
64
+ "ൊ": 71,
65
+ "ോ": 2,
66
+ "ൌ": 4,
67
+ "്": 45,
68
+ "ൗ": 60,
69
+ "ൺ": 31,
70
+ "ൻ": 32,
71
+ "ർ": 72,
72
+ "ൽ": 13,
73
+ "ൾ": 73,
74
+ "ൿ": 70,
75
+ "‘": 6,
76
+ "’": 41,
77
+ "“": 20,
78
+ "”": 34
79
  }