anah1tbaghdassarian committed on
Commit
e722e9b
1 Parent(s): 9c180cc

Upload tokenizer

Browse files
Files changed (2) hide show
  1. tokenizer_config.json +1 -2
  2. vocab.json +1 -1
tokenizer_config.json CHANGED
@@ -39,10 +39,9 @@
39
  "eos_token": "</s>",
40
  "model_max_length": 1000000000000000019884624838656,
41
  "pad_token": "[PAD]",
42
- "processor_class": "Wav2Vec2Processor",
43
  "replace_word_delimiter_char": " ",
44
  "target_lang": null,
45
  "tokenizer_class": "Wav2Vec2CTCTokenizer",
46
  "unk_token": "[UNK]",
47
- "word_delimiter_token": " "
48
  }
 
39
  "eos_token": "</s>",
40
  "model_max_length": 1000000000000000019884624838656,
41
  "pad_token": "[PAD]",
 
42
  "replace_word_delimiter_char": " ",
43
  "target_lang": null,
44
  "tokenizer_class": "Wav2Vec2CTCTokenizer",
45
  "unk_token": "[UNK]",
46
+ "word_delimiter_token": "|"
47
  }
vocab.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- " ": 0,
3
  "[PAD]": 41,
4
  "[UNK]": 40,
 
5
  "ա": 1,
6
  "բ": 2,
7
  "գ": 3,
 
1
  {
 
2
  "[PAD]": 41,
3
  "[UNK]": 40,
4
+ "|": 0,
5
  "ա": 1,
6
  "բ": 2,
7
  "գ": 3,