Wilson Wongso
committed on
Commit
•
84a267a
1
Parent(s):
a663308
Upload lm-boosted decoder
Browse files
- alphabet.json +1 -0
- language_model/5gram.bin +3 -0
- language_model/attrs.json +1 -0
- language_model/unigrams.txt +0 -0
- preprocessor_config.json +1 -0
- special_tokens_map.json +1 -1
- tokenizer_config.json +1 -1
alphabet.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"labels": [" ", "\uac00", "\uac01", "\uac04", "\uac08", "\uac10", "\uac11", "\uac12", "\uac13", "\uac14", "\uac15", "\uac16", "\uac19", "\uac1a", "\uac1c", "\uac1d", "\uac20", "\uac24", "\uac2f", "\uac31", "\uac70", "\uac71", "\uac74", "\uac77", "\uac78", "\uac80", "\uac81", "\uac83", "\uac89", "\uac8c", "\uac90", "\uac9f", "\uaca0", "\uaca8", "\uaca9", "\uacaa", "\uacac", "\uacb0", "\uacb8", "\uacb9", "\uacbc", "\uacbd", "\uacc1", "\uacc4", "\uace0", "\uace1", "\uace4", "\uace7", "\uace8", "\uacf0", "\uacf1", "\uacf3", "\uacf5", "\uacf6", "\uacfc", "\uacfd", "\uad00", "\uad04", "\uad0c", "\uad11", "\uad18", "\uad2d", "\uad34", "\uad49", "\uad50", "\uad6c", "\uad6d", "\uad70", "\uad73", "\uad74", "\uad75", "\uad76", "\uad7d", "\uad7f", "\uad81", "\uad88", "\uad8c", "\uad90", "\uada4", "\uadc0", "\uaddc", "\uade0", "\uadf8", "\uadf9", "\uadfc", "\uae00", "\uae08", "\uae09", "\uae0b", "\uae0d", "\uae30", "\uae34", "\uae38", "\uae40", "\uae41", "\uae43", "\uae4a", "\uae4c", "\uae4e", "\uae54", "\uae5d", "\uae61", "\uae65", "\uae68", "\uae6c", "\uaebc", "\uaecd", "\uaecf", "\uaed1", "\uaed8", "\uaef4", "\uaf08", "\uaf2c", "\uaf2d", "\uaf34", "\uaf3c", "\uaf3d", "\uaf41", "\uaf42", "\uaf43", "\uaf5d", "\uaf65", "\uafb8", "\uafbc", "\uafc0", "\uafc7", "\uafc8", "\uafce", "\uafd4", "\uafe8", "\uaff0", "\ub00c", "\ub010", "\ub044", "\ub045", "\ub048", "\ub04a", "\ub04c", "\ub053", "\ub054", "\ub057", "\ub05d", "\ub07c", "\ub07d", "\ub080", "\ub08c", "\ub098", "\ub099", "\ub09c", "\ub0a0", "\ub0a1", "\ub0a8", "\ub0a9", "\ub0ab", "\ub0ac", "\ub0ad", "\ub0ae", "\ub0af", "\ub0b3", "\ub0b4", "\ub0b8", "\ub0bc", "\ub0c4", "\ub0c5", "\ub0c7", "\ub0c8", "\ub0c9", "\ub0d0", "\ub0e5", "\ub108", "\ub109", "\ub10c", "\ub110", "\ub113", "\ub118", "\ub11b", "\ub123", "\ub124", "\ub125", "\ub128", "\ub137", "\ub140", "\ub141", "\ub144", "\ub150", "\ub154", "\ub155", "\ub158", "\ub178", "\ub179", "\ub17c", "\ub180", "\ub188", "\ub18d", "\ub192", "\ub193", "\ub1a8", "\ub1cc", "\ub1e8", 
"\ub1fd", "\ub204", "\ub208", "\ub20c", "\ub220", "\ub25c", "\ub274", "\ub289", "\ub290", "\ub294", "\ub298", "\ub2a0", "\ub2a5", "\ub2a6", "\ub2aa", "\ub2ac", "\ub2c8", "\ub2c9", "\ub2cc", "\ub2d0", "\ub2d8", "\ub2d9", "\ub2db", "\ub2dd", "\ub2e4", "\ub2e5", "\ub2e6", "\ub2e8", "\ub2eb", "\ub2ec", "\ub2ed", "\ub2ee", "\ub2f3", "\ub2f4", "\ub2f5", "\ub2f7", "\ub2f9", "\ub2ff", "\ub300", "\ub301", "\ub310", "\ub313", "\ub354", "\ub355", "\ub358", "\ub35c", "\ub35f", "\ub364", "\ub367", "\ub369", "\ub36b", "\ub36e", "\ub370", "\ub374", "\ub378", "\ub385", "\ub38c", "\ub3c4", "\ub3c5", "\ub3c8", "\ub3cb", "\ub3cc", "\ub3d4", "\ub3d5", "\ub3d7", "\ub3d9", "\ub3db", "\ub3fc", "\ub410", "\ub418", "\ub41c", "\ub420", "\ub428", "\ub429", "\ub450", "\ub451", "\ub454", "\ub458", "\ub460", "\ub465", "\ub46c", "\ub480", "\ub4a4", "\ub4b7", "\ub4c0", "\ub4dc", "\ub4dd", "\ub4e0", "\ub4e3", "\ub4e4", "\ub4ec", "\ub4ed", "\ub4ef", "\ub4f1", "\ub514", "\ub518", "\ub51c", "\ub525", "\ub528", "\ub529", "\ub52a", "\ub530", "\ub531", "\ub534", "\ub538", "\ub540", "\ub544", "\ub545", "\ub54c", "\ub550", "\ub560", "\ub561", "\ub5a0", "\ub5a1", "\ub5a4", "\ub5a8", "\ub5b4", "\ub5bb", "\ub5bc", "\ub5c4", "\ub610", "\ub611", "\ub625", "\ub69c", "\ub69d", "\ub6ab", "\ub6f0", "\ub6f4", "\ub728", "\ub72f", "\ub738", "\ub73b", "\ub744", "\ub754", "\ub760", "\ub764", "\ub768", "\ub775", "\ub77c", "\ub77d", "\ub780", "\ub784", "\ub78c", "\ub78d", "\ub790", "\ub791", "\ub798", "\ub799", "\ub79c", "\ub7a8", "\ub7ab", "\ub7ac", "\ub7ad", "\ub7b4", "\ub7b5", "\ub7c9", "\ub7ec", "\ub7ed", "\ub7f0", "\ub7f4", "\ub7fc", "\ub7fd", "\ub800", "\ub801", "\ub807", "\ub808", "\ub809", "\ub80c", "\ub818", "\ub81b", "\ub824", "\ub825", "\ub828", "\ub82c", "\ub834", "\ub835", "\ub837", "\ub838", "\ub839", "\ub840", "\ub85c", "\ub85d", "\ub860", "\ub864", "\ub86c", "\ub86d", "\ub86f", "\ub871", "\ub8b0", "\ub8cc", "\ub8e1", "\ub8e8", "\ub8ec", "\ub8f0", "\ub8f8", "\ub8f9", "\ub904", "\ub918", "\ub93c", "\ub958", 
"\ub959", "\ub95c", "\ub960", "\ub96d", "\ub974", "\ub975", "\ub978", "\ub97c", "\ub984", "\ub985", "\ub987", "\ub989", "\ub98e", "\ub9ac", "\ub9ad", "\ub9b0", "\ub9b4", "\ub9bc", "\ub9bd", "\ub9bf", "\ub9c1", "\ub9c8", "\ub9c9", "\ub9cc", "\ub9ce", "\ub9cf", "\ub9d0", "\ub9d1", "\ub9d8", "\ub9d9", "\ub9db", "\ub9dd", "\ub9de", "\ub9e1", "\ub9e4", "\ub9e5", "\ub9e8", "\ub9f9", "\ub9fa", "\uba38", "\uba39", "\uba3c", "\uba40", "\uba48", "\uba4b", "\uba4d", "\uba54", "\uba55", "\uba58", "\uba5c", "\uba67", "\uba70", "\uba74", "\uba78", "\uba85", "\uba87", "\ubaa8", "\ubaa9", "\ubaab", "\ubaac", "\ubab0", "\ubab8", "\ubabb", "\ubabd", "\ubb18", "\ubb34", "\ubb35", "\ubb36", "\ubb38", "\ubb3b", "\ubb3c", "\ubb44", "\ubb47", "\ubb50", "\ubb54", "\ubb58", "\ubba4", "\ubbac", "\ubbc0", "\ubbc8", "\ubbf8", "\ubbf9", "\ubbfc", "\ubbff", "\ubc00", "\ubc0b", "\ubc0c", "\ubc0d", "\ubc0f", "\ubc11", "\ubc14", "\ubc15", "\ubc16", "\ubc18", "\ubc1b", "\ubc1c", "\ubc1d", "\ubc24", "\ubc25", "\ubc29", "\ubc2d", "\ubc30", "\ubc31", "\ubc34", "\ubc40", "\ubc43", "\ubc45", "\ubc84", "\ubc85", "\ubc88", "\ubc8c", "\ubc94", "\ubc95", "\ubc97", "\ubc9a", "\ubca0", "\ubca4", "\ubca8", "\ubcb3", "\ubcbc", "\ubcbd", "\ubcc0", "\ubcc4", "\ubccd", "\ubcd0", "\ubcd1", "\ubcd5", "\ubcf4", "\ubcf5", "\ubcf6", "\ubcf8", "\ubcfc", "\ubd04", "\ubd05", "\ubd07", "\ubd09", "\ubd10", "\ubd24", "\ubd48", "\ubd59", "\ubd80", "\ubd81", "\ubd84", "\ubd88", "\ubd89", "\ubd90", "\ubd93", "\ubd95", "\ubd99", "\ubdd4", "\ubdf0", "\ube0c", "\ube10", "\ube14", "\ube44", "\ube45", "\ube48", "\ube4c", "\ube57", "\ube59", "\ube5a", "\ube5b", "\ube60", "\ube68", "\ube75", "\ube7c", "\ube80", "\ube8c", "\ube8f", "\ube91", "\ubed0", "\ubed1", "\ubed4", "\ubed7", "\ubed8", "\ubf08", "\ubf40", "\ubf51", "\ubf55", "\ubfcc", "\ubfd0", "\ubfdc", "\uc058", "\uc05c", "\uc069", "\uc090", "\uc0ac", "\uc0ad", "\uc0b0", "\uc0b4", "\uc0b6", "\uc0bc", "\uc0bd", "\uc0bf", "\uc0c0", "\uc0c1", "\uc0c8", "\uc0c9", "\uc0cc", "\uc0d0", 
"\uc0d8", "\uc0dd", "\uc0e4", "\uc0ec", "\uc0f5", "\uc0f7", "\uc11c", "\uc11d", "\uc11e", "\uc120", "\uc123", "\uc124", "\uc12c", "\uc12d", "\uc12f", "\uc130", "\uc131", "\uc138", "\uc139", "\uc13c", "\uc140", "\uc148", "\uc149", "\uc14b", "\uc154", "\uc158", "\uc15c", "\uc168", "\uc170", "\uc18c", "\uc18d", "\uc190", "\uc194", "\uc19c", "\uc19f", "\uc1a1", "\uc1a5", "\uc1c4", "\uc1e0", "\uc1e4", "\uc1fc", "\uc20d", "\uc218", "\uc219", "\uc21c", "\uc220", "\uc228", "\uc22d", "\uc232", "\uc26c", "\uc270", "\uc27c", "\uc27d", "\uc288", "\uc290", "\uc2a4", "\uc2a8", "\uc2ac", "\uc2b4", "\uc2b5", "\uc2b7", "\uc2b9", "\uc2dc", "\uc2dd", "\uc2e0", "\uc2e4", "\uc2eb", "\uc2ec", "\uc2ed", "\uc2f1", "\uc2f6", "\uc2f8", "\uc2f9", "\uc2fc", "\uc300", "\uc308", "\uc30c", "\uc30d", "\uc313", "\uc368", "\uc369", "\uc370", "\uc37c", "\uc3d8", "\uc3dc", "\uc3df", "\uc3e0", "\uc464", "\uc4f0", "\uc4f4", "\uc4f8", "\uc500", "\uc501", "\uc50c", "\uc528", "\uc529", "\uc52c", "\uc538", "\uc53b", "\uc544", "\uc545", "\uc548", "\uc549", "\uc54a", "\uc54c", "\uc553", "\uc554", "\uc555", "\uc557", "\uc558", "\uc559", "\uc55e", "\uc560", "\uc561", "\uc564", "\uc568", "\uc571", "\uc575", "\uc57c", "\uc57d", "\uc587", "\uc591", "\uc597", "\uc598", "\uc5b4", "\uc5b5", "\uc5b8", "\uc5b9", "\uc5bb", "\uc5bc", "\uc5bd", "\uc5c4", "\uc5c5", "\uc5c6", "\uc5c7", "\uc5c8", "\uc5c9", "\uc5ce", "\uc5d0", "\uc5d1", "\uc5d4", "\uc5d8", "\uc5e0", "\uc5e1", "\uc5e3", "\uc5ec", "\uc5ed", "\uc5f0", "\uc5f4", "\uc5f7", "\uc5fc", "\uc5fd", "\uc5ff", "\uc600", "\uc601", "\uc606", "\uc608", "\uc61b", "\uc624", "\uc625", "\uc628", "\uc62c", "\uc62e", "\uc633", "\uc634", "\uc635", "\uc637", "\uc639", "\uc640", "\uc644", "\uc648", "\uc654", "\uc655", "\uc65c", "\uc678", "\uc67c", "\uc694", "\uc695", "\uc6a9", "\uc6b0", "\uc6b1", "\uc6b4", "\uc6b8", "\uc6c0", "\uc6c1", "\uc6c3", "\uc6c5", "\uc6cc", "\uc6d0", "\uc6d4", "\uc6e0", "\uc6e8", "\uc6ec", "\uc6f9", "\uc704", "\uc708", "\uc70c", "\uc717", "\uc719", "\uc720", 
"\uc721", "\uc724", "\uc728", "\uc735", "\uc73c", "\uc740", "\uc744", "\uc74c", "\uc74d", "\uc751", "\uc758", "\uc774", "\uc775", "\uc778", "\uc77c", "\uc77d", "\uc783", "\uc784", "\uc785", "\uc787", "\uc788", "\uc789", "\uc78a", "\uc78e", "\uc790", "\uc791", "\uc794", "\uc796", "\uc798", "\uc7a0", "\uc7a1", "\uc7a3", "\uc7a5", "\uc7a6", "\uc7ac", "\uc7ad", "\uc7b0", "\uc7bd", "\uc7c1", "\uc800", "\uc801", "\uc804", "\uc808", "\uc80a", "\uc80b", "\uc810", "\uc811", "\uc813", "\uc815", "\uc816", "\uc81c", "\uc81d", "\uc820", "\uc824", "\uc838", "\uc83c", "\uc84c", "\uc870", "\uc871", "\uc874", "\uc878", "\uc880", "\uc881", "\uc885", "\uc88b", "\uc88c", "\uc8c4", "\uc8e0", "\uc8fc", "\uc8fd", "\uc900", "\uc904", "\uc90d", "\uc911", "\uc918", "\uc92c", "\uc950", "\uc954", "\uc958", "\uc96c", "\uc988", "\uc989", "\uc98c", "\uc990", "\uc998", "\uc99d", "\uc9c0", "\uc9c1", "\uc9c4", "\uc9c8", "\uc9ca", "\uc9d0", "\uc9d1", "\uc9d3", "\uc9d5", "\uc9d6", "\uc9d9", "\uc9da", "\uc9dc", "\uc9dd", "\uc9e7", "\uc9ec", "\uc9f8", "\uca0c", "\uca4c", "\uca4d", "\uca54", "\uca5c", "\ucabc", "\ucabd", "\ucac4", "\ucad3", "\ucb49", "\ucbe4", "\ucc0c", "\ucc0d", "\ucc14", "\ucc22", "\ucc27", "\ucc28", "\ucc29", "\ucc2c", "\ucc2e", "\ucc30", "\ucc38", "\ucc3b", "\ucc3d", "\ucc3e", "\ucc44", "\ucc45", "\ucc4c", "\ucc54", "\ucc59", "\ucc60", "\ucc98", "\ucc99", "\ucc9c", "\ucca0", "\ucca8", "\ucca9", "\uccab", "\uccad", "\uccb4", "\uccb8", "\uccbc", "\uccc7", "\uccd0", "\ucce4", "\ucd08", "\ucd09", "\ucd0c", "\ucd18", "\ucd1b", "\ucd1d", "\ucd28", "\ucd2c", "\ucd5c", "\ucd94", "\ucd95", "\ucd98", "\ucd9c", "\ucda4", "\ucda9", "\ucdb0", "\ucde8", "\uce20", "\uce21", "\uce30", "\uce35", "\uce58", "\uce59", "\uce5c", "\uce60", "\uce68", "\uce69", "\uce6b", "\uce6d", "\uce74", "\uce78", "\uce7c", "\uce89", "\uce90", "\uce94", "\uce98", "\ucea0", "\ucee4", "\ucee5", "\ucee8", "\uceeb", "\ucef4", "\ucef5", "\ucef7", "\ucef8", "\ucf00", "\ucf08", "\ucf10", "\ucf11", "\ucf13", "\ucf1c", "\ucf30", 
"\ucf54", "\ucf58", "\ucf5c", "\ucf64", "\ucf65", "\ucf67", "\ucf69", "\ucf8c", "\ucfc4", "\ucfe0", "\ucfe1", "\ucfe8", "\ucffc", "\ud034", "\ud050", "\ud06c", "\ud070", "\ud074", "\ud07c", "\ud0a4", "\ud0a5", "\ud0a8", "\ud0ac", "\ud0b7", "\ud0b9", "\ud0c0", "\ud0c1", "\ud0c4", "\ud0c8", "\ud0d0", "\ud0d1", "\ud0d3", "\ud0d5", "\ud0dc", "\ud0dd", "\ud0e0", "\ud0ec", "\ud0f1", "\ud130", "\ud131", "\ud134", "\ud138", "\ud143", "\ud145", "\ud14c", "\ud14d", "\ud150", "\ud154", "\ud15c", "\ud17c", "\ud1a0", "\ud1a1", "\ud1a4", "\ud1a8", "\ud1b0", "\ud1b5", "\ud1f4", "\ud22c", "\ud234", "\ud23c", "\ud241", "\ud280", "\ud29c", "\ud2ac", "\ud2b8", "\ud2b9", "\ud2bc", "\ud2bf", "\ud2c0", "\ud2c8", "\ud2f0", "\ud2f1", "\ud2f4", "\ud2f8", "\ud300", "\ud305", "\ud30c", "\ud30d", "\ud30e", "\ud310", "\ud314", "\ud31c", "\ud321", "\ud328", "\ud329", "\ud32c", "\ud330", "\ud33b", "\ud33d", "\ud37c", "\ud380", "\ud384", "\ud38c", "\ud398", "\ud39c", "\ud3a0", "\ud3ab", "\ud3b4", "\ud3b8", "\ud3bc", "\ud3c4", "\ud3c8", "\ud3c9", "\ud3d0", "\ud3ec", "\ud3ed", "\ud3f0", "\ud3f4", "\ud3fc", "\ud45c", "\ud478", "\ud479", "\ud47c", "\ud480", "\ud488", "\ud48b", "\ud48d", "\ud4e8", "\ud4f0", "\ud504", "\ud508", "\ud50c", "\ud514", "\ud53c", "\ud53d", "\ud540", "\ud544", "\ud54f", "\ud551", "\ud558", "\ud559", "\ud55c", "\ud560", "\ud568", "\ud569", "\ud56b", "\ud56d", "\ud574", "\ud575", "\ud578", "\ud587", "\ud588", "\ud589", "\ud5a5", "\ud5c8", "\ud5cc", "\ud5d0", "\ud5d8", "\ud5dd", "\ud5e4", "\ud5e8", "\ud5ec", "\ud5f4", "\ud5f7", "\ud600", "\ud601", "\ud604", "\ud608", "\ud610", "\ud611", "\ud614", "\ud615", "\ud61c", "\ud638", "\ud639", "\ud63c", "\ud640", "\ud648", "\ud649", "\ud64d", "\ud654", "\ud655", "\ud658", "\ud65c", "\ud669", "\ud68c", "\ud68d", "\ud6a1", "\ud6a8", "\ud6c4", "\ud6c8", "\ud6cc", "\ud6e4", "\ud6e8", "\ud6fc", "\ud718", "\ud729", "\ud734", "\ud749", "\ud750", "\ud751", "\ud754", "\ud758", "\ud759", "\ud760", "\ud761", "\ud765", "\ud769", "\ud76c", "\ud770", 
"\ud788", "\ud78c", "\ud790", "\ud798", "\u2047", "", "<s>", "</s>"], "is_bpe": false}
|
language_model/5gram.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fc4723cabeae55f825a8891a5992bbccb7bd1b6ebcca59bbe37750f3b79eeea5
|
3 |
+
size 491009537
|
language_model/attrs.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"alpha": 0.5, "beta": 1.5, "unk_score_offset": -10.0, "score_boundary": true}
|
language_model/unigrams.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
preprocessor_config.json
CHANGED
@@ -4,6 +4,7 @@
|
|
4 |
"feature_size": 1,
|
5 |
"padding_side": "right",
|
6 |
"padding_value": 0,
|
|
|
7 |
"return_attention_mask": true,
|
8 |
"sampling_rate": 16000
|
9 |
}
|
|
|
4 |
"feature_size": 1,
|
5 |
"padding_side": "right",
|
6 |
"padding_value": 0,
|
7 |
+
"processor_class": "Wav2Vec2ProcessorWithLM",
|
8 |
"return_attention_mask": true,
|
9 |
"sampling_rate": 16000
|
10 |
}
|
special_tokens_map.json
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"bos_token": "<s>", "eos_token": "</s>", "unk_token": "[UNK]", "pad_token": "[PAD]", "additional_special_tokens": [{"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}]}
|
|
|
1 |
+
{"bos_token": "<s>", "eos_token": "</s>", "unk_token": "[UNK]", "pad_token": "[PAD]", "additional_special_tokens": [{"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}]}
|
tokenizer_config.json
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"unk_token": "[UNK]", "bos_token": "<s>", "eos_token": "</s>", "pad_token": "[PAD]", "do_lower_case": false, "word_delimiter_token": "|", "special_tokens_map_file": null, "tokenizer_file": null, "name_or_path": "
|
|
|
1 |
+
{"unk_token": "[UNK]", "bos_token": "<s>", "eos_token": "</s>", "pad_token": "[PAD]", "do_lower_case": false, "word_delimiter_token": "|", "special_tokens_map_file": null, "tokenizer_file": null, "name_or_path": "w11wo/wav2vec2-xls-r-300m-korean-lm", "tokenizer_class": "Wav2Vec2CTCTokenizer", "processor_class": "Wav2Vec2ProcessorWithLM"}
|