ndeclarke
/

wav2vec2-mms-1b-CV17.0-training_set_variations

@@ -1,20 +1,20 @@
 ---
-library_name: transformers
-license: cc-by-nc-4.0
 base_model: facebook/mms-1b-all
-tags:
-- generated_from_trainer
 datasets:
 - common_voice_17_0
 metrics:
 - wer
 - bleu
 model-index:
 - name: wav2vec2-mms-1b-CV17.0-training_set_variations
   results:
   - task:
-      name: Automatic Speech Recognition
       type: automatic-speech-recognition
     dataset:
       name: common_voice_17_0
       type: common_voice_17_0
@@ -22,12 +22,12 @@ model-index:
       split: validation[:5%]+validation[20%:25%]+validation[60%:65%]+validation[90%:]
       args: ta
     metrics:
-    - name: Wer
-      type: wer
       value: 0.5119016249451032
-    - name: Bleu
-      type: bleu
       value: 0.24178033350654143
 ---
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You

 ---
 base_model: facebook/mms-1b-all
 datasets:
 - common_voice_17_0
+library_name: transformers
+license: cc-by-nc-4.0
 metrics:
 - wer
 - bleu
+tags:
+- generated_from_trainer
 model-index:
 - name: wav2vec2-mms-1b-CV17.0-training_set_variations
   results:
   - task:
       type: automatic-speech-recognition
+      name: Automatic Speech Recognition
     dataset:
       name: common_voice_17_0
       type: common_voice_17_0
       split: validation[:5%]+validation[20%:25%]+validation[60%:65%]+validation[90%:]
       args: ta
     metrics:
+    - type: wer
       value: 0.5119016249451032
+      name: Wer
+    - type: bleu
       value: 0.24178033350654143
+      name: Bleu
 ---
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You

tokenizer_config.json CHANGED Viewed

@@ -39,9 +39,8 @@
   "eos_token": "</s>",
   "model_max_length": 1000000000000000019884624838656,
   "pad_token": "[PAD]",
-  "processor_class": "Wav2Vec2Processor",
   "replace_word_delimiter_char": " ",
-  "target_lang": "tam-32",
   "tokenizer_class": "Wav2Vec2CTCTokenizer",
   "unk_token": "[UNK]",
   "word_delimiter_token": "|"

   "eos_token": "</s>",
   "model_max_length": 1000000000000000019884624838656,
   "pad_token": "[PAD]",
   "replace_word_delimiter_char": " ",
+  "target_lang": "tam-128",
   "tokenizer_class": "Wav2Vec2CTCTokenizer",
   "unk_token": "[UNK]",
   "word_delimiter_token": "|"

vocab.json CHANGED Viewed

@@ -1,4 +1,61 @@
 {
   "tam-32": {
     "&": 1,
     "[PAD]": 54,

 {
+  "tam-128": {
+    "&": 1,
+    "[PAD]": 54,
+    "[UNK]": 53,
+    "_": 2,
+    "|": 0,
+    "¾": 3,
+    "ஃ": 4,
+    "அ": 5,
+    "ஆ": 6,
+    "இ": 7,
+    "ஈ": 8,
+    "உ": 9,
+    "ஊ": 10,
+    "எ": 11,
+    "ஏ": 12,
+    "ஐ": 13,
+    "ஒ": 14,
+    "ஓ": 15,
+    "ஔ": 16,
+    "க": 17,
+    "ங": 18,
+    "ச": 19,
+    "ஜ": 20,
+    "ஞ": 21,
+    "ட": 22,
+    "ண": 23,
+    "த": 24,
+    "ந": 25,
+    "ன": 26,
+    "ப": 27,
+    "ம": 28,
+    "ய": 29,
+    "ர": 30,
+    "ற": 31,
+    "ல": 32,
+    "ள": 33,
+    "ழ": 34,
+    "வ": 35,
+    "ஷ": 36,
+    "ஸ": 37,
+    "ஹ": 38,
+    "ா": 39,
+    "ி": 40,
+    "ீ": 41,
+    "ு": 42,
+    "ூ": 43,
+    "ெ": 44,
+    "ே": 45,
+    "ை": 46,
+    "ொ": 47,
+    "ோ": 48,
+    "ௌ": 49,
+    "்": 50,
+    "ௗ": 51,
+    "ഥ": 52
+  },
   "tam-32": {
     "&": 1,
     "[PAD]": 54,