Training in progress, step 200

Browse files

Files changed (9) hide show

added_tokens.json +4 -0
config.json +108 -0
model.safetensors +3 -0
preprocessor_config.json +10 -0
runs/Oct22_13-17-20_ec3cd788a92f/events.out.tfevents.1729603136.ec3cd788a92f.1076.0 +3 -0
special_tokens_map.json +6 -0
tokenizer_config.json +48 -0
training_args.bin +3 -0
vocab.json +230 -0

added_tokens.json ADDED Viewed

	@@ -0,0 +1,4 @@

+{
+  "</s>": 227,
+  "<s>": 226
+}

config.json ADDED Viewed

	@@ -0,0 +1,108 @@

+{
+  "_name_or_path": "facebook/mms-1b-all",
+  "activation_dropout": 0.05,
+  "adapter_attn_dim": 16,
+  "adapter_kernel_size": 3,
+  "adapter_stride": 2,
+  "add_adapter": false,
+  "apply_spec_augment": true,
+  "architectures": [
+    "Wav2Vec2ForCTC"
+  ],
+  "attention_dropout": 0.0,
+  "bos_token_id": 1,
+  "classifier_proj_size": 256,
+  "codevector_dim": 1024,
+  "contrastive_logits_temperature": 0.1,
+  "conv_bias": true,
+  "conv_dim": [
+    512,
+    512,
+    512,
+    512,
+    512,
+    512,
+    512
+  ],
+  "conv_kernel": [
+    10,
+    3,
+    3,
+    3,
+    3,
+    2,
+    2
+  ],
+  "conv_stride": [
+    5,
+    2,
+    2,
+    2,
+    2,
+    2,
+    2
+  ],
+  "ctc_loss_reduction": "mean",
+  "ctc_zero_infinity": false,
+  "diversity_loss_weight": 0.1,
+  "do_stable_layer_norm": true,
+  "eos_token_id": 2,
+  "feat_extract_activation": "gelu",
+  "feat_extract_dropout": 0.0,
+  "feat_extract_norm": "layer",
+  "feat_proj_dropout": 0.0,
+  "feat_quantizer_dropout": 0.0,
+  "final_dropout": 0.05,
+  "hidden_act": "gelu",
+  "hidden_dropout": 0.0,
+  "hidden_size": 1280,
+  "initializer_range": 0.02,
+  "intermediate_size": 5120,
+  "layer_norm_eps": 1e-05,
+  "layerdrop": 0.0,
+  "mask_feature_length": 10,
+  "mask_feature_min_masks": 0,
+  "mask_feature_prob": 0.0,
+  "mask_time_length": 10,
+  "mask_time_min_masks": 2,
+  "mask_time_prob": 0.05,
+  "model_type": "wav2vec2",
+  "num_adapter_layers": 3,
+  "num_attention_heads": 16,
+  "num_codevector_groups": 2,
+  "num_codevectors_per_group": 320,
+  "num_conv_pos_embedding_groups": 16,
+  "num_conv_pos_embeddings": 128,
+  "num_feat_extract_layers": 7,
+  "num_hidden_layers": 48,
+  "num_negatives": 100,
+  "output_hidden_size": 1280,
+  "pad_token_id": 225,
+  "proj_codevector_dim": 1024,
+  "tdnn_dilation": [
+    1,
+    2,
+    3,
+    1,
+    1
+  ],
+  "tdnn_dim": [
+    512,
+    512,
+    512,
+    512,
+    1500
+  ],
+  "tdnn_kernel": [
+    5,
+    3,
+    3,
+    1,
+    1
+  ],
+  "torch_dtype": "float32",
+  "transformers_version": "4.46.0.dev0",
+  "use_weighted_layer_sum": false,
+  "vocab_size": 228,
+  "xvector_output_dim": 512
+}

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c4647178670eee6a56e1bb32900edbf58cc4bd1ca54fdbc1484cfe01f5f9efa9
+size 3859900376

preprocessor_config.json ADDED Viewed

	@@ -0,0 +1,10 @@

+{
+  "do_normalize": true,
+  "feature_extractor_type": "Wav2Vec2FeatureExtractor",
+  "feature_size": 1,
+  "padding_side": "right",
+  "padding_value": 0.0,
+  "processor_class": "Wav2Vec2Processor",
+  "return_attention_mask": true,
+  "sampling_rate": 16000
+}

runs/Oct22_13-17-20_ec3cd788a92f/events.out.tfevents.1729603136.ec3cd788a92f.1076.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:540ab612aa6e23d562ced4d85a093727f310007863f74cc28081b9e6cc2e2893
+size 14059

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,6 @@

+{
+  "bos_token": "<s>",
+  "eos_token": "</s>",
+  "pad_token": "[PAD]",
+  "unk_token": "[UNK]"
+}

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,48 @@

+{
+  "added_tokens_decoder": {
+    "224": {
+      "content": "[UNK]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": false
+    },
+    "225": {
+      "content": "[PAD]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": false
+    },
+    "226": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "227": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "<s>",
+  "clean_up_tokenization_spaces": false,
+  "do_lower_case": false,
+  "eos_token": "</s>",
+  "model_max_length": 1000000000000000019884624838656,
+  "pad_token": "[PAD]",
+  "processor_class": "Wav2Vec2Processor",
+  "replace_word_delimiter_char": " ",
+  "target_lang": "amh",
+  "tokenizer_class": "Wav2Vec2CTCTokenizer",
+  "unk_token": "[UNK]",
+  "word_delimiter_token": "|"
+}

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:24282700d6fc8dcd0e85f1266e6307f54b29df9454c4ef0ebd88b35eb0294e9f
+size 5240

vocab.json ADDED Viewed

	@@ -0,0 +1,230 @@

+{
+  "amh": {
+    "[PAD]": 225,
+    "[UNK]": 224,
+    "|": 0,
+    "ሀ": 1,
+    "ሁ": 2,
+    "ሂ": 3,
+    "ሃ": 4,
+    "ሄ": 5,
+    "ህ": 6,
+    "ሆ": 7,
+    "ለ": 8,
+    "ሉ": 9,
+    "ሊ": 10,
+    "ላ": 11,
+    "ሌ": 12,
+    "ል": 13,
+    "ሎ": 14,
+    "ሏ": 15,
+    "ሐ": 16,
+    "ሑ": 17,
+    "ሓ": 18,
+    "ሔ": 19,
+    "ሕ": 20,
+    "ሗ": 21,
+    "መ": 22,
+    "ሙ": 23,
+    "ሚ": 24,
+    "ማ": 25,
+    "ሜ": 26,
+    "ም": 27,
+    "ሞ": 28,
+    "ሟ": 29,
+    "ሠ": 30,
+    "ሡ": 31,
+    "ሣ": 32,
+    "ሥ": 33,
+    "ረ": 34,
+    "ሩ": 35,
+    "ሪ": 36,
+    "ራ": 37,
+    "ሬ": 38,
+    "ር": 39,
+    "ሮ": 40,
+    "ሯ": 41,
+    "ሰ": 42,
+    "ሱ": 43,
+    "ሲ": 44,
+    "ሳ": 45,
+    "ሴ": 46,
+    "ስ": 47,
+    "ሶ": 48,
+    "ሷ": 49,
+    "ሸ": 50,
+    "ሹ": 51,
+    "ሺ": 52,
+    "ሻ": 53,
+    "ሼ": 54,
+    "ሽ": 55,
+    "ሾ": 56,
+    "ቀ": 57,
+    "ቁ": 58,
+    "ቂ": 59,
+    "ቃ": 60,
+    "ቄ": 61,
+    "ቅ": 62,
+    "ቆ": 63,
+    "ቋ": 64,
+    "በ": 65,
+    "ቡ": 66,
+    "ቢ": 67,
+    "ባ": 68,
+    "ቤ": 69,
+    "ብ": 70,
+    "ቦ": 71,
+    "ቧ": 72,
+    "ቨ": 73,
+    "ቪ": 74,
+    "ቫ": 75,
+    "ቬ": 76,
+    "ተ": 77,
+    "ቱ": 78,
+    "ቲ": 79,
+    "ታ": 80,
+    "ቴ": 81,
+    "ት": 82,
+    "ቶ": 83,
+    "ቷ": 84,
+    "ቸ": 85,
+    "ቹ": 86,
+    "ቺ": 87,
+    "ቻ": 88,
+    "ቼ": 89,
+    "ች": 90,
+    "ቿ": 91,
+    "ኃ": 92,
+    "ኅ": 93,
+    "ኋ": 94,
+    "ነ": 95,
+    "ኑ": 96,
+    "ኒ": 97,
+    "ና": 98,
+    "ኔ": 99,
+    "ን": 100,
+    "ኖ": 101,
+    "ኗ": 102,
+    "ኘ": 103,
+    "ኙ": 104,
+    "ኚ": 105,
+    "ኛ": 106,
+    "ኝ": 107,
+    "ኞ": 108,
+    "አ": 109,
+    "ኢ": 110,
+    "ኣ": 111,
+    "ኤ": 112,
+    "እ": 113,
+    "ኦ": 114,
+    "ከ": 115,
+    "ኩ": 116,
+    "ኪ": 117,
+    "ካ": 118,
+    "ኬ": 119,
+    "ክ": 120,
+    "ኮ": 121,
+    "ኰ": 122,
+    "ኲ": 123,
+    "ኳ": 124,
+    "ኸ": 125,
+    "ኼ": 126,
+    "ኽ": 127,
+    "ኾ": 128,
+    "ዃ": 129,
+    "ወ": 130,
+    "ዉ": 131,
+    "ዊ": 132,
+    "ዋ": 133,
+    "ዌ": 134,
+    "ው": 135,
+    "ዎ": 136,
+    "ዐ": 137,
+    "ዓ": 138,
+    "ዕ": 139,
+    "ዖ": 140,
+    "ዘ": 141,
+    "ዙ": 142,
+    "ዚ": 143,
+    "ዛ": 144,
+    "ዜ": 145,
+    "ዝ": 146,
+    "ዞ": 147,
+    "ዟ": 148,
+    "ዢ": 149,
+    "ዣ": 150,
+    "ዤ": 151,
+    "ዥ": 152,
+    "የ": 153,
+    "ዩ": 154,
+    "ዪ": 155,
+    "ያ": 156,
+    "ዬ": 157,
+    "ይ": 158,
+    "ዮ": 159,
+    "ደ": 160,
+    "ዱ": 161,
+    "ዲ": 162,
+    "ዳ": 163,
+    "ዴ": 164,
+    "ድ": 165,
+    "ዶ": 166,
+    "ዷ": 167,
+    "ጀ": 168,
+    "ጁ": 169,
+    "ጂ": 170,
+    "ጃ": 171,
+    "ጅ": 172,
+    "ጆ": 173,
+    "ገ": 174,
+    "ጉ": 175,
+    "ጊ": 176,
+    "ጋ": 177,
+    "ጌ": 178,
+    "ግ": 179,
+    "ጎ": 180,
+    "ጐ": 181,
+    "ጓ": 182,
+    "ጠ": 183,
+    "ጡ": 184,
+    "ጢ": 185,
+    "ጣ": 186,
+    "ጤ": 187,
+    "ጥ": 188,
+    "ጦ": 189,
+    "ጧ": 190,
+    "ጨ": 191,
+    "ጩ": 192,
+    "ጪ": 193,
+    "ጫ": 194,
+    "ጭ": 195,
+    "ጮ": 196,
+    "ጳ": 197,
+    "ጵ": 198,
+    "ጸ": 199,
+    "ጹ": 200,
+    "ጻ": 201,
+    "ጽ": 202,
+    "ጾ": 203,
+    "ጿ": 204,
+    "ፀ": 205,
+    "ፁ": 206,
+    "ፃ": 207,
+    "ፅ": 208,
+    "ፆ": 209,
+    "ፈ": 210,
+    "ፉ": 211,
+    "ፊ": 212,
+    "ፋ": 213,
+    "ፌ": 214,
+    "ፍ": 215,
+    "ፎ": 216,
+    "ፏ": 217,
+    "ፑ": 218,
+    "ፒ": 219,
+    "ፓ": 220,
+    "ፕ": 221,
+    "ፖ": 222,
+    "፥": 223
+  }
+}