ylacombe HF staff committed on
Commit
3723049
1 Parent(s): f9aa1ee

Training in progress, step 400

Browse files
added_tokens.json CHANGED
@@ -1,4 +1,4 @@
1
  {
2
- "</s>": 44,
3
- "<s>": 43
4
  }
 
1
  {
2
+ "</s>": 40,
3
+ "<s>": 39
4
  }
config.json CHANGED
@@ -1,12 +1,13 @@
1
  {
2
  "_name_or_path": "ylacombe/w2v-bert-2.0",
3
  "activation_dropout": 0.0,
 
4
  "adapter_kernel_size": 3,
5
  "adapter_stride": 2,
6
  "add_adapter": true,
7
  "apply_spec_augment": false,
8
  "architectures": [
9
- "Wav2Vec2BERTForCTC"
10
  ],
11
  "attention_dropout": 0.0,
12
  "bos_token_id": 1,
@@ -16,7 +17,7 @@
16
  "contrastive_logits_temperature": 0.1,
17
  "conv_depthwise_kernel_size": 31,
18
  "ctc_loss_reduction": "mean",
19
- "ctc_zero_infinity": false,
20
  "diversity_loss_weight": 0.1,
21
  "eos_token_id": 2,
22
  "feat_proj_dropout": 0.0,
@@ -46,7 +47,7 @@
46
  "num_hidden_layers": 24,
47
  "num_negatives": 100,
48
  "output_hidden_size": 1024,
49
- "pad_token_id": 42,
50
  "position_embeddings_type": "relative_key",
51
  "proj_codevector_dim": 768,
52
  "right_max_position_embeddings": 8,
@@ -76,6 +77,6 @@
76
  "transformers_version": "4.37.0.dev0",
77
  "use_intermediate_ffn_before_adapter": false,
78
  "use_weighted_layer_sum": false,
79
- "vocab_size": 45,
80
  "xvector_output_dim": 512
81
  }
 
1
  {
2
  "_name_or_path": "ylacombe/w2v-bert-2.0",
3
  "activation_dropout": 0.0,
4
+ "adapter_act": "relu",
5
  "adapter_kernel_size": 3,
6
  "adapter_stride": 2,
7
  "add_adapter": true,
8
  "apply_spec_augment": false,
9
  "architectures": [
10
+ "Wav2Vec2BertForCTC"
11
  ],
12
  "attention_dropout": 0.0,
13
  "bos_token_id": 1,
 
17
  "contrastive_logits_temperature": 0.1,
18
  "conv_depthwise_kernel_size": 31,
19
  "ctc_loss_reduction": "mean",
20
+ "ctc_zero_infinity": true,
21
  "diversity_loss_weight": 0.1,
22
  "eos_token_id": 2,
23
  "feat_proj_dropout": 0.0,
 
47
  "num_hidden_layers": 24,
48
  "num_negatives": 100,
49
  "output_hidden_size": 1024,
50
+ "pad_token_id": 38,
51
  "position_embeddings_type": "relative_key",
52
  "proj_codevector_dim": 768,
53
  "right_max_position_embeddings": 8,
 
77
  "transformers_version": "4.37.0.dev0",
78
  "use_intermediate_ffn_before_adapter": false,
79
  "use_weighted_layer_sum": false,
80
+ "vocab_size": 41,
81
  "xvector_output_dim": 512
82
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5600e15d85cd8ffffdb0ff8dfd191738b49b52b5f867bca8154cfb7d9aa8120d
3
- size 2422999060
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:402995db86f95097cfa076e02367acc469a241885744074124832242aeba68a1
3
+ size 2422982660
preprocessor_config.json CHANGED
@@ -1,11 +1,10 @@
1
  {
2
- "do_normalize": true,
3
- "feature_extractor_type": "Wav2Vec2FeatureExtractor",
4
  "feature_size": 80,
5
  "num_mel_bins": 80,
6
  "padding_side": "right",
7
  "padding_value": 1,
8
- "processor_class": "Wav2Vec2Processor",
9
  "return_attention_mask": true,
10
  "sampling_rate": 16000,
11
  "stride": 2
 
1
  {
2
+ "feature_extractor_type": "SeamlessM4TFeatureExtractor",
 
3
  "feature_size": 80,
4
  "num_mel_bins": 80,
5
  "padding_side": "right",
6
  "padding_value": 1,
7
+ "processor_class": "Wav2Vec2BertProcessor",
8
  "return_attention_mask": true,
9
  "sampling_rate": 16000,
10
  "stride": 2
runs/Jan15_14-05-01_vorace/events.out.tfevents.1705327546.vorace.343655.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:615ab56e90cda16e37513cc0336e578c4532a54c4f92e57e90956b280bb65a90
3
+ size 4136
runs/Jan15_14-09-37_vorace/events.out.tfevents.1705327823.vorace.344138.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac92050bdb781c16e946dee95f11fca3cc883ef6dbf8572bc4be8398460e5c60
3
+ size 4136
runs/Jan15_14-13-12_vorace/events.out.tfevents.1705328027.vorace.344504.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a25cb677c5b089e5660c6ae93531eceaf6cdbf571303dd2b09d370afd72aeb0f
3
+ size 6648
runs/Jan15_14-28-43_vorace/events.out.tfevents.1705329032.vorace.347376.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5ec06b9bacd698152fbbda2fe9742b62bede44317d954d3a07a3062ae603aa42
3
+ size 4136
runs/Jan15_15-02-19_vorace/events.out.tfevents.1705331015.vorace.348037.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:322330b6e54dfdccf83e665265ee90e434f1c7b65075462dfa0dbd7be67e282f
3
+ size 6912
tokenizer_config.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "added_tokens_decoder": {
3
- "41": {
4
  "content": "[UNK]",
5
  "lstrip": true,
6
  "normalized": false,
@@ -8,7 +8,7 @@
8
  "single_word": false,
9
  "special": false
10
  },
11
- "42": {
12
  "content": "[PAD]",
13
  "lstrip": true,
14
  "normalized": false,
@@ -16,7 +16,7 @@
16
  "single_word": false,
17
  "special": false
18
  },
19
- "43": {
20
  "content": "<s>",
21
  "lstrip": false,
22
  "normalized": false,
@@ -24,7 +24,7 @@
24
  "single_word": false,
25
  "special": true
26
  },
27
- "44": {
28
  "content": "</s>",
29
  "lstrip": false,
30
  "normalized": false,
@@ -39,7 +39,7 @@
39
  "eos_token": "</s>",
40
  "model_max_length": 1000000000000000019884624838656,
41
  "pad_token": "[PAD]",
42
- "processor_class": "Wav2Vec2Processor",
43
  "replace_word_delimiter_char": " ",
44
  "target_lang": null,
45
  "tokenizer_class": "Wav2Vec2CTCTokenizer",
 
1
  {
2
  "added_tokens_decoder": {
3
+ "37": {
4
  "content": "[UNK]",
5
  "lstrip": true,
6
  "normalized": false,
 
8
  "single_word": false,
9
  "special": false
10
  },
11
+ "38": {
12
  "content": "[PAD]",
13
  "lstrip": true,
14
  "normalized": false,
 
16
  "single_word": false,
17
  "special": false
18
  },
19
+ "39": {
20
  "content": "<s>",
21
  "lstrip": false,
22
  "normalized": false,
 
24
  "single_word": false,
25
  "special": true
26
  },
27
+ "40": {
28
  "content": "</s>",
29
  "lstrip": false,
30
  "normalized": false,
 
39
  "eos_token": "</s>",
40
  "model_max_length": 1000000000000000019884624838656,
41
  "pad_token": "[PAD]",
42
+ "processor_class": "Wav2Vec2BertProcessor",
43
  "replace_word_delimiter_char": " ",
44
  "target_lang": null,
45
  "tokenizer_class": "Wav2Vec2CTCTokenizer",
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c59160a475baad18bee490cd0c48f5e0db7fcbb10dbd0a3bf1ab3076b53beb12
3
  size 4728
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:55faed694c10f073dceb228b3f6f82f891efab95cf41709bee3e5c14337451b2
3
  size 4728
vocab.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
- "[PAD]": 42,
3
- "[UNK]": 41,
4
  "a": 1,
5
  "b": 2,
6
  "c": 3,
@@ -30,16 +30,12 @@
30
  "|": 0,
31
  "â": 27,
32
  "ç": 28,
33
- "é": 29,
34
- "ë": 30,
35
- "î": 31,
36
- "ö": 32,
37
- "û": 33,
38
- "ü": 34,
39
- "ğ": 35,
40
- "ı": 36,
41
- "ş": 37,
42
- "̇": 38,
43
- "’": 39,
44
- "…": 40
45
  }
 
1
  {
2
+ "[PAD]": 38,
3
+ "[UNK]": 37,
4
  "a": 1,
5
  "b": 2,
6
  "c": 3,
 
30
  "|": 0,
31
  "â": 27,
32
  "ç": 28,
33
+ "ë": 29,
34
+ "î": 30,
35
+ "ö": 31,
36
+ "ü": 32,
37
+ "ğ": 33,
38
+ "ı": 34,
39
+ "ş": 35,
40
+ "̇": 36
 
 
 
 
41
  }