joaoalvarenga committed on
Commit
815b0ad
1 Parent(s): eb9a32d

Just fixing evaluation script, model has not been updated

Browse files
Files changed (1) hide show
  1. README.md +9 -5
README.md CHANGED
@@ -29,7 +29,7 @@ model-index:
29
  metrics:
30
  - name: Test WER
31
  type: wer
32
- value: 15.734702%
33
  ---
34
 
35
 
@@ -78,23 +78,27 @@ print("Reference:", test_dataset["sentence"][:2])
78
 
79
  The model can be evaluated as follows on the Portuguese test data of Common Voice.
80
 
 
 
81
 
82
  ```python
83
  import torch
84
  import torchaudio
85
  from datasets import load_dataset, load_metric
86
  from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
 
87
  import re
88
 
89
  test_dataset = load_dataset("common_voice", "pt", split="test")
90
  wer = load_metric("wer")
91
 
92
- processor = Wav2Vec2Processor.from_pretrained("joorock12/wav2vec2-large-xlsr-portuguese")
93
- model = Wav2Vec2ForCTC.from_pretrained("joorock12/wav2vec2-large-xlsr-portuguese")
94
  model.to("cuda")
95
 
96
  chars_to_ignore_regex = '[\,\?\.\!\-\;\:\"\“\'\�]'
97
  resampler = torchaudio.transforms.Resample(48_000, 16_000)
 
98
 
99
  # Preprocessing the datasets.
100
  # We need to read the audio files as arrays
@@ -115,7 +119,7 @@ def evaluate(batch):
115
  logits = model(inputs.input_values.to("cuda"), attention_mask=inputs.attention_mask.to("cuda")).logits
116
 
117
  pred_ids = torch.argmax(logits, dim=-1)
118
- batch["pred_strings"] = processor.batch_decode(pred_ids)
119
  return batch
120
 
121
  result = test_dataset.map(evaluate, batched=True, batch_size=8)
@@ -123,7 +127,7 @@ result = test_dataset.map(evaluate, batched=True, batch_size=8)
123
  print("WER: {:2f}".format(100 * wer.compute(predictions=result["pred_strings"], references=result["sentence"])))
124
  ```
125
 
126
- **Test Result (wer)**: 15.734702%
127
 
128
 
129
  ## Training
 
29
  metrics:
30
  - name: Test WER
31
  type: wer
32
+ value: 13.766801%
33
  ---
34
 
35
 
 
78
 
79
  The model can be evaluated as follows on the Portuguese test data of Common Voice.
80
 
81
+ You need to install Enelvo, an open-source spelling-correction tool trained on Twitter user posts:
82
+ `pip install enelvo`
83
 
84
  ```python
85
  import torch
86
  import torchaudio
87
  from datasets import load_dataset, load_metric
88
  from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
89
+ from enelvo import normaliser
90
  import re
91
 
92
  test_dataset = load_dataset("common_voice", "pt", split="test")
93
  wer = load_metric("wer")
94
 
95
+ processor = Wav2Vec2Processor.from_pretrained("joorock12/wav2vec2-large-xlsr-portuguese-a")
96
+ model = Wav2Vec2ForCTC.from_pretrained("joorock12/wav2vec2-large-xlsr-portuguese-a")
97
  model.to("cuda")
98
 
99
  chars_to_ignore_regex = '[\,\?\.\!\-\;\:\"\“\'\�]'
100
  resampler = torchaudio.transforms.Resample(48_000, 16_000)
101
+ norm = normaliser.Normaliser()
102
 
103
  # Preprocessing the datasets.
104
  # We need to read the audio files as arrays
 
119
  logits = model(inputs.input_values.to("cuda"), attention_mask=inputs.attention_mask.to("cuda")).logits
120
 
121
  pred_ids = torch.argmax(logits, dim=-1)
122
+ batch["pred_strings"] = [norm.normalise(i) for i in processor.batch_decode(pred_ids)]
123
  return batch
124
 
125
  result = test_dataset.map(evaluate, batched=True, batch_size=8)
 
127
  print("WER: {:2f}".format(100 * wer.compute(predictions=result["pred_strings"], references=result["sentence"])))
128
  ```
129
 
130
+ **Test Result (wer)**: 13.766801%
131
 
132
 
133
  ## Training