Update README.md
README.md CHANGED
@@ -78,4 +78,24 @@ python eval.py --model_id Maniac/wav2vec2-xls-r-urdu --dataset mozilla-foundatio
```bash
python eval.py --model_id Maniac/wav2vec2-xls-r-urdu --dataset speech-recognition-community-v2/dev_data --config ur --split validation --chunk_length_s 5.0 --stride_length_s 1.0
```
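
The `--chunk_length_s` and `--stride_length_s` flags split long recordings into overlapping windows at inference time, keeping memory use bounded on long validation clips; the exact chunking behaviour is assumed to follow the `transformers` ASR pipeline that `eval.py` wraps.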

### Inference With LM

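A short end-to-end snippet: it streams one example from Common Voice 7.0 (the dataset is gated, hence `use_auth_token=True`), resamples the 48 kHz audio to the 16 kHz the model expects, and decodes the CTC logits with the LM-backed processor. The `ur` config is assumed here, to match the Urdu model and the evaluation command above.
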
```python
import torch
from datasets import load_dataset
from transformers import AutoModelForCTC, AutoProcessor
import torchaudio.functional as F

model_id = "Maniac/wav2vec2-xls-r-urdu"

# stream a single example from the Common Voice 7.0 test split
sample_iter = iter(load_dataset("mozilla-foundation/common_voice_7_0", "ur", split="test", streaming=True, use_auth_token=True))
sample = next(sample_iter)

# Common Voice audio is 48 kHz; the model was trained on 16 kHz input
resampled_audio = F.resample(torch.tensor(sample["audio"]["array"]), 48_000, 16_000).numpy()

model = AutoModelForCTC.from_pretrained(model_id)
processor = AutoProcessor.from_pretrained(model_id)

input_values = processor(resampled_audio, sampling_rate=16_000, return_tensors="pt").input_values

with torch.no_grad():
    logits = model(input_values).logits

# with an LM-equipped repo, batch_decode runs beam search over the logits
transcription = processor.batch_decode(logits.numpy()).text
```
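
`AutoProcessor` resolves to `Wav2Vec2ProcessorWithLM` when the repository ships a KenLM, which is why `batch_decode` accepts raw logits and returns the decoded string in `.text`. The same model should also work through the high-level ASR pipeline, which handles loading, resampling, and (where available) LM decoding internally; below is a minimal sketch, where `audio.wav` is a hypothetical local recording and the chunk/stride values mirror the `eval.py` invocation above:

```python
from transformers import pipeline

# minimal sketch: let the pipeline wrap the model and processor;
# chunk_length_s / stride_length_s enable chunked inference on long audio
asr = pipeline(
    "automatic-speech-recognition",
    model="Maniac/wav2vec2-xls-r-urdu",
    chunk_length_s=5.0,
    stride_length_s=1.0,
)

print(asr("audio.wav")["text"])  # "audio.wav" is a hypothetical input file
```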