Add code example
README.md CHANGED
@@ -69,3 +69,49 @@ torchrun --standalone --nnodes=1 --nproc-per-node=2 ../train_w2v2_bert.py \
 --mask_feature_prob 0.0 \
 --mask_feature_length 10
 ```
+
+## Usage
+
+```python
+# pip install -U torch soundfile transformers
+
+import torch
+import soundfile as sf
+from transformers import AutoModelForCTC, Wav2Vec2BertProcessor
+
+# Config
+model_name = 'Yehor/w2v-bert-2.0-uk'
+device = 'cuda:1'  # or 'cpu'
+sampling_rate = 16_000
+
+# Load the model and its processor
+asr_model = AutoModelForCTC.from_pretrained(model_name).to(device)
+processor = Wav2Vec2BertProcessor.from_pretrained(model_name)
+
+# Audio files to transcribe, expected to be sampled at 16 kHz
+paths = [
+    'sample1.wav',
+]
+
+# Read the audio data
+audio_inputs = []
+for path in paths:
+    audio_input, _ = sf.read(path)
+    audio_inputs.append(audio_input)
+
+# Transcribe the audio
+inputs = processor(audio_inputs, sampling_rate=sampling_rate).input_features
+features = torch.tensor(inputs).to(device)
+
+with torch.no_grad():
+    logits = asr_model(features).logits
+
+predicted_ids = torch.argmax(logits, dim=-1)
+predictions = processor.batch_decode(predicted_ids)
+
+# Log outputs
+print('---')
+print('Predictions:')
+print(predictions)
+print('---')
+```
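
Note that `torch.tensor(inputs)` only stacks cleanly when every clip in the batch has the same length. For several files of different durations, the processor can pad the batch and return tensors directly. A minimal sketch, assuming the standard `padding`/`return_tensors` arguments of Hugging Face feature extractors and that the model accepts an `attention_mask`:

```python
# Pad variable-length clips into a single batch and get PyTorch tensors back
batch = processor(audio_inputs, sampling_rate=sampling_rate,
                  padding=True, return_tensors='pt')
features = batch.input_features.to(device)
attention_mask = batch.attention_mask.to(device)  # marks real vs. padded frames

with torch.no_grad():
    logits = asr_model(features, attention_mask=attention_mask).logits

predictions = processor.batch_decode(torch.argmax(logits, dim=-1))
```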
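
If reference transcriptions are available for the audio files, a word error rate can be computed against the predictions with the `evaluate` library. A minimal sketch, where the `references` list is a hypothetical stand-in for the ground-truth texts:

```python
# pip install -U evaluate jiwer
import evaluate

# Hypothetical ground-truth transcriptions, one per file in `paths`
references = [
    'reference text for sample1.wav',
]

wer_metric = evaluate.load('wer')
print('WER:', wer_metric.compute(predictions=predictions, references=references))
```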