patrickvonplaten commited on
Commit
bc45eb4
1 Parent(s): abbaedb

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +39 -1
README.md CHANGED
@@ -56,4 +56,42 @@ To transcribe audio files the model can be used as a standalone acoustic model a
56
  # take argmax and decode
57
  predicted_ids = torch.argmax(logits, dim=-1)
58
  transcription = processor.batch_decode(predicted_ids)
59
- ```
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
  # take argmax and decode
57
  predicted_ids = torch.argmax(logits, dim=-1)
58
  transcription = processor.batch_decode(predicted_ids)
59
+ ```
60
+
61
+ ## Evaluation
62
+
63
+ This code snippet shows how to evaluate **facebook/data2vec-audio-base-960h** on LibriSpeech's "clean" and "other" test data.
64
+
65
+ ```python
66
+ from transformers import Wav2Vec2Processor, Data2VecAudioForCTC
67
+ from datasets import load_dataset
68
+ import torch
69
+ from jiwer import wer
70
+
71
+ # load model and processor
72
+ processor = Wav2Vec2Processor.from_pretrained("facebook/data2vec-audio-base-960h")
73
+ model = Data2VecAudioForCTC.from_pretrained("facebook/data2vec-audio-base-960h").to("cuda")
74
+
75
+
76
+ librispeech_eval = load_dataset("librispeech_asr", "clean", split="test")
77
+
78
+ def map_to_pred(batch):
79
+ input_values = processor(batch["audio"][0]["array"], return_tensors="pt", padding="longest").input_values
80
+ with torch.no_grad():
81
+ logits = model(input_values.to("cuda")).logits
82
+
83
+ predicted_ids = torch.argmax(logits, dim=-1)
84
+ transcription = processor.batch_decode(predicted_ids)
85
+ batch["transcription"] = transcription
86
+ return batch
87
+
88
+ result = librispeech_eval.map(map_to_pred, batched=True, batch_size=1, remove_columns=["audio"])
89
+
90
+ print("WER:", wer(result["text"], result["transcription"]))
91
+ ```
92
+
93
+ *Result (WER)*:
94
+
95
+ | "clean" | "other" |
96
+ |---|---|
97
+ | 3.4 | 8.6 |