Files changed (1) hide show
  1. README.md +5 -5
README.md CHANGED
@@ -73,19 +73,19 @@ For more information, please take a look at the [official paper](https://arxiv.o
73
  To transcribe audio files the model can be used as a standalone acoustic model as follows:
74
 
75
  ```python
76
- from transformers import Wav2Vec2Processor, Data2VecForCTC
77
  from datasets import load_dataset
78
  import torch
79
 
80
  # load model and processor
81
  processor = Wav2Vec2Processor.from_pretrained("facebook/data2vec-audio-base-960h")
82
- model = Data2VecForCTC.from_pretrained("facebook/data2vec-audio-base-960h")
83
 
84
  # load dummy dataset and read soundfiles
85
  ds = load_dataset("patrickvonplaten/librispeech_asr_dummy", "clean", split="validation")
86
 
87
  # tokenize
88
- input_values = processor(ds[0]["audio"]["array"],, return_tensors="pt", padding="longest").input_values # Batch size 1
89
 
90
  # retrieve logits
91
  logits = model(input_values).logits
@@ -100,14 +100,14 @@ To transcribe audio files the model can be used as a standalone acoustic model a
100
  This code snippet shows how to evaluate **facebook/data2vec-audio-base-960h** on LibriSpeech's "clean" and "other" test data.
101
 
102
  ```python
103
- from transformers import Wav2Vec2Processor, Data2VecForCTC
104
  from datasets import load_dataset
105
  import torch
106
  from jiwer import wer
107
 
108
  # load model and processor
109
  processor = Wav2Vec2Processor.from_pretrained("facebook/data2vec-audio-base-960h")
110
- model = Data2VecForCTC.from_pretrained("facebook/data2vec-audio-base-960h")
111
 
112
 
113
  librispeech_eval = load_dataset("librispeech_asr", "clean", split="test")
 
73
  To transcribe audio files the model can be used as a standalone acoustic model as follows:
74
 
75
  ```python
76
+ from transformers import Wav2Vec2Processor, Data2VecAudioForCTC
77
  from datasets import load_dataset
78
  import torch
79
 
80
  # load model and processor
81
  processor = Wav2Vec2Processor.from_pretrained("facebook/data2vec-audio-base-960h")
82
+ model = Data2VecAudioForCTC.from_pretrained("facebook/data2vec-audio-base-960h")
83
 
84
  # load dummy dataset and read soundfiles
85
  ds = load_dataset("patrickvonplaten/librispeech_asr_dummy", "clean", split="validation")
86
 
87
  # tokenize
88
+ input_values = processor(ds[0]["audio"]["array"], return_tensors="pt", padding="longest").input_values # Batch size 1
89
 
90
  # retrieve logits
91
  logits = model(input_values).logits
 
100
  This code snippet shows how to evaluate **facebook/data2vec-audio-base-960h** on LibriSpeech's "clean" and "other" test data.
101
 
102
  ```python
103
+ from transformers import Wav2Vec2Processor, Data2VecAudioForCTC
104
  from datasets import load_dataset
105
  import torch
106
  from jiwer import wer
107
 
108
  # load model and processor
109
  processor = Wav2Vec2Processor.from_pretrained("facebook/data2vec-audio-base-960h")
110
+ model = Data2VecAudioForCTC.from_pretrained("facebook/data2vec-audio-base-960h").to("cuda")
111
 
112
 
113
  librispeech_eval = load_dataset("librispeech_asr", "clean", split="test")