blackhole33
committed on
Commit
•
f5e1a7e
1
Parent(s):
ed66e1d
Update README.md
Browse files
README.md
CHANGED
@@ -1,3 +1,58 @@
|
|
1 |
-
---
|
2 |
-
license: apache-2.0
|
3 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
license: apache-2.0
|
3 |
+
datasets:
|
4 |
+
- mozilla-foundation/common_voice_17_0
|
5 |
+
language:
|
6 |
+
- uz
|
7 |
+
metrics:
|
8 |
+
- wer
|
9 |
+
base_model: facebook/wav2vec2-base-960h
|
10 |
+
pipeline_tag: automatic-speech-recognition
|
11 |
+
library_name: transformers
|
12 |
+
---
|
13 |
+
|
14 |
+
# Author
|
15 |
+
|
16 |
+
Mamayusupov Rifat.
|
17 |
+
|
18 |
+
# Usage
|
19 |
+
|
20 |
+
```python
|
21 |
+
from transformers import SeamlessM4TFeatureExtractor, Wav2Vec2BertProcessor, Wav2Vec2CTCTokenizer, Wav2Vec2BertForCTC
|
22 |
+
from transformers import pipeline
|
23 |
+
|
24 |
+
# Initialize tokenizer (replace the path below with the directory or Hub repo that holds this model's tokenizer files)
|
25 |
+
tokenizer = Wav2Vec2CTCTokenizer.from_pretrained("/home/rifat/asr", unk_token="[UNK]", pad_token="[PAD]", word_delimiter_token="|")
|
26 |
+
|
27 |
+
# Initialize feature extractor
|
28 |
+
feature_extractor = SeamlessM4TFeatureExtractor(feature_size=80, num_mel_bins=80, sampling_rate=16000, padding_value=0.0)
|
29 |
+
|
30 |
+
# Initialize processor
|
31 |
+
processor = Wav2Vec2BertProcessor(feature_extractor=feature_extractor, tokenizer=tokenizer)
|
32 |
+
|
33 |
+
# Initialize model
|
34 |
+
model = Wav2Vec2BertForCTC.from_pretrained(
|
35 |
+
"path/to/pretrained_model",  # `args` is not defined in this snippet; pass the Hub repo ID or local checkpoint directory
|
36 |
+
attention_dropout=0.0,
|
37 |
+
hidden_dropout=0.0,
|
38 |
+
feat_proj_dropout=0.0,
|
39 |
+
mask_time_prob=0.0,
|
40 |
+
layerdrop=0.0,
|
41 |
+
ctc_loss_reduction="mean",
|
42 |
+
add_adapter=True,
|
43 |
+
pad_token_id=processor.tokenizer.pad_token_id,
|
44 |
+
vocab_size=len(processor.tokenizer),
|
45 |
+
ignore_mismatched_sizes=True
|
46 |
+
)
|
47 |
+
|
48 |
+
model.config.ctc_zero_infinity = True
|
49 |
+
model.to("cuda")
|
50 |
+
|
51 |
+
# Perform inference
|
52 |
+
# Initialize the pipeline
|
53 |
+
pipe = pipeline(model=model, tokenizer=processor.tokenizer, feature_extractor=feature_extractor, task="automatic-speech-recognition")
|
54 |
+
|
55 |
+
input_audio = ""  # set this to the path of the audio file to transcribe
|
56 |
+
print(pipe(input_audio)['text'])
|
57 |
+
|
58 |
+
```
|