blackhole33 committed on
Commit
f5e1a7e
1 Parent(s): ed66e1d

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +58 -3
README.md CHANGED
@@ -1,3 +1,58 @@
1
- ---
2
- license: apache-2.0
3
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ datasets:
4
+ - mozilla-foundation/common_voice_17_0
5
+ language:
6
+ - uz
7
+ metrics:
8
+ - wer
9
+ base_model: facebook/wav2vec2-base-960h
10
+ pipeline_tag: automatic-speech-recognition
11
+ library_name: adapter-transformers
12
+ ---
13
+
14
+ # Author
15
+
16
+ Mamayusupov Rifat.
17
+
18
+ # Usage
19
+
20
+ ```python
21
+ from transformers import SeamlessM4TFeatureExtractor, Wav2Vec2BertProcessor, Wav2Vec2CTCTokenizer, Wav2Vec2BertForCTC
22
+ from transformers import pipeline
23
+
24
+ # Initialize tokenizer
25
+ tokenizer = Wav2Vec2CTCTokenizer.from_pretrained("/home/rifat/asr", unk_token="[UNK]", pad_token="[PAD]", word_delimiter_token="|")
26
+
27
+ # Initialize feature extractor
28
+ feature_extractor = SeamlessM4TFeatureExtractor(feature_size=80, num_mel_bins=80, sampling_rate=16000, padding_value=0.0)
29
+
30
+ # Initialize processor
31
+ processor = Wav2Vec2BertProcessor(feature_extractor=feature_extractor, tokenizer=tokenizer)
32
+
33
+ # Initialize model
34
+ model = Wav2Vec2BertForCTC.from_pretrained(
35
+ args.pretrained_model,
36
+ attention_dropout=0.0,
37
+ hidden_dropout=0.0,
38
+ feat_proj_dropout=0.0,
39
+ mask_time_prob=0.0,
40
+ layerdrop=0.0,
41
+ ctc_loss_reduction="mean",
42
+ add_adapter=True,
43
+ pad_token_id=processor.tokenizer.pad_token_id,
44
+ vocab_size=len(processor.tokenizer),
45
+ ignore_mismatched_sizes=True
46
+ )
47
+
48
+ model.config.ctc_zero_infinity = True
49
+ model.to("cuda")
50
+
51
+ # Perform inference
52
+ # Initialize the pipeline
53
+ pipe = pipeline(model=model, tokenizer=processor.tokenizer, feature_extractor=feature_extractor, task="automatic-speech-recognition")
54
+
55
+ input_audio = ""  # path to the audio file to transcribe
56
+ print(pipe(input_audio)['text'])
57
+
58
+ ```