---
license: apache-2.0
datasets:
- mozilla-foundation/common_voice_17_0
language:
- uz
metrics:
- wer
base_model: facebook/wav2vec2-base-960h
pipeline_tag: automatic-speech-recognition
library_name: adapter-transformers
---

# Author

Mamayusupov Rifat.

# Usage

```
from transformers import SeamlessM4TFeatureExtractor, Wav2Vec2BertProcessor, Wav2Vec2CTCTokenizer, Wav2Vec2BertForCTC
from transformers import pipeline

# Path to the model and tokenizer files (adjust to your local checkout or Hub repo ID)
model_path = "/home/rifat/asr"

# Initialize tokenizer
tokenizer = Wav2Vec2CTCTokenizer.from_pretrained(
    model_path,
    unk_token="[UNK]",
    pad_token="[PAD]",
    word_delimiter_token="|",
)

# Initialize feature extractor
feature_extractor = SeamlessM4TFeatureExtractor(
    feature_size=80,
    num_mel_bins=80,
    sampling_rate=16000,
    padding_value=0.0,
)

# Initialize processor
processor = Wav2Vec2BertProcessor(feature_extractor=feature_extractor, tokenizer=tokenizer)

# Initialize model
model = Wav2Vec2BertForCTC.from_pretrained(
    model_path,
    attention_dropout=0.0,
    hidden_dropout=0.0,
    feat_proj_dropout=0.0,
    mask_time_prob=0.0,
    layerdrop=0.0,
    ctc_loss_reduction="mean",
    add_adapter=True,
    pad_token_id=processor.tokenizer.pad_token_id,
    vocab_size=len(processor.tokenizer),
    ignore_mismatched_sizes=True,
)

model.config.ctc_zero_infinity = True
model.to("cuda")  # move to GPU; use "cpu" if no CUDA device is available

# Initialize the inference pipeline
pipe = pipeline(
    task="automatic-speech-recognition",
    model=model,
    tokenizer=processor.tokenizer,
    feature_extractor=feature_extractor,
)

# Perform inference
input_audio = ""  # path to a 16 kHz audio file
print(pipe(input_audio)["text"])
```
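# Evaluation

The card lists WER on Common Voice 17.0 (Uzbek) as the evaluation metric. Below is a minimal sketch of how such a score could be computed with the `datasets` and `evaluate` libraries, reusing the `pipe` object from the usage snippet above. The split slice and lowercase normalization are assumptions for illustration, not the author's exact evaluation setup.

```
from datasets import load_dataset, Audio
import evaluate

# Common Voice 17.0 is gated: accept the terms on the Hub and log in first.
# Depending on your datasets version, trust_remote_code=True may be required.
cv = load_dataset("mozilla-foundation/common_voice_17_0", "uz", split="test[:100]")
cv = cv.cast_column("audio", Audio(sampling_rate=16000))  # resample to 16 kHz

wer_metric = evaluate.load("wer")

predictions, references = [], []
for sample in cv:
    # The ASR pipeline accepts a dict with the raw waveform and its sampling rate.
    out = pipe({"raw": sample["audio"]["array"], "sampling_rate": sample["audio"]["sampling_rate"]})
    predictions.append(out["text"].lower())        # assumed normalization
    references.append(sample["sentence"].lower())  # assumed normalization

print("WER:", wer_metric.compute(predictions=predictions, references=references))
```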