set 'levi' as default WER normalizer
benchmark_utils.py  +8 -50

benchmark_utils.py  CHANGED
@@ -66,8 +66,9 @@ def ASRmanifest(
     with torch.no_grad():
         with autocast():
             try:
-                result = asr_pipeline(audiofile)
+                result = asr_pipeline(audiofile )
                 asrtext = result['text']
+                asr_pipeline.call_count = 0
             except (FileNotFoundError, ValueError) as e:
                 print(f'SKIPPED: {audiofile}')
                 continue
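Note: the added asr_pipeline.call_count = 0 appears intended to reset the transformers sequential-call counter after every file, so the "using the pipelines sequentially on GPU" warning is not raised on long manifests. A minimal sketch of that pattern, with the surrounding loop assumed (ASRmanifest's real loop, timing, and output handling are not shown in this hunk):

import torch
from torch.cuda.amp import autocast

def transcribe_files(asr_pipeline, audiofiles):
    # Assumed helper, not part of benchmark_utils.py: run an ASR pipeline over
    # a list of audio paths one file at a time.
    texts = {}
    with torch.no_grad():
        with autocast():
            for audiofile in audiofiles:
                try:
                    result = asr_pipeline(audiofile)
                    texts[audiofile] = result['text']
                    # transformers increments Pipeline.call_count on every call and
                    # warns after ~10 sequential GPU calls; zeroing it silences that.
                    asr_pipeline.call_count = 0
                except (FileNotFoundError, ValueError):
                    print(f'SKIPPED: {audiofile}')
    return texts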
@@ -77,49 +78,6 @@ def ASRmanifest(
     compute_time = (et-st)
     print(f'...transcription complete in {compute_time:.1f} sec')
 
-def load_model(
-        model_path:str,
-        language='english',
-        use_int8 = False,
-        device_map='auto'):
-
-    warnings.filterwarnings("ignore")
-    transformers.utils.logging.set_verbosity_error()
-
-    try:
-        model = WhisperForConditionalGeneration.from_pretrained(
-            model_path,
-            load_in_8bit=use_int8,
-            device_map=device_map,
-            use_cache=False,
-        )
-        try:
-            processor=WhisperProcessor.from_pretrained(model_path, language=language, task="transcribe")
-        except OSError:
-            print('missing tokenizer and preprocessor config files in save dir, checking directory above...')
-            processor=WhisperProcessor.from_pretrained(os.path.join(model_path,'..'), language=language, task="transcribe")
-
-    except OSError as e:
-        print(f'{e}: possibly missing model or config file in model path. Will check for adapter...')
-        # check if PEFT
-        if os.path.isdir(os.path.join(model_path , "adapter_model")):
-            print('found adapter...loading PEFT model')
-            # checkpoint dir needs adapter model subdir with adapter_model.bin and adapter_confg.json
-            peft_config = PeftConfig.from_pretrained(os.path.join(model_path , "adapter_model"))
-            print(f'...loading and merging LORA weights to base model {peft_config.base_model_name_or_path}')
-            model = WhisperForConditionalGeneration.from_pretrained(peft_config.base_model_name_or_path,
-                load_in_8bit=use_int8,
-                device_map=device_map,
-                use_cache=False,
-            )
-            model = PeftModel.from_pretrained(model, os.path.join(model_path,"adapter_model"))
-            model = model.merge_and_unload()
-            processor = WhisperProcessor.from_pretrained(peft_config.base_model_name_or_path, language=language, task="transcribe")
-        else:
-            raise e
-    model.eval()
-    return(model, processor)
-
 def prepare_pipeline(model_path, generate_opts):
     """Prepare a pipeline for ASR inference
     Args:
@@ -128,16 +86,16 @@ def prepare_pipeline(model_path, generate_opts):
     Returns:
         pipeline: ASR pipeline
     """
-    model, processor = load_model(
-        model_path=model_path)
+    processor = WhisperProcessor.from_pretrained(model_path)
 
     asr_pipeline = pipeline(
         "automatic-speech-recognition",
-        model=model,
+        model=model_path,
         tokenizer=processor.tokenizer,
         feature_extractor=processor.feature_extractor,
         generate_kwargs=generate_opts,
-        )
+        model_kwargs={"load_in_8bit": False},
+        device_map='auto')
     return asr_pipeline
 
 #%% WER evaluation functions
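For reference, a hypothetical call to the updated prepare_pipeline; the checkpoint id, audio path, and generation options below are illustrative placeholders, not values from this repo:

from benchmark_utils import prepare_pipeline

# Assumed generation options: anything WhisperForConditionalGeneration.generate()
# accepts can be passed through generate_kwargs.
generate_opts = {"max_new_tokens": 225, "language": "english", "task": "transcribe"}

asr_pipeline = prepare_pipeline(
    model_path="openai/whisper-small",   # placeholder: local checkpoint dir or hub id
    generate_opts=generate_opts,
)

result = asr_pipeline("example.wav")     # placeholder audio file
print(result["text"])

With model= given as a path, the pipeline loads the checkpoint itself, so no separate model-loading helper is needed.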
@@ -285,7 +243,7 @@ def wer_from_df(
     hypcol='hyp',
     return_alignments=False,
     normalise = True,
-    text_norm_method='
+    text_norm_method='levi',
     printout=True):
     """Compute WER from a dataframe containing a ref col and a hyp col
     WER is computed on the edit operation counts over the whole df,
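The docstring's point that WER is computed from edit-operation counts pooled over the whole dataframe (rather than averaging per-row WERs) matters when utterance lengths vary. A hypothetical call on a small dataframe; the refcol keyword is assumed by symmetry with hypcol, and the structure of the return value is not shown in this diff:

import pandas as pd
from benchmark_utils import wer_from_df

df = pd.DataFrame({
    'ref': ['the cat sat on the mat', 'hello world'],
    'hyp': ['the cat sat on a mat', 'hello word'],
})

# Edit operations are pooled across all rows before dividing by the total number
# of reference words, so longer utterances carry more weight than short ones.
result = wer_from_df(df, refcol='ref', hypcol='hyp', printout=True)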
@@ -338,7 +296,7 @@ def wer_from_csv(
     hypcol='hyp',
     return_alignments=False,
     normalise = True,
-    text_norm_method='
+    text_norm_method='levi' ,
     printout=True):
 
     res = pd.read_csv(csv_path).astype(str)
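With 'levi' as the default text_norm_method, a caller only needs to pass the argument to pick a different normaliser. A hypothetical call; the csv path is a placeholder, csv_path is assumed to be the first parameter, and only hypcol, normalise, text_norm_method and printout appear in this hunk:

from benchmark_utils import wer_from_csv

# 'results.csv' is a placeholder file holding reference and hypothesis text columns.
# With normalise=True (the default), both columns are normalised with the 'levi'
# method before scoring unless another text_norm_method is passed explicitly.
wer_result = wer_from_csv(
    'results.csv',
    hypcol='hyp',
    printout=True,
)   # return value assumed; its exact structure is not shown in this diff

# The same call with the normaliser spelled out explicitly:
wer_result = wer_from_csv('results.csv', text_norm_method='levi')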