mskov committed on
Commit
15fe17a
•
1 Parent(s): 4ef4640

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -24
app.py CHANGED
@@ -18,36 +18,31 @@ from datasets import set_caching_enabled
18
  set_caching_enabled(False)
19
  disable_caching()
20
 
21
- p = pipeline("automatic-speech-recognition")
22
-
23
- #config = AutoConfig.from_pretrained('whisper-small')
24
 
 
25
  huggingface_token = os.environ["huggingface_token"]
26
-
27
- whisper_miso=WhisperModel.from_pretrained("mskov/whisper_miso", use_auth_token=huggingface_token)
28
  miso_tokenizer = WhisperTokenizer.from_pretrained("mskov/whisper_miso", use_auth_token=huggingface_token)
29
- #miso_tokenizer = WhisperTokenizer.from_pretrained("openai/whisper-small", use_auth_token=huggingface_token)
30
 
31
- task_evaluator = p.task_evaluator
32
- task_evaluator.feature_extractor = whisper_miso.get_feature_extractor()
 
 
 
 
33
 
34
- task_evaluator = evaluator("automatic-speech-recognition")
35
- #url = {"test" : "https://huggingface.co/datasets/mskov/miso_test/blob/main/test_set.parquet"}
36
- #data = load_dataset("audiofolder", data_dir="mskov/miso_test")
37
- # data = load_dataset("audiofolder", data_files=["datasets/mskov/miso_test/test_set/and.wav","mskov/miso_test/test_set/chew1.wav","mskov/miso_test/test_set/chew3.wav", "mskov/miso_test/test_set/chew3.wav","mskov/miso_test/test_set/chew4.wav","mskov/miso_test/test_set/cough1.wav","mskov/miso_test/test_set/cough2.wav","mskov/miso_test/test_set/cough3.wav","mskov/miso_test/test_set/hi.wav","mskov/miso_test/test_set/knock_knock.wav","mskov/miso_test/test_set/mouth_sounds1.wav","mskov/miso_test/test_set/mouth_sounds2.wav","mskov/miso_test/test_set/no.wav","mskov/miso_test/test_set/not_bad.wav","mskov/miso_test/test_set/oh_i_wish.wav","mskov/miso_test/test_set/pop1.wav","mskov/miso_test/test_set/really.wav","mskov/miso_test/test_set/sigh1.wav","mskov/miso_test/test_set/sigh2.wav","mskov/miso_test/test_set/slurp1.wav","mskov/miso_test/test_set/slurp2.wav","mskov/miso_test/test_set/sneeze1.wav","mskov/miso_test/test_set/sneeze2.wav","mskov/miso_test/test_set/so_i_did_it_again.wav"])
38
  dataset = load_dataset("mskov/miso_test", split="test").cast_column("audio", Audio())
39
- results = task_evaluator.compute(
40
- model_or_pipeline=whisper_miso,
41
- #model_or_pipeline="mskov/whisper-small.en",
42
- data=dataset,
43
- tokenizer=miso_tokenizer,
44
- input_column="audio",
45
- label_column="audio",
46
- # device=None,
47
- strategy="simple",
48
- metric="wer",
49
- )
50
- print(results)
51
 
52
 
53
  def transcribe(audio, state=""):
 
18
  set_caching_enabled(False)
19
  disable_caching()
20
 
21
+ from transformers import pipeline, WhisperModel, WhisperTokenizer, AutoConfig
22
+ from datasets import load_dataset
23
+ from transformers import WERMetric
24
 
25
+ # Load the Whisper model and tokenizer
26
  huggingface_token = os.environ["huggingface_token"]
27
+ whisper_miso = WhisperModel.from_pretrained("mskov/whisper_miso", use_auth_token=huggingface_token)
 
28
  miso_tokenizer = WhisperTokenizer.from_pretrained("mskov/whisper_miso", use_auth_token=huggingface_token)
 
29
 
30
+ # Initialize the automatic-speech-recognition pipeline with the Whisper model and tokenizer
31
+ asr_pipeline = pipeline(
32
+ "automatic-speech-recognition",
33
+ model=whisper_miso,
34
+ tokenizer=miso_tokenizer
35
+ )
36
 
37
+ # Load the dataset
 
 
 
38
  dataset = load_dataset("mskov/miso_test", split="test").cast_column("audio", Audio())
39
+
40
+ # Compute the evaluation results
41
+ results = asr_pipeline(dataset)
42
+ metric = WERMetric()
43
+ wer = metric.compute(predictions=results, references=dataset["audio"])
44
+ print(wer)
45
+
 
 
 
 
 
46
 
47
 
48
  def transcribe(audio, state=""):