mskov committed on
Commit
877c079
•
1 Parent(s): 5e24192

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -23
app.py CHANGED
@@ -20,39 +20,42 @@ from datasets import set_caching_enabled
20
  set_caching_enabled(False)
21
  disable_caching()
22
 
23
- from transformers import pipeline, WhisperModel, WhisperTokenizer, AutoConfig
24
- from datasets import load_dataset
25
 
 
26
 
27
- metric = evaluate.load("wer")
28
-
29
- # Load the Whisper model and tokenizer
30
  huggingface_token = os.environ["huggingface_token"]
31
- whisper_miso =AutoModelForCTC.from_pretrained("mskov/whisper_miso", use_auth_token=huggingface_token)
32
- miso_tokenizer = AutoModelForCTC.from_pretrained("mskov/whisper_miso", use_auth_token=huggingface_token)
33
-
34
- # Initialize the automatic-speech-recognition pipeline with the Whisper model and tokenizer
35
- asr_pipeline = pipeline(
36
- "automatic-speech-recognition",
37
- model=whisper_miso,
38
- tokenizer=miso_tokenizer
39
- )
40
 
41
- # Load the dataset
42
- dataset = load_dataset("mskov/miso_test", split="test").cast_column("audio", Audio())
 
43
 
44
- # Compute the evaluation results
45
- results = asr_pipeline(dataset)
46
- metric = WERMetric()
47
- wer = metric.compute(predictions=results, references=dataset["audio"])
48
- print(wer)
49
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
 
51
 
52
  def transcribe(audio, state=""):
53
  text = p(audio)["text"]
54
  state += text + " "
55
- return state, state
56
 
57
  gr.Interface(
58
  fn=transcribe,
@@ -64,4 +67,4 @@ gr.Interface(
64
  "textbox",
65
  "state"
66
  ],
67
- live=True).launch()
 
20
  set_caching_enabled(False)
21
  disable_caching()
22
 
23
+ p = pipeline("automatic-speech-recognition")
 
24
 
25
+ #config = AutoConfig.from_pretrained('whisper-small')
26
 
 
 
 
27
  huggingface_token = os.environ["huggingface_token"]
 
 
 
 
 
 
 
 
 
28
 
29
+ whisper_miso=WhisperModel.from_pretrained("mskov/whisper_miso", use_auth_token=huggingface_token)
30
+ miso_tokenizer = WhisperTokenizer.from_pretrained("mskov/whisper_miso", use_auth_token=huggingface_token)
31
+ #miso_tokenizer = WhisperTokenizer.from_pretrained("openai/whisper-small", use_auth_token=huggingface_token)
32
 
33
+ task_evaluator = p.task_evaluator
34
+ task_evaluator.feature_extractor = whisper_miso.get_feature_extractor()
 
 
 
35
 
36
+ task_evaluator = evaluator("automatic-speech-recognition")
37
+ #url = {"test" : "https://huggingface.co/datasets/mskov/miso_test/blob/main/test_set.parquet"}
38
+ #data = load_dataset("audiofolder", data_dir="mskov/miso_test")
39
+ # data = load_dataset("audiofolder", data_files=["datasets/mskov/miso_test/test_set/and.wav","mskov/miso_test/test_set/chew1.wav","mskov/miso_test/test_set/chew3.wav", "mskov/miso_test/test_set/chew3.wav","mskov/miso_test/test_set/chew4.wav","mskov/miso_test/test_set/cough1.wav","mskov/miso_test/test_set/cough2.wav","mskov/miso_test/test_set/cough3.wav","mskov/miso_test/test_set/hi.wav","mskov/miso_test/test_set/knock_knock.wav","mskov/miso_test/test_set/mouth_sounds1.wav","mskov/miso_test/test_set/mouth_sounds2.wav","mskov/miso_test/test_set/no.wav","mskov/miso_test/test_set/not_bad.wav","mskov/miso_test/test_set/oh_i_wish.wav","mskov/miso_test/test_set/pop1.wav","mskov/miso_test/test_set/really.wav","mskov/miso_test/test_set/sigh1.wav","mskov/miso_test/test_set/sigh2.wav","mskov/miso_test/test_set/slurp1.wav","mskov/miso_test/test_set/slurp2.wav","mskov/miso_test/test_set/sneeze1.wav","mskov/miso_test/test_set/sneeze2.wav","mskov/miso_test/test_set/so_i_did_it_again.wav"])
40
+ dataset = load_dataset("mskov/miso_test", split="test").cast_column("audio", Audio())
41
+ results = task_evaluator.compute(
42
+ model_or_pipeline=whisper_miso,
43
+ #model_or_pipeline="mskov/whisper-small.en",
44
+ data=dataset,
45
+ tokenizer=miso_tokenizer,
46
+ input_column="audio",
47
+ label_column="audio",
48
+ # device=None,
49
+ strategy="simple",
50
+ metric="wer",
51
+ )
52
+ print(results)
53
 
54
 
55
  def transcribe(audio, state=""):
56
  text = p(audio)["text"]
57
  state += text + " "
58
+ return state, state
59
 
60
  gr.Interface(
61
  fn=transcribe,
 
67
  "textbox",
68
  "state"
69
  ],
70
+ live=True).launch()