mskov committed on
Commit
75e29dc
•
1 Parent(s): c4cba89

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -53
app.py CHANGED
@@ -22,56 +22,12 @@ set_caching_enabled(False)
22
  disable_caching()
23
 
24
 
25
- #config = AutoConfig.from_pretrained('whisper-small')
26
-
27
- huggingface_token = os.environ["huggingface_token"]
28
-
29
- miso_tokenizer = WhisperTokenizer.from_pretrained("mskov/whisper_miso", use_auth_token=huggingface_token)
30
- miso_feature_extractor = WhisperFeatureExtractor.from_pretrained("mskov/whisper_miso", use_auth_token=huggingface_token)
31
- whisper_miso=WhisperModel.from_pretrained("mskov/whisper_miso", use_auth_token=huggingface_token, tokenizer=miso_tokenizer, device_map="auto")
32
-
33
- #whisper_miso=WhisperModel.from_pretrained("openai/whisper-small", use_auth_token=huggingface_token, device_map="auto")
34
-
35
-
36
-
37
-
38
- task_evaluator = evaluator("automatic-speech-recognition")
39
-
40
- #url = {"test" : "https://huggingface.co/datasets/mskov/miso_test/blob/main/test_set.parquet"}
41
- #data = load_dataset("audiofolder", data_dir="mskov/miso_test")
42
- # data = load_dataset("audiofolder", data_files=["datasets/mskov/miso_test/test_set/and.wav","mskov/miso_test/test_set/chew1.wav","mskov/miso_test/test_set/chew3.wav", "mskov/miso_test/test_set/chew3.wav","mskov/miso_test/test_set/chew4.wav","mskov/miso_test/test_set/cough1.wav","mskov/miso_test/test_set/cough2.wav","mskov/miso_test/test_set/cough3.wav","mskov/miso_test/test_set/hi.wav","mskov/miso_test/test_set/knock_knock.wav","mskov/miso_test/test_set/mouth_sounds1.wav","mskov/miso_test/test_set/mouth_sounds2.wav","mskov/miso_test/test_set/no.wav","mskov/miso_test/test_set/not_bad.wav","mskov/miso_test/test_set/oh_i_wish.wav","mskov/miso_test/test_set/pop1.wav","mskov/miso_test/test_set/really.wav","mskov/miso_test/test_set/sigh1.wav","mskov/miso_test/test_set/sigh2.wav","mskov/miso_test/test_set/slurp1.wav","mskov/miso_test/test_set/slurp2.wav","mskov/miso_test/test_set/sneeze1.wav","mskov/miso_test/test_set/sneeze2.wav","mskov/miso_test/test_set/so_i_did_it_again.wav"])
43
- #dataset = load_dataset("mskov/miso_test", split="test").cast_column("audio", Audio())
44
- # dataset = load_dataset("mskov/miso_test", split="test")
45
- dataset = load_dataset("mskov/ESC50", split="test")
46
- print(dataset)
47
- results = task_evaluator.compute(
48
- model_or_pipeline=whisper_miso,
49
- #model_or_pipeline="mskov/whisper-small.en",
50
- data=dataset,
51
- tokenizer=miso_tokenizer,
52
- #feature_extractor=miso_feature_extractor,
53
- input_column="audio",
54
- label_column="category",
55
- # device=None,
56
- strategy="simple",
57
- metric="wer",
58
- )
59
- print(results)
60
-
61
-
62
- def transcribe(audio, state=""):
63
- text = p(audio)["text"]
64
- state += text + " "
65
- return state, state
66
-
67
- gr.Interface(
68
- fn=transcribe,
69
- inputs=[
70
- gr.Audio(source="microphone", type="filepath", streaming=True),
71
- "state"
72
- ],
73
- outputs=[
74
- "textbox",
75
- "state"
76
- ],
77
- live=True).launch()
 
22
  disable_caching()
23
 
24
 
25
+ model = WhisperModel.from_pretrained("mskov/whisper_miso")
26
+ feature_extractor = AutoFeatureExtractor.from_pretrained("mskov/whisper_miso")
27
+ ds = load_dataset("mskov.ESC50", split="test")
28
+ inputs = feature_extractor(ds[0]["audio"]["array"], return_tensors="pt")
29
+ input_features = inputs.input_features
30
+ decoder_input_ids = torch.tensor([[1, 1]]) * model.config.decoder_start_token_id
31
+ last_hidden_state = model(input_features, decoder_input_ids=decoder_input_ids).last_hidden_state
32
+ list(last_hidden_state.shape)
33
+ print(list(last_hidden_state.shape))