mskov committed on
Commit
db75012
•
1 Parent(s): 3836e33

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -2
app.py CHANGED
@@ -4,7 +4,7 @@ os.system("pip install transformers==4.27.0")
4
  os.system("pip install torch")
5
  os.system("pip install openai")
6
  os.system("pip install accelerate")
7
- from transformers import pipeline, WhisperModel, WhisperTokenizer, WhisperFeatureExtractor, AutoFeatureExtractor
8
  os.system("pip install evaluate")
9
  #import evaluate
10
  #os.system("pip install evaluate[evaluator]")
@@ -24,15 +24,33 @@ disable_caching()
24
 
25
  huggingface_token = os.environ["huggingface_token"]
26
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
 
28
  model = WhisperModel.from_pretrained("mskov/whisper_miso", use_auth_token=huggingface_token)
29
  feature_extractor = AutoFeatureExtractor.from_pretrained("mskov/whisper_miso", use_auth_token=huggingface_token)
 
 
30
  ds = load_dataset("mskov/miso_test", split="test")
31
  ds = ds.cast_column("audio", Audio(sampling_rate=16000))
 
 
32
  inputs = feature_extractor(ds[0]["audio"]["array"], return_tensors="pt")
33
  print(inputs)
34
  input_features = inputs.input_features
35
  decoder_input_ids = torch.tensor([[1, 1]]) * model.config.decoder_start_token_id
36
  last_hidden_state = model(input_features, decoder_input_ids=decoder_input_ids).last_hidden_state
37
  list(last_hidden_state.shape)
38
- print(list(last_hidden_state.shape))
 
 
4
  os.system("pip install torch")
5
  os.system("pip install openai")
6
  os.system("pip install accelerate")
7
+ from transformers import pipeline, WhisperModel, WhisperTokenizer, WhisperFeatureExtractor, AutoFeatureExtractor, AutoProcessor
8
  os.system("pip install evaluate")
9
  #import evaluate
10
  #os.system("pip install evaluate[evaluator]")
 
24
 
25
  huggingface_token = os.environ["huggingface_token"]
26
 
27
+ processor = AutoProcessor.from_pretrained("mskov/whisper_miso", use_auth_token=huggingface_token)
28
+
29
+ def prepare_dataset(batch):
30
+ audio = batch["audio"]
31
+ batch["input_values"] = processor(audio["array"], sampling_rate=audio["sampling_rate"]).input_values[0]
32
+ batch["input_length"] = len(batch["input_values"])
33
+ with processor.as_target_processor():
34
+ batch["labels"] = processor(batch["sentence"]).input_ids
35
+ return batch
36
+ dataset = dataset.map(prepare_dataset, remove_columns=dataset.column_names)
37
+ print(dataset)
38
+
39
+ '''
40
 
41
  model = WhisperModel.from_pretrained("mskov/whisper_miso", use_auth_token=huggingface_token)
42
  feature_extractor = AutoFeatureExtractor.from_pretrained("mskov/whisper_miso", use_auth_token=huggingface_token)
43
+
44
+
45
  ds = load_dataset("mskov/miso_test", split="test")
46
  ds = ds.cast_column("audio", Audio(sampling_rate=16000))
47
+
48
+
49
  inputs = feature_extractor(ds[0]["audio"]["array"], return_tensors="pt")
50
  print(inputs)
51
  input_features = inputs.input_features
52
  decoder_input_ids = torch.tensor([[1, 1]]) * model.config.decoder_start_token_id
53
  last_hidden_state = model(input_features, decoder_input_ids=decoder_input_ids).last_hidden_state
54
  list(last_hidden_state.shape)
55
+ print(list(last_hidden_state.shape))
56
+ '''