mskov committed on
Commit 9de729b • 1 Parent(s): db71e09

Update app.py

Files changed (1)
  1. app.py +8 -58
app.py CHANGED
@@ -1,62 +1,12 @@
- import os
- import sys
- os.system("pip install transformers==4.27.0")
- os.system("pip install numpy==1.23")
- from transformers import pipeline, WhisperModel, WhisperTokenizer, WhisperFeatureExtractor, AutoFeatureExtractor, AutoProcessor, WhisperConfig, WhisperProcessor, WhisperForConditionalGeneration
- os.system("pip install jiwer")
- from jiwer import wer
- os.system("pip install datasets[audio]")
- from evaluate import evaluator, load
- from transformers import AutoModelForSequenceClassification, pipeline, BertTokenizer, AutoTokenizer, GPT2Model
- from datasets import load_dataset, Audio, disable_caching, set_caching_enabled
- import gradio as gr
- import torch
- from datasets import load_dataset
- from transformers import WhisperForConditionalGeneration, WhisperProcessor


- processor = WhisperProcessor.from_pretrained("mskov/whisper-small-esc50")
- model = WhisperForConditionalGeneration.from_pretrained("mskov/whisper-small-esc50").to("cuda")

- def map_to_pred(batch):
-     audio = batch["audio"]
-     input_features = processor(audio["array"], sampling_rate=16000, return_tensors="pt").input_features
-     batch["reference"] = processor.tokenizer._normalize(batch['category'])

-     with torch.no_grad():
-         predicted_ids = model.generate(input_features.to("cuda"))[0]
-     transcription = processor.decode(predicted_ids)
-     batch["prediction"] = processor.tokenizer._normalize(transcription)
-     print(batch["prediction"])
-     return batch
-
- result = librispeech_test_clean.map(map_to_pred)
-
- wer = load("wer")
- print(100 * wer.compute(references=result["reference"], predictions=result["prediction"]))
-
-
-
- def transcribe(audio):
-     text = pipe(audio)["text"]
-     return text, test
-
- iface = gr.Interface(
-     fn=transcribe,
-     inputs=gr.Audio(source="microphone", type="filepath"),
-     outputs="text",
-     title="Whisper Small ESC50 Test",
- )
-
- iface.launch()
-
-
- '''
- print("check check")
- print(inputs)
- input_features = inputs.input_features
- decoder_input_ids = torch.tensor([[1, 1]]) * model.config.decoder_start_token_id
- last_hidden_state = model(input_features, decoder_input_ids=decoder_input_ids).last_hidden_state
- list(last_hidden_state.shape)
- print(list(last_hidden_state.shape))
- '''
 
+ # TEST MODEL

+ from transformers import pipeline

+ repository_id = "mskov/roberta-base-toxicity"
+ classifier = pipeline('text-classification', repository_id)

+ text = "Kederis proclaims innocence Olympic champion Kostas Kederis today left hospital ahead of his date with IOC inquisitors claiming his innocence and vowing: quot;After the crucifixion comes the resurrection. quot; .."
+ result = classifier(text)

+ predicted_label = result[0]["label"]
+ print(f"Predicted label: {predicted_label}")