MikeTangoEcho committed on
Commit
48b9b5d
0 Parent(s):

initial commit

Files changed (4)
  1. README.md +9 -0
  2. app.py +90 -0
  3. packages.txt +1 -0
  4. requirements.txt +2 -0
README.md ADDED
@@ -0,0 +1,9 @@
+ ---
+ title: COC
+ emoji: 🎤
+ sdk: gradio
+ sdk_version: 5.5.0
+ app_file: app.py
+ pinned: false
+ disable_embedding: true
+ ---
app.py ADDED
@@ -0,0 +1,90 @@
+ import torch
+ import numpy as np
+ from pathlib import Path
+ from transformers import pipeline
+ import gradio as gr
+
+ # Pipelines
+
+ device = 0 if torch.cuda.is_available() else "cpu"
+
+ ## Automatic Speech Recognition
+ ## https://huggingface.co/docs/transformers/task_summary#automatic-speech-recognition
+ ## Requires ffmpeg to be installed
+
+ asr_model = "openai/whisper-tiny"
+ asr = pipeline(
+     "automatic-speech-recognition",
+     model=asr_model,
+     # torch_dtype=torch.float16,
+     device=device
+ )
+
+ ## Token Classification / Named Entity Recognition
+ ## https://huggingface.co/docs/transformers/task_summary#token-classification
+ tc_model = "dslim/distilbert-NER"
+ tc = pipeline(
+     "token-classification",  # ner
+     model=tc_model,
+     device=device
+ )
+
+ # ---
+
+ # Transformers
+
+ # https://www.gradio.app/main/docs/gradio/audio#behavior
+ # As input component, the audio value is passed in one of these formats:
+ # - a str or pathlib.Path filepath
+ # - or URL to an audio file,
+ # - or a bytes object (recommended for streaming),
+ # - or a tuple of (sample rate in Hz, audio data as numpy array)
+ def transcribe(audio: str | Path | bytes | tuple[int, np.ndarray] | None):
+     if audio is None:
+         return {"text": "", "entities": []}
+     # TODO Manage str/Path
+
+     text = ""
+     # https://huggingface.co/docs/transformers/main_classes/pipelines#transformers.AutomaticSpeechRecognitionPipeline.__call__
+     # The Whisper input format for a tuple differs from the output provided by the gradio audio component
+     if asr_model.startswith("openai/whisper"):
+         inputs = {"sampling_rate": audio[0], "raw": audio[1]} if isinstance(audio, tuple) else audio
+         transcript = asr(inputs)
+         text = transcript["text"]
+
+     entities = tc(text)
+     # TODO Add Text Classification for sentiment analysis
+     return {"text": text, "entities": entities}
+
+ # ---
+
+ # Gradio
+
+ ## Interfaces
+
+ # https://www.gradio.app/main/docs/gradio/audio
+ input_audio = gr.Audio(
+     sources=["upload", "microphone"],
+     show_share_button=False
+ )
+
+ ## App
+
+ gradio_app = gr.Interface(
+     transcribe,
+     inputs=[
+         input_audio
+     ],
+     outputs=[
+         gr.HighlightedText()
+     ],
+     theme="huggingface",
+     title="ASRNERSBX",
+     description=(
+         "Transcribe, Tokenize, Classify"
+     ),
+     allow_flagging="never"
+ )
+
+ ## Start!
+ gradio_app.launch()
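
For reference, a minimal smoke test of transcribe() outside the Space, assuming app.py's pipelines are loaded. The 16 kHz rate and 440 Hz tone are illustrative assumptions, not part of the commit; a pure tone carries no speech, so the transcript may be empty or hallucinated.

import numpy as np

# One second of float32 audio at 16 kHz, the rate Whisper models expect.
sr = 16000
t = np.linspace(0, 1, sr, endpoint=False)
tone = (0.1 * np.sin(2 * np.pi * 440 * t)).astype(np.float32)

# Same (sample_rate, ndarray) tuple shape that gr.Audio passes to the function.
result = transcribe((sr, tone))
print(result["text"])      # transcript string from the Whisper pipeline
print(result["entities"])  # list of dicts (entity, score, word, start, end) from the NER pipeline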
packages.txt ADDED
@@ -0,0 +1 @@
+ ffmpeg
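
packages.txt installs Debian packages at build time; ffmpeg is what lets the transformers ASR pipeline decode audio files. A minimal sketch of the str/Path branch left as a TODO in app.py, assuming the asr pipeline from app.py is loaded (sample.mp3 is a placeholder path, not part of the commit):

# Filepath inputs can go to the pipeline as-is: transformers uses ffmpeg
# to decode the file to a float array before running Whisper.
transcript = asr("sample.mp3")  # placeholder path
print(transcript["text"])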
requirements.txt ADDED
@@ -0,0 +1,2 @@
+ transformers
+ torch