Spaces:
Paused
Paused
MikeTangoEcho
committed on
Commit
•
48b9b5d
0
Parent(s):
initial commit
Browse files- README.md +9 -0
- app.py +88 -0
- packages.txt +1 -0
- requirements.txt +2 -0
README.md
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
title: COC
|
3 |
+
emoji: 🎤
|
4 |
+
sdk: gradio
|
5 |
+
sdk_version: 5.5.0
|
6 |
+
app_file: app.py
|
7 |
+
pinned: false
|
8 |
+
disable_embedding: true
|
9 |
+
---
|
app.py
ADDED
@@ -0,0 +1,88 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from pathlib import Path

import gradio as gr
import numpy as np
import torch
from transformers import pipeline
|
4 |
+
|
5 |
+
# Pipelines
|
6 |
+
|
7 |
+
# Run on the first CUDA device when torch can see one, otherwise on the CPU.
device = 0 if torch.cuda.is_available() else "cpu"

## Automatic Speech Recognition
## https://huggingface.co/docs/transformers/task_summary#automatic-speech-recognition
## Requires ffmpeg to be installed

asr_model = "openai/whisper-tiny"
asr = pipeline(
    "automatic-speech-recognition",
    model=asr_model,
    # torch_dtype=torch.float16,
    device=device,
)

## Token Classification / Named Entity Recognition
## https://huggingface.co/docs/transformers/task_summary#token-classification
tc_model = "dslim/distilbert-NER"
tc = pipeline(
    "token-classification",  # ner
    # Must be `tc_model`: the previous `ner_model` was never defined and
    # raised NameError as soon as the module was imported.
    model=tc_model,
    device=device,
)
|
29 |
+
|
30 |
+
# ---
|
31 |
+
|
32 |
+
# Transformers
|
33 |
+
|
34 |
+
# https://www.gradio.app/main/docs/gradio/audio#behavior
|
35 |
+
# As output component: expects audio data in any of these formats:
|
36 |
+
# - a str or pathlib.Path filepath
|
37 |
+
# - or URL to an audio file,
|
38 |
+
# - or a bytes object (recommended for streaming),
|
39 |
+
# - or a tuple of (sample rate in Hz, audio data as numpy array)
|
40 |
+
def transcribe(audio: str | Path | bytes | tuple[int, np.ndarray] | None):
|
41 |
+
if audio is None:
|
42 |
+
return "..."
|
43 |
+
# TODO Manage str/Path
|
44 |
+
|
45 |
+
text = ""
|
46 |
+
# https://huggingface.co/docs/transformers/main_classes/pipelines#transformers.AutomaticSpeechRecognitionPipeline.__call__
|
47 |
+
# Whisper input format for tuple differ from output provided by gradio audio component
|
48 |
+
if asr_model.startswith("openai/whisper"):
|
49 |
+
inputs = {"sampling_rate": audio[0], "raw": audio[1]} if type(audio) is tuple and else audio
|
50 |
+
transcript = asr(inputs)
|
51 |
+
text = transcript['text']
|
52 |
+
|
53 |
+
entities = tc(text)
|
54 |
+
# TODO Add Text Classification for sentiment analysis
|
55 |
+
return {"text": text, "entities": entities}
|
56 |
+
|
57 |
+
# ---
|
58 |
+
|
59 |
+
# Gradio
|
60 |
+
|
61 |
+
## Interfaces
|
62 |
+
|
63 |
+
# https://www.gradio.app/main/docs/gradio/audio
# Audio input that accepts either an uploaded file or a microphone recording.
input_audio = gr.Audio(
    sources=["upload", "microphone"],
    show_share_button=False,
)

## App

# Single-function UI: audio goes in, the transcript comes out with its
# entities highlighted. The keyword arguments were previously not separated
# by commas, which made the whole file a SyntaxError.
gradio_app = gr.Interface(
    transcribe,
    inputs=[
        input_audio,
    ],
    outputs=[
        gr.HighlightedText(),
    ],
    theme="huggingface",
    title="ASRNERSBX",
    description=(
        "Transcribe, Tokenize, Classify"
    ),
    allow_flagging="never",
)

## Start!
gradio_app.launch()
|
packages.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
ffmpeg
|
requirements.txt
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
transformers
|
2 |
+
torch
|