Spaces:
Paused
Paused
Ayushnangia
committed on
Commit
•
5167b0f
1
Parent(s):
29685bd
updating with summarizer
Browse files
- app.py +35 -6
- requirements.txt +3 -1
app.py
CHANGED
@@ -1,6 +1,9 @@
|
|
1 |
import gradio as gr
|
2 |
import yt_dlp as ydlp
|
|
|
|
|
3 |
from whispercpp import Whisper
|
|
|
4 |
|
5 |
def download_audio(youtube_url, output_folder='.'):
|
6 |
ydl_opts = {
|
@@ -32,21 +35,47 @@ def process_general_transcription(transcription):
|
|
32 |
transcript_str = "\n".join(formatted_transcription)
|
33 |
|
34 |
return transcript_str
|
35 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
36 |
download_audio(youtube_url)
|
37 |
result = w.transcribe("audio.wav")
|
38 |
text = w.extract_text(result)
|
39 |
-
|
|
|
|
|
|
|
40 |
with gr.Blocks() as demo:
|
41 |
gr.Markdown(
|
42 |
"""
|
43 |
# CPP Whisperer - Transcribe YouTube Videos
|
44 |
|
45 |
""")
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
50 |
|
51 |
|
52 |
demo.launch()
|
|
|
1 |
import gradio as gr
|
2 |
import yt_dlp as ydlp
|
3 |
+
from transformers import pipeline
|
4 |
+
|
5 |
from whispercpp import Whisper
|
6 |
+
# Module-level summarization pipeline, built once at import time and reused by
# every call to summarizing(); uses the "knkarthick/MEETING_SUMMARY" model.
summarizer = pipeline("summarization", model="knkarthick/MEETING_SUMMARY")
|
7 |
|
8 |
def download_audio(youtube_url, output_folder='.'):
|
9 |
ydl_opts = {
|
|
|
35 |
transcript_str = "\n".join(formatted_transcription)
|
36 |
|
37 |
return transcript_str
|
38 |
+
def chunk_to_tokens(text, n):
    """Split *text* into whitespace-delimited word chunks.

    The chunk length starts from ``min(word_count, 512)`` and is scaled by
    ``(1 - n / 100)``, so a larger *n* yields smaller chunks. The length
    never drops below one word.

    Args:
        text: The string to split; words are separated on any whitespace.
        n: Percentage used to shrink the chunk length.

    Returns:
        A list of strings, each holding the words of one chunk joined by
        single spaces. Empty when *text* contains no words.
    """
    words = text.split()
    word_count = len(words)

    # Cap the base chunk length at 512 words.
    cap = word_count if word_count < 512 else 512

    step = int(cap * (1 - n / 100))
    if step < 1:
        # A zero step would make range() raise; fall back to one word.
        step = 1

    pieces = []
    for start in range(0, word_count, step):
        pieces.append(" ".join(words[start:start + step]))
    return pieces
|
47 |
+
def summarizing(text,n):
    """Summarize *text* chunk by chunk and return the combined summaries.

    The text is split with ``chunk_to_tokens(text, n)``; each chunk is passed
    to the module-level ``summarizer`` and the ``'summary_text'`` of its first
    result is kept.

    Args:
        text: The text to summarize.
        n: Percentage forwarded to ``chunk_to_tokens`` to size the chunks.

    Returns:
        One string containing every chunk summary, each followed by a
        newline (so the result ends with a newline when non-empty).
    """
    per_chunk = (
        summarizer(chunk)[0]['summary_text'] + '\n'
        for chunk in chunk_to_tokens(text, n)
    )
    return "".join(per_chunk)
|
53 |
+
def transcribe_sum_youtube(youtube_url,n):
    """Download a YouTube video's audio, transcribe it, and summarize it.

    Args:
        youtube_url: URL handed to ``download_audio``.
        n: Percentage forwarded to ``summarizing`` to size the text chunks.

    Returns:
        The string produced by ``summarizing`` for the formatted transcript.
    """
    # NOTE(review): presumably download_audio writes "audio.wav" into the
    # working directory, since that is the file transcribed next - confirm.
    download_audio(youtube_url)

    # `w` is the transcriber object defined elsewhere in this file.
    transcription = w.transcribe("audio.wav")
    extracted = w.extract_text(transcription)

    return summarizing(process_general_transcription(extracted), n)
|
59 |
+
|
60 |
+
|
61 |
# Gradio UI: a URL box and a percentage slider on the left, the summarized
# transcript on the right, wired to transcribe_sum_youtube.
with gr.Blocks() as demo:
    gr.Markdown(
        """
# CPP Whisperer - Transcribe YouTube Videos

""")
    with gr.Row():
        with gr.Column():
            inp = gr.Textbox(label="Youtube Url", placeholder="Insert YT Url here")
            # gr.Slider takes `minimum`, `maximum` and `step`; the previous
            # `min_value`/`max_value`/`step_size` keywords are not Slider
            # parameters, so the intended range and step were never applied.
            inp2 = gr.Slider(
                label="Summarization Percentage",
                minimum=0,
                maximum=100,
                step=1,
            )
            result_button_transcribe = gr.Button('Transcribe and Summarize')

        with gr.Column():
            out = gr.Textbox(label="Transcribed and Summarize Text")

    # The slider value is passed as the `n` argument of transcribe_sum_youtube.
    result_button_transcribe.click(
        transcribe_sum_youtube,
        inputs=[inp, inp2],
        outputs=out,
    )


demo.launch()
|
requirements.txt
CHANGED
@@ -1,3 +1,5 @@
|
|
1 |
git+https://github.com/stlukey/whispercpp.py
|
2 |
gradio
|
3 |
-
yt_dlp
|
|
|
|
|
|
1 |
git+https://github.com/stlukey/whispercpp.py
|
2 |
gradio
|
3 |
+
yt_dlp
|
4 |
+
transformers
|
5 |
+
torch
|