Spaces:
Sleeping
Sleeping
yellowcandle
committed on
Commit
•
5576fae
1
Parent(s):
27fbf39
changed how the transcription is done
Browse files
app.py
CHANGED
@@ -1,33 +1,9 @@
|
|
1 |
import spaces
|
2 |
import gradio as gr
|
3 |
import os
|
4 |
-
import logging
|
5 |
-
from pytube import YouTube
|
6 |
import torch
|
7 |
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline, AutoModelForCausalLM, AutoTokenizer
|
8 |
|
9 |
-
def get_text(url):
|
10 |
-
if url != '':
|
11 |
-
output_text_transcribe = ''
|
12 |
-
|
13 |
-
yt = YouTube(url)
|
14 |
-
video = yt.streams.filter(only_audio=True).first()
|
15 |
-
out_file = video.download(output_path=".")
|
16 |
-
|
17 |
-
file_stats = os.stat(out_file)
|
18 |
-
logging.info(f'Size of audio file in Bytes: {file_stats.st_size}')
|
19 |
-
|
20 |
-
if file_stats.st_size <= 30000000:
|
21 |
-
base, ext = os.path.splitext(out_file)
|
22 |
-
new_file = base + '.mp3'
|
23 |
-
os.rename(out_file, new_file)
|
24 |
-
a = new_file
|
25 |
-
|
26 |
-
result = model.transcribe(a)
|
27 |
-
return result['text'].strip()
|
28 |
-
else:
|
29 |
-
logging.error('Videos for transcription on this space are limited to about 1.5 hours. Sorry about this limit but some joker thought they could stop this tool from working by transcribing many extremely long videos. Please visit https://steve.digital to contact me about this space.')
|
30 |
-
|
31 |
@spaces.GPU(duration=60)
|
32 |
def transcribe_audio(audio, model_id):
|
33 |
if audio is None:
|
@@ -67,17 +43,12 @@ def proofread(text):
|
|
67 |
device = "cuda:0" if torch.cuda.is_available() else "cpu"
|
68 |
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
|
69 |
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
model
|
75 |
-
|
76 |
-
input_text = prompt + text
|
77 |
-
input_ids = tokenizer.encode(input_text, return_tensors="pt").to(device)
|
78 |
-
output = model.generate(input_ids, max_length=len(input_ids[0]) + 50, num_return_sequences=1, temperature=0.7)
|
79 |
-
proofread_text = tokenizer.decode(output[0], skip_special_tokens=True)
|
80 |
-
|
81 |
return proofread_text
|
82 |
|
83 |
with gr.Blocks() as demo:
|
@@ -89,9 +60,7 @@ with gr.Blocks() as demo:
|
|
89 |
""")
|
90 |
|
91 |
with gr.Row():
|
92 |
-
|
93 |
-
audio = gr.Audio(sources="upload", type="filepath")
|
94 |
-
input_text_url = gr.Textbox(label="Video URL")
|
95 |
model_dropdown = gr.Dropdown(choices=["openai/whisper-large-v3", "alvanlii/whisper-small-cantonese"], value="openai/whisper-large-v3")
|
96 |
|
97 |
transcribe_button = gr.Button("Transcribe")
|
|
|
1 |
import spaces
|
2 |
import gradio as gr
|
3 |
import os
|
|
|
|
|
4 |
import torch
|
5 |
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline, AutoModelForCausalLM, AutoTokenizer
|
6 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7 |
@spaces.GPU(duration=60)
|
8 |
def transcribe_audio(audio, model_id):
|
9 |
if audio is None:
|
|
|
43 |
device = "cuda:0" if torch.cuda.is_available() else "cpu"
|
44 |
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
|
45 |
|
46 |
+
messages = [
|
47 |
+
{"role": "system", "content": "用繁體中文整理這段文字,在最後加上整段文字的重點。"},
|
48 |
+
{"role": "user", "content": text},
|
49 |
+
]
|
50 |
+
pipe = pipeline("text-generation", model="hfl/llama-3-chinese-8b-instruct-v3")
|
51 |
+
proofread_text = pipe(messages)
|
|
|
|
|
|
|
|
|
|
|
52 |
return proofread_text
|
53 |
|
54 |
with gr.Blocks() as demo:
|
|
|
60 |
""")
|
61 |
|
62 |
with gr.Row():
|
63 |
+
audio = gr.Audio(sources="upload", type="filepath")
|
|
|
|
|
64 |
model_dropdown = gr.Dropdown(choices=["openai/whisper-large-v3", "alvanlii/whisper-small-cantonese"], value="openai/whisper-large-v3")
|
65 |
|
66 |
transcribe_button = gr.Button("Transcribe")
|