# Web-page residue ("Spaces: Sleeping") captured along with the source; not part of the program.
import gradio as gr | |
from pytube import YouTube | |
import random | |
import requests,json | |
import subprocess,os | |
def del_down_file():
    """Delete every ``*.mp4`` entry in the current working directory.

    This is the cleanup step run after a download has been processed. It is
    best-effort: an entry that cannot be removed is reported on stdout and
    skipped, and no exception is propagated to the caller.

    The removal is done in-process instead of spawning ``rm -rf`` through a
    shell, so it does not depend on a POSIX shell being available.
    """
    # Imported locally so this function does not rely on file-level imports
    # beyond ``os``.
    import glob
    import shutil

    # ``glob`` follows the shell convention that ``*`` does not match
    # dot-files, so the same set of entries is selected as ``*.mp4`` in a
    # shell would select.
    for path in glob.glob('*.mp4'):
        try:
            if os.path.isdir(path) and not os.path.islink(path):
                # A real directory named ``something.mp4``: remove it recursively.
                shutil.rmtree(path)
            else:
                os.remove(path)
        except OSError as err:
            # Keep going: one stubborn entry must not abort the cleanup.
            print('failed to remove', path, err)
def get_video(url):
    """Download the video at *url* with the ``you-get`` CLI and return its path.

    The download is written to the current working directory. The newest
    ``.mp4`` file found there afterwards is renamed to ``my_mp4.mp4`` and that
    name is returned.

    Args:
        url: Address of the video page. It is passed to ``you-get`` as a single
            argument and is never interpreted by a shell.

    Returns:
        The string ``'my_mp4.mp4'``.

    Raises:
        FileNotFoundError: If no ``.mp4`` file exists in the working directory
            after the download attempt.
    """
    output_dir = '.'
    # An argument list (no shell) keeps shell metacharacters in a user-supplied
    # URL from being executed as commands.
    # NOTE(review): in you-get, ``-f`` appears to be ``--force`` rather than a
    # format selector, which would make ``mp4`` an extra positional argument.
    # The original arguments are kept unchanged; confirm against the you-get
    # CLI documentation.
    command = ['you-get', '-f', 'mp4', '-o', output_dir, url]
    print(' '.join(command))
    try:
        subprocess.call(command)
    except OSError as err:
        # The executable is missing or cannot be started. Fall through so the
        # caller gets the uniform "no file produced" error below.
        print('could not run you-get:', err)

    # Collect every mp4 file present in the output directory.
    mp4_files = [
        filename
        for filename in os.listdir(output_dir)
        if filename.endswith('.mp4')
    ]
    print(mp4_files)
    if not mp4_files:
        raise FileNotFoundError(f'no .mp4 file was produced for {url!r}')

    # Prefer the most recently written file so that a stale leftover from an
    # earlier run is not picked instead of the fresh download.
    mp4_file = max(mp4_files, key=os.path.getmtime)
    # os.replace overwrites an existing target on every platform.
    os.replace(mp4_file, 'my_mp4.mp4')
    return 'my_mp4.mp4'
def create(prompt, openai_key):
    """Send *prompt* to the OpenAI completions endpoint and return the reply.

    Args:
        prompt: Text submitted as the ``prompt`` field of the request.
        openai_key: API key sent as a bearer token in the ``Authorization``
            header.

    Returns:
        The response body decoded from JSON. No status check is performed, so
        an error payload from the service is returned as-is.

    Raises:
        requests.RequestException: On connection failure or when the service
            does not answer within the timeout.
        ValueError: If the response body is not valid JSON.
    """
    headers = {
        'Content-Type': 'application/json',
        'Authorization': f'Bearer {openai_key}',
    }
    # NOTE(review): the model name is hard-coded; confirm it is still served
    # by the completions endpoint.
    data = {
        "model": "text-davinci-003",
        "prompt": prompt,
        "temperature": 0.7,
        "max_tokens": 1024,
        "top_p": 1.0,
        "frequency_penalty": 0.0,
        "presence_penalty": 0.0
    }
    # Log the prompt only. The headers and the key are credentials and must
    # never be written to logs.
    print(prompt)
    url = 'https://api.openai.com/v1/completions'
    # A timeout keeps a stalled connection from blocking the caller forever.
    r = requests.post(url, headers=headers,
                      data=json.dumps(data), timeout=120)
    print(r.text)
    return r.json()
def split_list(l, n):
    """Lazily yield consecutive slices of *l*, each at most *n* items long.

    The final slice is shorter when ``len(l)`` is not a multiple of *n*.
    Any sliceable sequence with a length is accepted.
    """
    starts = range(0, len(l), n)
    yield from (l[start:start + n] for start in starts)
def convert(res, openai_key):
    """Summarise a transcription response with the OpenAI completion API.

    The transcription lines are taken from ``res.json()['prediction']`` (one
    ``'transcription'`` value per entry), grouped ten at a time, and each group
    is sent to ``create`` with a prompt asking for a five-point summary in
    Simplified Chinese.

    Args:
        res: Response-like object whose ``json()`` returns a mapping with a
            ``'prediction'`` list.
        openai_key: API key forwarded to ``create``.

    Returns:
        One string containing, for every processed group, the prompt followed
        by the model's answer. If a group fails, the error is printed and the
        text gathered so far is returned (possibly an empty string).

    Raises:
        KeyError: If the payload has no ``'prediction'`` key or an entry lacks
            ``'transcription'``.
    """
    data = res.json()
    prediction = data['prediction']
    content = [x['transcription'] for x in prediction]

    sections = []
    try:
        for txt_line in split_list(content, 10):
            txt_line_content = '\n'.join(txt_line)
            prompt = f"\n\n将下面的内容使用简体中文总结5条要点出来:\n\n{txt_line_content}"
            open_ai_res = create(prompt, openai_key)
            sections.append(
                prompt + '\n GPT3:\n' + open_ai_res['choices'][0]['text']
            )
    except Exception as e:
        # Deliberate best-effort boundary: a failed summary request must not
        # discard the sections that were already produced.
        print('open ai api error', e)
    return ''.join(sections)
def get_audio(url):
    """Fetch the media for *url* and return the path of the local file.

    Despite the name, no audio-only extraction happens here: the call is
    delegated to ``get_video`` and its return value is passed through
    unchanged.
    """
    return get_video(url)
def get_transcript(url, openai_key):
    """Download a video, transcribe it via api.gladia.io and summarise it.

    Args:
        url: Address of the video to process; forwarded to ``get_audio``.
        openai_key: OpenAI API key forwarded to ``convert``.

    Returns:
        The summary text produced by ``convert`` for the transcription
        response.

    The downloaded media is removed with ``del_down_file`` once the upload
    has finished, including when the upload raises.
    """
    headers = {
        'accept': 'application/json',
        # SECURITY(review): a service credential is committed in source. The
        # GLADIA_API_KEY environment variable takes precedence; the literal is
        # kept only as a backward-compatible fallback and should be rotated
        # and removed.
        'x-gladia-key': os.environ.get(
            'GLADIA_API_KEY', '89b0adf5-fb2c-48ba-8a66-76b02827fd14'
        ),
        # Content-Type is intentionally not set: requests won't add the
        # multipart boundary if this header is set when files= is passed.
    }
    audio_file = get_audio(url)
    print(audio_file)
    try:
        # The context manager closes the handle even if the upload fails, and
        # before the file is deleted below.
        with open(f'{audio_file}', 'rb') as media:
            files = {
                'audio': (f"{audio_file}", media, 'video/mp4'),
                'language': (None, 'english'),
                'language_behaviour': (None, 'automatic single language'),
            }
            print('get transcription from api.gladia.io ...')
            response = requests.post('https://api.gladia.io/audio/text/audio-transcription/', headers=headers, files=files)
        print(response.text)
    finally:
        # Always clean up so a leftover file cannot be picked up by the next
        # download.
        del_down_file()
    return convert(response, openai_key)
# Build the Gradio UI: the left column holds the inputs and the trigger button,
# the right column shows the result text.
# NOTE(review): the nesting below was reconstructed from a capture that lost
# all indentation — confirm it against the deployed layout.
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            with gr.Row():
                # Inputs passed, in this order, to get_transcript.
                url = gr.Textbox(placeholder='Youtube video URL', label='URL')
                openai_key = gr.Textbox(placeholder='Your openai key', label='OPENAI KEY')
            with gr.Row():
                gr.Markdown("自动从youtube视频中,获取音频内容,并使用GPT总结其要点")
                transcribe_btn = gr.Button('Transcribe')
        with gr.Column():
            # Receives the string returned by get_transcript.
            outputs = gr.Textbox(placeholder='Transcription of the video', label='Transcription')
    # Wire the button: a click runs get_transcript(url, openai_key) and writes
    # its return value into the output box.
    transcribe_btn.click(get_transcript, inputs=[url,openai_key], outputs=outputs)
# Start the web server as soon as this module is executed.
demo.launch(debug=True)