# Hosting-page residue captured together with this file (not program text):
# aichina's picture | sf | b6a5731 | raw | history blame | 4.11 kB
import gradio as gr
from pytube import YouTube
import random
import requests,json
import subprocess,os
def del_down_file():
    """Remove every ``*.mp4`` entry from the current working directory.

    This is the cleanup step run after a transcription request. It replaces
    the former ``rm -rf *.mp4`` shell call with an in-process implementation,
    so it needs neither a POSIX shell nor an ``rm`` binary.

    Names starting with a dot are not matched, which is also how a shell
    ``*.mp4`` glob behaves. Removal is best-effort: a failure on one entry is
    reported on stdout and the remaining entries are still processed.

    Returns:
        None
    """
    # Local imports keep this block self-contained; the module itself only
    # imports ``subprocess`` and ``os`` at file level.
    import glob
    import shutil

    for path in glob.glob('*.mp4'):
        try:
            if os.path.isdir(path) and not os.path.islink(path):
                # ``rm -rf`` also removed directories matching the pattern.
                shutil.rmtree(path)
            else:
                os.remove(path)
        except OSError as exc:
            # Mirror the "never fail" spirit of ``rm -f``: log and continue.
            print('failed to remove', path, exc)
def get_video(url):
    """Download ``url`` with the ``you-get`` CLI and return the local MP4 path.

    The download is written to the current working directory. Afterwards the
    first ``*.mp4`` entry found there is renamed to ``my_mp4.mp4``.

    Args:
        url: Address of the video page, passed to ``you-get`` unchanged.

    Returns:
        The string ``'my_mp4.mp4'`` (relative to the working directory).

    Raises:
        FileNotFoundError: if the ``you-get`` executable cannot be started,
            or if no ``.mp4`` file exists in the working directory after the
            download attempt.
    """
    output_dir = '.'
    # Build an argument vector and run it WITHOUT a shell. ``url`` comes from
    # a user-facing textbox, so interpolating it into a shell string allowed
    # command injection and also broke ordinary URLs containing ``&``.
    command = ['you-get', '-f', 'mp4', '-o', output_dir, url]
    print(' '.join(command))
    return_code = subprocess.call(command)
    if return_code != 0:
        # Keep going as before; the directory scan below decides the outcome.
        print(f'you-get exited with status {return_code}')

    # Collect every file in the download directory whose name ends in '.mp4'.
    mp4_files = [name for name in os.listdir(output_dir) if name.endswith('.mp4')]
    print(mp4_files)
    if not mp4_files:
        raise FileNotFoundError(f'no .mp4 file was produced for {url!r}')

    # Names are relative to the working directory because output_dir is '.'.
    os.rename(mp4_files[0], 'my_mp4.mp4')
    return 'my_mp4.mp4'
def create(prompt, openai_key):
    """Request a completion for ``prompt`` from the OpenAI completions API.

    Args:
        prompt: Text sent as the ``prompt`` field of the request.
        openai_key: API key placed in the ``Authorization: Bearer`` header.

    Returns:
        The response body decoded from JSON, as returned by ``r.json()``.

    Raises:
        requests.exceptions.RequestException: on connection problems or when
            the request exceeds the timeout.
    """
    headers = {
        'Content-Type': 'application/json',
        'Authorization': f'Bearer {openai_key}',
    }
    data = {
        "model": "text-davinci-003",
        "prompt": prompt,
        "temperature": 0.7,
        "max_tokens": 1024,
        "top_p": 1.0,
        "frequency_penalty": 0.0,
        "presence_penalty": 0.0,
    }
    # Log the prompt only. The headers and the key are credentials and must
    # not be written to stdout, where they end up in application logs.
    print(prompt)
    url = 'https://api.openai.com/v1/completions'
    # Without a timeout a stalled connection would block the request forever.
    r = requests.post(url, headers=headers, data=json.dumps(data), timeout=120)
    print(r.text)
    return r.json()
def split_list(l, n):
    """Lazily yield consecutive slices of ``l`` holding at most ``n`` items.

    Every slice has length ``n`` except possibly the last, which holds the
    remainder. An empty ``l`` yields nothing.
    """
    chunk_starts = range(0, len(l), n)
    yield from (l[start:start + n] for start in chunk_starts)
def convert(res, openai_key):
    """Summarise a transcription response in batches of ten segments.

    Args:
        res: Response object whose ``json()`` returns a mapping with a
            ``'prediction'`` list; each item carries a ``'transcription'``
            entry.
        openai_key: API key forwarded to ``create`` for every batch.

    Returns:
        One string made of, for each batch, the prompt that was sent followed
        by ``'\n GPT3:\n'`` and the first completion text. If a batch fails,
        the error is printed and the text gathered so far is returned. With
        no transcription segments the result is ``''``.

    Raises:
        KeyError: if the response has no ``'prediction'`` key or an item has
            no ``'transcription'`` key.
    """
    data = res.json()
    prediction = data['prediction']
    content = [x['transcription'] for x in prediction]
    if not content:
        # Nothing to summarise, so no API call is needed.
        return ''

    # Collect per-batch results and join once instead of growing a string.
    parts = []
    try:
        for txt_line in split_list(content, 10):
            txt_line_content = '\n'.join(txt_line)
            prompt = f"\n\n将下面的内容使用简体中文总结5条要点出来:\n\n{txt_line_content}"
            open_ai_res = create(prompt, openai_key)
            parts.append(prompt + '\n GPT3:\n' + open_ai_res['choices'][0]['text'])
    except Exception as e:
        # Deliberately best-effort: keep the batches that already succeeded.
        print('open ai api error', e)
    return ''.join(parts)
def get_audio(url):
    """Fetch the media for ``url`` and return the path of the local file.

    This is a thin wrapper over ``get_video``: its return value is passed
    through unchanged, and no audio-only extraction happens here.
    """
    return get_video(url)
def get_transcript(url, openai_key):
    """Download a video, transcribe it via Gladia and summarise the text.

    Steps: fetch the media with ``get_audio``, upload it to the Gladia
    audio-transcription endpoint, delete the downloaded ``*.mp4`` files, then
    hand the response to ``convert``.

    Args:
        url: Address of the video to process.
        openai_key: OpenAI API key forwarded to ``convert``.

    Returns:
        The string produced by ``convert`` for the transcription response.
    """
    headers = {
        'accept': 'application/json',
        # SECURITY NOTE(review): this key is committed in source. It should be
        # rotated and supplied through the environment; the literal remains
        # only as a fallback so existing deployments keep working.
        'x-gladia-key': os.environ.get(
            'GLADIA_API_KEY', '89b0adf5-fb2c-48ba-8a66-76b02827fd14'
        ),
        # Content-Type is intentionally not set: requests must generate the
        # multipart boundary itself when ``files=`` is passed.
    }
    audio_file = get_audio(url)
    print(audio_file)
    try:
        # The context manager closes the upload handle even if the request
        # fails; previously the file object was never closed.
        with open(audio_file, 'rb') as media:
            files = {
                'audio': (audio_file, media, 'video/mp4'),
                'language': (None, 'english'),
                'language_behaviour': (None, 'automatic single language'),
            }
            print('get transcription from api.gladia.io ...')
            response = requests.post(
                'https://api.gladia.io/audio/text/audio-transcription/',
                headers=headers,
                files=files,
            )
        print(response.text)
    finally:
        # Always remove the downloaded media, including when the upload raised.
        del_down_file()
    return convert(response, openai_key)
# Gradio UI: two text inputs, a button and an output box. Clicking the button
# calls get_transcript(url, openai_key) and shows its return value.
# NOTE(review): the original indentation was lost, so the nesting of the
# Row/Column contexts below is a reconstruction — confirm the intended layout.
with gr.Blocks() as demo:
    with gr.Row():
        # Left column (presumably): inputs and the trigger button.
        with gr.Column():
            with gr.Row():
                url = gr.Textbox(placeholder='Youtube video URL', label='URL')
                openai_key = gr.Textbox(placeholder='Your openai key', label='OPENAI KEY')
            with gr.Row():
                # The description text reads: "Automatically fetch the audio
                # content of a YouTube video and summarise its key points
                # with GPT."
                gr.Markdown("自动从youtube视频中,获取音频内容,并使用GPT总结其要点")
                transcribe_btn = gr.Button('Transcribe')
        # Right column (presumably): the result text.
        with gr.Column():
            outputs = gr.Textbox(placeholder='Transcription of the video', label='Transcription')
    # Wire the button: the two textboxes are the inputs, `outputs` receives the result.
    transcribe_btn.click(get_transcript, inputs=[url,openai_key], outputs=outputs)
# Start the app with debug=True as soon as this module is executed.
demo.launch(debug=True)