aichina's picture
v6
d834007
raw
history blame
2.82 kB
import json
import os
import random

import gradio as gr
import requests
from pytube import YouTube
# Headers for the Gladia transcription API. They live under their own name
# because `headers` is bound to the OpenAI headers below; binding both dicts
# to the same name discards this one. Code uploading to Gladia should pass
# this dict, not `headers`.
# SECURITY: the fallback value is a credential committed to source control.
# Rotate it and provide GLADIA_API_KEY through the environment instead.
gladia_headers = {
    'accept': 'application/json',
    'x-gladia-key': os.environ.get(
        'GLADIA_API_KEY', '89b0adf5-fb2c-48ba-8a66-76b02827fd14'
    ),
    # Content-Type is deliberately left unset: requests only fills in the
    # multipart Content-Type (with its boundary) for files= uploads when the
    # caller has not set that header.
}

# SECURITY: same concern as above; prefer OPENAI_API_KEY from the environment.
openai_key = os.environ.get(
    'OPENAI_API_KEY', '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'
)

# Headers for JSON requests to the OpenAI API.
headers = {
    'Content-Type': 'application/json',
    'Authorization': f'Bearer {openai_key}',
}
def create(prompt, timeout=120):
    """Request a text completion for *prompt* from the OpenAI completions API.

    Args:
        prompt: Text sent as the completion prompt.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, requests waits indefinitely on a stalled connection.

    Returns:
        The decoded JSON body of the response, whatever its HTTP status, so
        the caller can inspect an error payload as well as a success.

    Raises:
        requests.exceptions.Timeout: If the server does not answer in time.
    """
    # NOTE(review): "text-davinci-003" may no longer be served by the API;
    # confirm the model name against the current OpenAI model list.
    payload = {
        "model": "text-davinci-003",
        "prompt": prompt,
        "temperature": 0.7,
        "max_tokens": 1024,
        "top_p": 1.0,
        "frequency_penalty": 0.0,
        "presence_penalty": 0.0,
    }
    response = requests.post(
        'https://api.openai.com/v1/completions',
        headers=headers,
        data=json.dumps(payload),
        timeout=timeout,
    )
    return response.json()
def convert(res):
    """Turn a transcription response into transcript text plus a GPT summary.

    Args:
        res: Response object whose ``json()`` returns a mapping with a
            ``prediction`` list; each item carries a ``transcription`` string.

    Returns:
        A string containing the joined transcript followed by the completion
        text returned by ``create``.

    Raises:
        KeyError: If either the transcription payload or the completion
            payload lacks the expected field. The offending payload is
            included in the message so API errors are visible.
    """
    data = res.json()
    if 'prediction' not in data:
        # An error payload (bad key, quota, bad upload) has no transcript;
        # surface it instead of failing with a bare KeyError('prediction').
        raise KeyError(
            f"transcription response has no 'prediction' field: {data!r}"
        )

    audio_text = '\n'.join(
        segment['transcription'] for segment in data['prediction']
    )
    prompt = f"将下面的内容,总结10条要点出来,\n{audio_text}"

    completion = create(prompt)
    if 'choices' not in completion:
        raise KeyError(
            f"completion response has no 'choices' field: {completion!r}"
        )
    answer = completion['choices'][0]['text']

    return f'音频内容:\n{audio_text}\nGPT3总结的要点:\n{answer}'
def get_audio(url):
    """Download the first audio-only stream of a YouTube video.

    Args:
        url: URL of the YouTube video.

    Returns:
        The file name (relative to the working directory) the audio was
        saved under. The file is not removed by this function.

    Raises:
        ValueError: If the video exposes no audio-only stream.
    """
    yt = YouTube(url)
    # Random numeric name so different requests normally write to different
    # files; collisions are unlikely but not impossible.
    audio_file = f'{random.randint(10000,100000)}.mp4'
    print(f'{url} {audio_file} start get audio ...')

    # first() yields None for an empty query, which gives a clearer failure
    # than indexing [0] on it.
    stream = yt.streams.filter(only_audio=True).first()
    if stream is None:
        raise ValueError(f'no audio-only stream available for {url}')
    stream.download(filename=audio_file)

    print('aodio over ..')
    return audio_file
def _gladia_request_headers():
    """Return the headers for a multipart upload to the Gladia API."""
    # SECURITY: the fallback value is a credential committed to source
    # control. Rotate it and provide GLADIA_API_KEY through the environment.
    return {
        'accept': 'application/json',
        'x-gladia-key': os.environ.get(
            'GLADIA_API_KEY', '89b0adf5-fb2c-48ba-8a66-76b02827fd14'
        ),
        # No Content-Type: requests sets the multipart one (with its
        # boundary) itself when files= is passed and the header is unset.
    }


def get_transcript(url):
    """Transcribe a YouTube video and summarise the transcript.

    Downloads the audio with ``get_audio``, uploads it to the Gladia
    audio-transcription endpoint, prints the raw response body, and passes
    the response to ``convert``.

    Args:
        url: URL of the YouTube video.

    Returns:
        The string produced by ``convert`` for the transcription response.
    """
    audio_file = get_audio(url)

    # Do not reuse the module-level `headers` here: it carries the OpenAI
    # Authorization token and a JSON Content-Type. Sending it would hand the
    # OpenAI key to Gladia, omit the Gladia key, and override the multipart
    # Content-Type that the file upload needs.
    with open(audio_file, 'rb') as audio:
        files = {
            'audio': (f"{audio_file}", audio, 'video/mp4'),
            'audio_url': (None, ''),
            'language': (None, 'english'),
            'language_behaviour': (None, 'automatic single language'),
        }
        response = requests.post(
            'https://api.gladia.io/audio/text/audio-transcription/',
            headers=_gladia_request_headers(),
            files=files,
        )
    # NOTE(review): the downloaded audio file is left on disk after the
    # upload; confirm whether it should be deleted here.

    print(response.text)
    return convert(response)
# Gradio UI: a URL input and a button in one column, the result text box in
# another. NOTE(review): the nesting of the layout contexts below is the
# conventional reading of this code; confirm it against the intended layout.
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            with gr.Row():
                url = gr.Textbox(placeholder='Youtube video URL', label='URL')
            with gr.Row():
                # NOTE(review): this text describes model sizes, but nothing
                # in the visible code selects a model; it may be left over
                # from an earlier version. Confirm before relying on it.
                gr.Markdown("Larger models are more accurate, but slower. For 1min video, it'll take ~30s (tiny), ~1min (base), ~3min (small), ~5min (medium), etc.")
                transcribe_btn = gr.Button('Transcribe')
        with gr.Column():
            outputs = gr.Textbox(placeholder='Transcription of the video', label='Transcription')
    # Clicking the button calls get_transcript with the URL box's value and
    # writes the returned string into the output text box.
    transcribe_btn.click(get_transcript, inputs=[url], outputs=outputs)
# Starts the app at import time with debug=True.
demo.launch(debug=True)