"""Gradio demo: transcribe a YouTube video's audio and summarise it with GPT-3.

Pipeline: pytube downloads an audio-only stream, the Gladia transcription
endpoint turns it into text, and the OpenAI completions endpoint condenses
that text into ten key points.
"""
import json
import random

import gradio as gr
import requests
from pytube import YouTube

OPENAI_COMPLETIONS_URL = 'https://api.openai.com/v1/completions'
GLADIA_TRANSCRIPTION_URL = 'https://api.gladia.io/audio/text/audio-transcription/'

# Headers for the Gladia upload.  Kept at module level so get_transcript()
# can actually see them.
# NOTE(review): this API key is committed in source -- consider rotating it
# and loading it from the environment or a secrets store instead.
GLADIA_HEADERS = {
    'accept': 'application/json',
    'x-gladia-key': '89b0adf5-fb2c-48ba-8a66-76b02827fd14',
    # Deliberately no 'Content-Type': requests only generates the multipart
    # boundary itself when this header is left unset and ``files=`` is passed.
}


def create(prompt, openai_key):
    """Send *prompt* to the OpenAI completions endpoint.

    Args:
        prompt: Text to complete.
        openai_key: OpenAI API key, sent as a Bearer token.

    Returns:
        The decoded JSON body of the response, whatever its status code
        (error payloads are returned as-is for the caller to inspect).
    """
    headers = {
        'Content-Type': 'application/json',
        'Authorization': f'Bearer {openai_key}',
    }
    # NOTE(review): "text-davinci-003" appears to have been retired by
    # OpenAI -- confirm against the deprecations page and switch to a
    # supported completions model if so.
    data = {
        "model": "text-davinci-003",
        "prompt": prompt,
        "temperature": 0.7,
        "max_tokens": 1024,
        "top_p": 1.0,
        "frequency_penalty": 0.0,
        "presence_penalty": 0.0,
    }
    r = requests.post(OPENAI_COMPLETIONS_URL, headers=headers, data=json.dumps(data))
    return r.json()


def convert(res, openai_key):
    """Turn a transcription response into transcript text plus a GPT summary.

    Args:
        res: The ``requests`` response returned by the transcription call.
            Its JSON body is expected to hold a ``prediction`` list whose
            items each carry a ``transcription`` string.
        openai_key: OpenAI API key forwarded to :func:`create`.

    Returns:
        A single string containing the transcript followed by the summary.

    Raises:
        RuntimeError: If the transcription body has no ``prediction`` or the
            OpenAI reply has no ``choices`` (e.g. an error payload).
    """
    data = res.json()
    if not isinstance(data, dict) or 'prediction' not in data:
        # Surface the service's own payload instead of a bare KeyError.
        raise RuntimeError(f'Transcription response has no prediction: {data}')

    audio_txt = '\n'.join(item['transcription'] for item in data['prediction'])

    prompt = f"将下面的内容,总结10条要点出来,\n{audio_txt}"
    open_ai_res = create(prompt, openai_key)

    choices = open_ai_res.get('choices') if isinstance(open_ai_res, dict) else None
    if not choices:
        # Invalid key, quota exhaustion, retired model, ... all land here.
        detail = open_ai_res.get('error', open_ai_res) if isinstance(open_ai_res, dict) else open_ai_res
        raise RuntimeError(f'OpenAI completion failed: {detail}')

    answer = choices[0]['text']
    return f'音频内容:\n{audio_txt}\nGPT3总结的要点:\n{answer}'


def get_audio(url):
    """Download an audio-only stream of the YouTube video at *url*.

    The file is written to the current working directory under a randomly
    numbered ``.mp4`` name.

    Args:
        url: YouTube video URL.

    Returns:
        The file name the audio was saved under.

    Raises:
        RuntimeError: If the video exposes no audio-only stream.
    """
    yt = YouTube(url)
    audio_file = f'{random.randint(10000,100000)}.mp4'
    print(f'{url} {audio_file} start get audio ...')
    stream = yt.streams.filter(only_audio=True).first()
    if stream is None:
        raise RuntimeError(f'No audio-only stream available for {url}')
    stream.download(filename=audio_file)
    print('aodio over ..')
    return audio_file


def get_transcript(url, openai_key):
    """Transcribe the YouTube video at *url* and summarise the transcript.

    Args:
        url: YouTube video URL.
        openai_key: OpenAI API key used for the summary step.

    Returns:
        The string produced by :func:`convert` (transcript plus summary).
    """
    audio_file = get_audio(url)
    # The context manager guarantees the handle is closed even if the upload
    # raises; requests reads the file while the POST is in flight.
    with open(audio_file, 'rb') as audio_fh:
        files = {
            'audio': (audio_file, audio_fh, 'video/mp4'),
            'language': (None, 'english'),
            'language_behaviour': (None, 'automatic single language'),
        }
        response = requests.post(GLADIA_TRANSCRIPTION_URL, headers=GLADIA_HEADERS, files=files)
    print(response.text)
    return convert(response, openai_key)


# UI layout.
# NOTE(review): the nesting below was reconstructed from a whitespace-mangled
# source; in particular the exact container of the Transcribe button should
# be confirmed against the intended design.
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            with gr.Row():
                url = gr.Textbox(placeholder='Youtube video URL', label='URL')
                openai_key = gr.Textbox(placeholder='Your openai key', label='key')
            with gr.Row():
                gr.Markdown("Larger models are more accurate, but slower. For 1min video, it'll take ~30s (tiny), ~1min (base), ~3min (small), ~5min (medium), etc.")
            transcribe_btn = gr.Button('Transcribe')
        with gr.Column():
            outputs = gr.Textbox(placeholder='Transcription of the video', label='Transcription')
    # Event listeners must be registered inside the Blocks context.
    transcribe_btn.click(get_transcript, inputs=[url, openai_key], outputs=outputs)

demo.launch(debug=True)