import json
import os
import random

import gradio as gr
import requests
from pytube import YouTube

# API credentials. Each key can be supplied through the environment; the
# literals are kept only as fallbacks so existing deployments keep working.
# NOTE(review): these keys are committed in plain text -- rotate them and drop
# the fallbacks once the environment variables are in place.
gladia_key = os.environ.get('GLADIA_API_KEY', '89b0adf5-fb2c-48ba-8a66-76b02827fd14')
openai_key = os.environ.get('OPENAI_API_KEY', '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')

# Headers for the Gladia transcription API. They used to be assigned to
# `headers` as well and were immediately overwritten by the OpenAI headers
# below; a distinct name keeps them available.
gladia_headers = {
    'accept': 'application/json',
    'x-gladia-key': gladia_key,
    # Content-Type is deliberately omitted: requests won't add a multipart
    # boundary if this header is set when you pass files=.
}

# Headers for the OpenAI API. `headers` keeps the value it previously ended up
# with (the second assignment won), so code reading `headers` is unaffected.
headers = {
    'Content-Type': 'application/json',
    'Authorization': f'Bearer {openai_key}',
}

def create(prompt, model="gpt-3.5-turbo-instruct", timeout=120):
    """Request a text completion from the OpenAI completions endpoint.

    Args:
        prompt: Text sent as the completion prompt.
        model: Name of the completions model. The previously hard-coded
            ``text-davinci-003`` has been retired by OpenAI;
            ``gpt-3.5-turbo-instruct`` is the replacement served by the same
            ``/v1/completions`` endpoint with the same response shape.
        timeout: Seconds to wait for the HTTP request before giving up, so a
            stalled connection cannot block the caller forever.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status.
        requests.RequestException: On connection problems or timeout.
    """
    payload = {
        "model": model,
        "prompt": prompt,
        "temperature": 0.7,
        "max_tokens": 1024,
        "top_p": 1.0,
        "frequency_penalty": 0.0,
        "presence_penalty": 0.0,
    }
    response = requests.post(
        'https://api.openai.com/v1/completions',
        headers=headers,
        data=json.dumps(payload),
        timeout=timeout,
    )
    # Fail here with the HTTP status instead of handing an error body to the
    # caller, which would otherwise trip over a missing 'choices' key.
    response.raise_for_status()
    return response.json()


def convert(res):
    """Build a transcript-plus-summary report from a transcription response.

    Args:
        res: Response object exposing ``.json()``. The decoded body must hold
            a ``prediction`` list whose items each carry a ``transcription``
            entry.

    Returns:
        A string containing the joined transcript followed by the text of the
        first choice returned by ``create``.
    """
    segments = [item['transcription'] for item in res.json()['prediction']]
    transcript = '\n'.join(segments)

    # The prompt (in Chinese) asks for the content below to be summarised
    # into 10 key points.
    completion = create(f"将下面的内容，总结10条要点出来，\n{transcript}")
    summary = completion['choices'][0]['text']

    # Labels (in Chinese): "Audio content" / "Key points summarised by GPT3".
    return f'音频内容：\n{transcript}\nGPT3总结的要点：\n{summary}'

    
def get_audio(url):
    """Download the first audio-only stream of a YouTube video.

    The file is written to the current working directory under a randomly
    numbered ``.mp4`` name.

    Args:
        url: URL of the YouTube video.

    Returns:
        The file name the audio was saved under.

    Raises:
        ValueError: If the video exposes no audio-only stream.
    """
    yt = YouTube(url)
    # Relies on the module-level ``import random``; without it this line
    # raised NameError on every call.
    audio_file = f'{random.randint(10000,100000)}.mp4'
    print(f'{url} {audio_file} start get audio ...')

    # first() returns None for an empty query, which lets us report the
    # problem clearly instead of failing with a bare IndexError.
    stream = yt.streams.filter(only_audio=True).first()
    if stream is None:
        raise ValueError(f'no audio-only stream available for {url}')

    stream.download(filename=audio_file)
    print('aodio over ..')
    return audio_file

def get_transcript(url):
    """Download a video's audio, transcribe it with Gladia and summarise it.

    Args:
        url: URL of the YouTube video.

    Returns:
        The report string produced by ``convert`` from the Gladia response.
    """
    # Gladia needs its own headers: the module-level `headers` carries the
    # OpenAI bearer token and a JSON Content-Type. Content-Type is left unset
    # here on purpose -- requests only adds the multipart boundary when it
    # generates that header itself.
    # NOTE(review): the fallback key is a committed secret; rotate it and
    # supply GLADIA_API_KEY through the environment instead.
    gladia_request_headers = {
        'accept': 'application/json',
        'x-gladia-key': os.environ.get(
            'GLADIA_API_KEY', '89b0adf5-fb2c-48ba-8a66-76b02827fd14'
        ),
    }

    audio_file = get_audio(url)
    try:
        # The context manager closes the upload handle even if the request
        # raises.
        with open(audio_file, 'rb') as audio_stream:
            files = {
                'audio': (audio_file, audio_stream, 'video/mp4'),
                'audio_url': (None, ''),
                'language': (None, 'english'),
                'language_behaviour': (None, 'automatic single language'),
            }
            response = requests.post(
                'https://api.gladia.io/audio/text/audio-transcription/',
                headers=gladia_request_headers,
                files=files,
            )
    finally:
        # The download is only an upload buffer; remove it so repeated
        # requests do not fill the working directory.
        if os.path.exists(audio_file):
            os.remove(audio_file)

    print(response.text)
    return convert(response)
    

# Gradio UI: a two-column layout with the URL input and trigger button on the
# left and the result text box on the right.
with gr.Blocks() as demo:

    with gr.Row():

        # Left column: input controls.
        with gr.Column():

            with gr.Row():
                url = gr.Textbox(placeholder='Youtube video URL', label='URL')


            with gr.Row():
                # NOTE(review): this hint talks about model sizes, but no
                # model-size choice is visible in this file -- confirm the
                # text is still accurate.
                gr.Markdown("Larger models are more accurate, but slower. For 1min video, it'll take ~30s (tiny), ~1min (base), ~3min (small), ~5min (medium), etc.")
                transcribe_btn = gr.Button('Transcribe')

        # Right column: output of get_transcript.
        with gr.Column():
            outputs = gr.Textbox(placeholder='Transcription of the video', label='Transcription')

    # Clicking the button passes the URL text to get_transcript and writes
    # its return value into the output box.
    transcribe_btn.click(get_transcript, inputs=[url], outputs=outputs)

# Starts the app at import time (no __main__ guard), with debug mode enabled.
demo.launch(debug=True)