Spaces:

aichina
/

youtube-whisper-09

Sleeping

File size: 3,292 Bytes

638be90
6e941de
638be90
3b83cb7
d2639f0
e129c20
 
 
d2639f0
ae41ebd
 
 
 
d2639f0
 
 
 
 
 
 
 
 
 
 
 
 
fc46af4
d2639f0
 
 
71dc53b
d2639f0
 
17b56f6
 
 
 
d2639f0
ae41ebd
97c6b92
8078e71
e5be212
 
 
 
 
d2639f0
af53e78
d2639f0
af53e78
17b56f6
4f921f8
17b56f6
42f8b47
17b56f6
42f8b47
af53e78
 
d2639f0
42f8b47
d2639f0
 
 
 
e5be212
e129c20
638be90
d834007
638be90
d834007
 
 
2fd9b5b
 
638be90
ae41ebd
97c6b92
 
 
 
 
 
e129c20
d834007
e129c20
 
 
 
 
 
e0aae73
ae41ebd
e129c20
638be90
 
 
 
 
 
 
 
 
 
 
 
71dc53b
0c9ce61
638be90
 
 
 
 
 
 
 
ae41ebd
638be90

import gradio as gr

from pytube import YouTube
import random
import requests,json





def create(prompt, openai_key, timeout=120):
    """Request a text completion from the OpenAI completions endpoint.

    Args:
        prompt: Text sent as the ``prompt`` field of the request body.
        openai_key: API key placed in the ``Authorization: Bearer`` header.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The response body decoded from JSON. The HTTP status code is not
        checked here, so an error payload is returned as-is.

    Raises:
        requests.RequestException: On connection failure or timeout.
        ValueError: If the response body cannot be decoded as JSON.
    """
    headers = {
        'Content-Type': 'application/json',
        'Authorization': f'Bearer {openai_key}',
    }
    payload = {
        "model": "text-davinci-003",
        "prompt": prompt,
        "temperature": 0.7,
        "max_tokens": 1024,
        "top_p": 1.0,
        "frequency_penalty": 0.0,
        "presence_penalty": 0.0,
    }
    # Log only the prompt: the headers and the key are credentials and must
    # not end up in the application logs.
    print(prompt)
    url = 'https://api.openai.com/v1/completions'
    # An explicit timeout keeps a stalled connection from blocking the caller
    # forever.
    r = requests.post(url, headers=headers, data=json.dumps(payload),
                      timeout=timeout)
    print(r.text)
    return r.json()

def split_list(l, n):
    """Lazily yield consecutive slices of ``l``, each at most ``n`` items long.

    The final slice may be shorter when ``len(l)`` is not a multiple of ``n``.
    """
    yield from (l[start:start + n] for start in range(0, len(l), n))
        

def convert(res, openai_key):
    """Summarise a transcription response in groups of ten lines.

    Args:
        res: Response-like object whose ``json()`` returns a mapping with a
            ``'prediction'`` list; every item must carry a
            ``'transcription'`` key.
        openai_key: OpenAI API key, forwarded to ``create``.

    Returns:
        One string made of, for each group of up to ten transcription lines,
        the prompt that was sent, a ``GPT3:`` separator and the completion
        text. If a request fails part-way, the text gathered before the
        failure is returned (possibly the empty string).

    Raises:
        KeyError: If the decoded response has no ``'prediction'`` key or an
            item has no ``'transcription'`` key.
    """
    prediction = res.json()['prediction']
    lines = [item['transcription'] for item in prediction]

    sections = []
    try:
        for chunk in split_list(lines, 10):
            chunk_text = '\n'.join(chunk)
            # The prompt asks for a five-point summary in Simplified Chinese.
            prompt = f"将下面的内容使用简体中文总结5条要点出来：\n\n{chunk_text}"
            open_ai_res = create(prompt, openai_key)
            sections.append(
                prompt + '\n GPT3:\n' + open_ai_res['choices'][0]['text']
            )
    except Exception as e:
        # Best-effort boundary: a failed request (network error, or an error
        # payload without 'choices') stops the loop but keeps the sections
        # already collected.
        print('open ai api error', e)

    return ''.join(sections)

    
        

def get_audio(url):
    """Download the first audio-only stream of a YouTube video.

    The file is saved under a random numeric name ending in ``.mp4``.

    Args:
        url: Address of the YouTube video.

    Returns:
        The file name that was passed to the download call.
    """
    video = YouTube(url)
    suffix = random.randint(10000,100000)
    audio_file = f'{suffix}.mp4'
    print(f'{url} {audio_file} start get audio ...')

    audio_streams = video.streams.filter(only_audio=True)
    first_stream = audio_streams[0]
    first_stream.download(filename=audio_file)

    print('aodio over ..')
    return audio_file

def get_transcript(url, openai_key):
    """Download a video's audio, transcribe it via Gladia and summarise it.

    Args:
        url: Address of the YouTube video to process.
        openai_key: OpenAI API key, forwarded to ``convert``.

    Returns:
        The summary text produced by ``convert`` from the transcription
        response.
    """
    # Local import so this function does not rely on the module's import list.
    import os

    headers = {
        'accept': 'application/json',
        # NOTE(security): an API key committed to source control is exposed to
        # every reader of the repository. The GLADIA_API_KEY environment
        # variable takes precedence; the literal is kept only as a
        # backward-compatible fallback and should be rotated and removed.
        'x-gladia-key': os.environ.get(
            'GLADIA_API_KEY', '89b0adf5-fb2c-48ba-8a66-76b02827fd14'
        ),
        # requests won't add a boundary if this header is set when you pass files=
        # 'Content-Type': 'multipart/form-data',
    }
    audio_file = get_audio(url)
    try:
        # The context manager closes the upload handle even if the request
        # raises.
        with open(audio_file, 'rb') as audio_handle:
            files = {
                'audio': (f"{audio_file}", audio_handle, 'video/mp4'),
                'language': (None, 'english'),
                'language_behaviour': (None, 'automatic single language'),
            }
            response = requests.post(
                'https://api.gladia.io/audio/text/audio-transcription/',
                headers=headers,
                files=files,
            )
    finally:
        # The download is only needed for the upload; remove it so repeated
        # requests do not accumulate files on disk.
        try:
            os.remove(audio_file)
        except OSError as cleanup_error:
            print('could not remove temporary audio file', cleanup_error)
    print(response.text)
    return convert(response, openai_key)
    




# Page layout: the left column holds the inputs and the trigger button, the
# right column shows the result returned by get_transcript.
with gr.Blocks() as demo:

    with gr.Row():

        with gr.Column():

            with gr.Row():
                url = gr.Textbox(placeholder='Youtube video URL', label='URL')
                # Mask the field so the secret key is not displayed on screen.
                openai_key = gr.Textbox(placeholder='Your openai key', label='OPENAI KEY', type='password')


            with gr.Row():
                # NOTE(review): this text talks about model sizes, while the
                # handler sends the audio to a remote transcription API --
                # confirm the wording still applies.
                gr.Markdown("Larger models are more accurate, but slower. For 1min video, it'll take ~30s (tiny), ~1min (base), ~3min (small), ~5min (medium), etc.")
                transcribe_btn = gr.Button('Transcribe')

        with gr.Column():
            outputs = gr.Textbox(placeholder='Transcription of the video', label='Transcription')

    # Clicking the button runs get_transcript(url, openai_key) and writes its
    # return value into the output textbox.
    transcribe_btn.click(get_transcript, inputs=[url,openai_key], outputs=outputs)

demo.launch(debug=True)