Spaces:

aichina
/

youtube-whisper-09

Sleeping

File size: 4,108 Bytes

638be90
6e941de
638be90
3b83cb7
d2639f0
b6a5731
e129c20
 
11a2742
 
 
e129c20
11a2742
 
 
 
 
 
9921105
741f9c4
11a2742
 
 
 
 
 
 
 
 
 
741f9c4
fff8b5a
 
 
 
d2639f0
ae41ebd
 
 
 
d2639f0
 
 
 
 
 
 
 
 
 
 
 
 
fc46af4
d2639f0
 
 
71dc53b
d2639f0
 
17b56f6
 
 
 
d2639f0
ae41ebd
97c6b92
8078e71
e5be212
 
 
 
 
d2639f0
af53e78
d2639f0
af53e78
17b56f6
4f921f8
17b56f6
26b8bf9
17b56f6
e5a1d61
af53e78
 
d2639f0
42f8b47
d2639f0
 
 
 
e5be212
e129c20
638be90
d834007
700e0be
 
 
 
 
 
2fd9b5b
638be90
ae41ebd
97c6b92
 
 
 
 
 
e129c20
ace44db
 
 
e129c20
 
 
 
 
fff8b5a
e129c20
e0aae73
11a2742
ae41ebd
e129c20
638be90
 
 
 
 
 
 
 
 
 
 
 
71dc53b
0c9ce61
638be90
 
9ddc002
638be90
 
 
 
 
ae41ebd
638be90

import gradio as gr

from pytube import YouTube
import random
import requests,json
import subprocess,os


def del_down_file():
    """Delete every ``*.mp4`` entry in the current working directory.

    Does the job of the former ``rm -rf *.mp4`` shell call without spawning
    a shell: names starting with a dot are skipped (an unquoted shell glob
    does not match them), directories are removed recursively, and failures
    are reported but never raised.
    """
    import shutil  # local import: only needed when a directory matches

    for name in os.listdir('.'):
        if name.startswith('.') or not name.endswith('.mp4'):
            continue
        try:
            if os.path.isdir(name) and not os.path.islink(name):
                shutil.rmtree(name)
            else:
                os.remove(name)
        except OSError as exc:
            # Best-effort cleanup: report the problem and keep going.
            print('could not delete', name, exc)

def get_video(url):
    """Download ``url`` with the ``you-get`` CLI and return the saved file name.

    The download is written to the current directory; afterwards the most
    recently modified ``.mp4`` found there is renamed to ``my_mp4.mp4``.

    Args:
        url: Address of the video page, passed to ``you-get`` as a single
            argument.

    Returns:
        The fixed file name ``'my_mp4.mp4'``.

    Raises:
        FileNotFoundError: If no ``.mp4`` file exists in the current
            directory after the download attempt.
    """
    # Download the video.
    output_dir = '.'
    # An argument list (no shell) keeps a user-supplied URL from being
    # interpreted as shell syntax.
    command = ['you-get', '-o', output_dir, url]
    print(command)
    try:
        subprocess.call(command)
    except OSError as exc:
        # The exit status was never checked, so a launcher failure is only
        # reported; the check below decides whether a usable file exists.
        print('you-get could not be started', exc)

    # Collect every file in the directory whose name ends with '.mp4'.
    mp4_files = [name for name in os.listdir(output_dir) if name.endswith('.mp4')]
    print(mp4_files)
    if not mp4_files:
        raise FileNotFoundError(f'no .mp4 file found after downloading {url}')

    # os.listdir order is arbitrary; take the newest file so a stale
    # download left over from an earlier run is not picked by accident.
    newest = max(mp4_files, key=os.path.getmtime)
    # os.replace overwrites an existing target on every platform.
    os.replace(newest, 'my_mp4.mp4')
    return 'my_mp4.mp4'
    


def create(prompt,openai_key):
    """Send ``prompt`` to the OpenAI completions endpoint and return its JSON.

    Args:
        prompt: Text to complete.
        openai_key: API key, sent as a ``Bearer`` token.

    Returns:
        The decoded JSON body of the HTTP response. The status code is not
        checked, so an error payload is returned as-is.

    Raises:
        requests.exceptions.RequestException: On connection failure or when
            the request exceeds the timeout.
    """
    headers = {
        'Content-Type': 'application/json',
        'Authorization': f'Bearer {openai_key}',
    }
    # NOTE(review): confirm that this model name is still served by the API.
    payload = {
        "model": "text-davinci-003",
        "prompt": prompt,
        "temperature": 0.7,
        "max_tokens": 1024,
        "top_p": 1.0,
        "frequency_penalty": 0.0,
        "presence_penalty": 0.0
    }
    # Log the prompt only: the API key and the Authorization header built
    # from it must not end up in the application logs.
    print(prompt)
    url = 'https://api.openai.com/v1/completions'
    # Without a timeout a stalled connection would block the worker forever.
    r = requests.post(url, headers=headers,
            data=json.dumps(payload), timeout=120)
    print(r.text)
    return r.json()

def split_list(l, n):
    """Yield consecutive slices of ``l`` holding at most ``n`` items each.

    The last slice is shorter when ``len(l)`` is not a multiple of ``n``.
    Nothing is evaluated until the generator is first advanced.
    """
    chunk_starts = range(0, len(l), n)
    yield from (l[start:start + n] for start in chunk_starts)
        

def convert(res,openai_key):
    """Summarise a transcription response chunk by chunk via ``create``.

    Args:
        res: Response object whose ``json()`` returns a mapping with a
            ``'prediction'`` list; every item must carry a
            ``'transcription'`` entry.
        openai_key: API key forwarded to ``create`` for each chunk.

    Returns:
        For every group of up to ten transcript lines, the prompt followed
        by ``'\n GPT3:\n'`` and the stripped completion text, all
        concatenated. If a completion request fails, the text gathered
        before the failure is returned (possibly the empty string).

    Raises:
        KeyError: If the response has no ``'prediction'`` key or an item has
            no ``'transcription'`` key.
    """
    prediction = res.json()['prediction']
    content = [item['transcription'] for item in prediction]

    parts = []
    try:
        for txt_line in split_list(content,10):
            txt_line_content = '\n'.join(txt_line)
            prompt = f"\n\n将下面的内容使用简体中文总结5条要点出来：\n\n{txt_line_content}"
            open_ai_res = create(prompt,openai_key)
            parts.append(prompt + '\n GPT3:\n' + open_ai_res['choices'][0]['text'].strip())
    except Exception as e:
        # Deliberately broad: any failure (network, error payload without
        # 'choices', ...) stops summarising but keeps what was produced.
        print('open ai api error',e)

    return ''.join(parts)

    
        

def get_audio(url):
    """Download the first audio-only stream of a YouTube video.

    The file is saved under a name made of a random integer between 10000
    and 100000 plus the ``.mp4`` suffix, and that name is returned.
    """
    video = YouTube(url)
    target_name = f'{random.randint(10000,100000)}.mp4'
    print(f'{url} {target_name} start get audio ...')

    audio_only_streams = video.streams.filter(only_audio=True)
    first_stream = audio_only_streams[0]
    first_stream.download(filename=target_name)

    print('aodio over ..')
    return target_name

def get_transcript(url,openai_key):
    """Download a video's audio, transcribe it with Gladia and summarise it.

    Args:
        url: Video URL handed to ``get_audio``.
        openai_key: OpenAI API key forwarded to ``convert``.

    Returns:
        The text produced by ``convert`` from the transcription response.
    """
    headers = {
        'accept': 'application/json',
        # NOTE(review): a credential is committed to the source. The
        # GLADIA_API_KEY environment variable now takes precedence; the
        # literal stays only as a fallback so existing deployments keep
        # working and should be rotated and removed.
        'x-gladia-key': os.environ.get('GLADIA_API_KEY', '89b0adf5-fb2c-48ba-8a66-76b02827fd14'),
        # requests won't add a boundary if this header is set when you pass files=
        # 'Content-Type': 'multipart/form-data',
    }
    audio_file = get_audio(url)

    print(audio_file)

    try:
        # The context manager closes the upload handle even if the request fails.
        with open(audio_file, 'rb') as audio_handle:
            files = {
                'audio': (f"{audio_file}", audio_handle, 'video/mp4'),
                'language': (None, 'english'),
                'language_behaviour': (None, 'automatic single language'),
            }
            print('get transcription from  api.gladia.io ...')
            response = requests.post('https://api.gladia.io/audio/text/audio-transcription/', headers=headers, files=files)
    finally:
        # Remove only this request's download, so a concurrent request's
        # file is left alone, and do it on failure as well.
        try:
            os.remove(audio_file)
        except OSError as exc:
            print('could not delete', audio_file, exc)

    print(response.text)
    return convert(response,openai_key)
    




# Gradio UI: inputs and the trigger button in the left column, the result
# text box in the right column.
with gr.Blocks() as demo:

    with gr.Row():

        with gr.Column():

            with gr.Row():
                url = gr.Textbox(placeholder='Youtube video URL', label='URL')
                # Masked input so the API key is not shown in clear text on screen.
                openai_key = gr.Textbox(placeholder='Your openai key', label='OPENAI KEY', type='password')

            with gr.Row():
                gr.Markdown("自动从youtube视频中，获取音频内容，并使用GPT总结其要点")
                transcribe_btn = gr.Button('Transcribe')

        with gr.Column():
            outputs = gr.Textbox(placeholder='Transcription of the video', label='Transcription')

    # Clicking the button runs the whole pipeline and shows its text result.
    transcribe_btn.click(get_transcript, inputs=[url,openai_key], outputs=outputs)

demo.launch(debug=True)