Spaces:
Sleeping
Sleeping
File size: 4,599 Bytes
10a0c3e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 |
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [
{
"cell_type": "markdown",
"source": [
"# (YouTube) video to audio"
],
"metadata": {
"id": "kNt1V_xZCYzb"
}
},
{
"cell_type": "markdown",
"source": [
"- Author: [Pierre Guillou](https://www.linkedin.com/in/pierreguillou/)\n",
"- Date: 08/10/2023"
],
"metadata": {
"id": "Fa6V8oEynFe-"
}
},
{
"cell_type": "code",
"source": [
"%%capture\n",
"# ffmpeg is needed for the mp3 conversion; uncomment the next line if it is not already installed.\n",
"#!apt-get install -y ffmpeg\n",
"# %pip (rather than !pip) installs into the environment of the running kernel.\n",
"%pip install -U yt-dlp\n",
"%pip install unidecode gradio pydub"
],
"metadata": {
"id": "S4yB5r9RCdkH"
},
"execution_count": 1,
"outputs": []
},
{
"cell_type": "code",
"source": [
"import gradio as gr\n",
"import re, unidecode\n",
"from unidecode import unidecode\n",
"import yt_dlp\n",
"import os\n",
"import pydub\n",
"import numpy as np\n",
"\n",
"# Normalize a string: transliterate accents, replace runs of non-word characters with '_', lowercase\n",
"def cleanString(string):\n",
"    \"\"\"Return an ASCII, lowercase, underscore-separated version of `string`.\n",
"\n",
"    The text is first passed through `unidecode`, then every run of non-word\n",
"    characters (spaces, punctuation, ...) is replaced by a single '_', and\n",
"    the result is lowercased.\n",
"    \"\"\"\n",
"    ascii_text = unidecode(string)\n",
"    # Raw string: a bare backslash-W in a plain literal is an invalid escape sequence.\n",
"    return re.sub(r'\\W+', '_', ascii_text).lower()\n",
"\n",
"# from audio file path to sample rate and numpy array\n",
"def read_audio(f, normalized=False):\n",
"    \"\"\"Decode an MP3 into its sample rate and a numpy array of samples.\n",
"\n",
"    Args:\n",
"        f: MP3 source handed to `pydub.AudioSegment.from_mp3` (a file path in this notebook).\n",
"        normalized: if True, return float32 samples divided by the full-scale\n",
"            value of the segment's sample width; otherwise return the raw samples.\n",
"\n",
"    Returns:\n",
"        `(frame_rate, samples)`. `samples` has shape `(n_frames, channels)` for\n",
"        multi-channel audio and is one-dimensional for mono.\n",
"    \"\"\"\n",
"    segment = pydub.AudioSegment.from_mp3(f)\n",
"    samples = np.array(segment.get_array_of_samples())\n",
"    # Samples are interleaved per frame: one column per channel, not only for stereo.\n",
"    if segment.channels > 1:\n",
"        samples = samples.reshape((-1, segment.channels))\n",
"    if normalized:\n",
"        # Full scale follows the sample width in bytes (2**15 for 16-bit audio).\n",
"        full_scale = 2 ** (8 * segment.sample_width - 1)\n",
"        return segment.frame_rate, np.float32(samples) / full_scale\n",
"    return segment.frame_rate, samples\n",
"\n",
"# from YouTube url to audio file path and sample rate + numpy array\n",
"def download_audio(url):\n",
"    \"\"\"Download the audio track of a video as mp3 and load it.\n",
"\n",
"    The audio is fetched with yt-dlp into `./audio/`, converted to mp3 by the\n",
"    `FFmpegExtractAudio` postprocessor, then renamed to `cleanString(title) + '.mp3'`.\n",
"\n",
"    Args:\n",
"        url: address of the video, passed to `yt_dlp.YoutubeDL.extract_info`.\n",
"\n",
"    Returns:\n",
"        `(file_path, read_audio(file_path))`: the path of the renamed mp3 and the\n",
"        `(sample_rate, samples)` pair decoded from it.\n",
"\n",
"    Raises:\n",
"        FileNotFoundError: if no file matching the video title is found after the download.\n",
"    \"\"\"\n",
"    path_to_folder_audio_mp3 = \"./audio/\"\n",
"    ydl_opts = {\n",
"        'format': 'm4a/bestaudio/best',\n",
"        'outtmpl': f'{path_to_folder_audio_mp3}%(title)s',\n",
"        'postprocessors': [{\n",
"            'key': 'FFmpegExtractAudio',\n",
"            'preferredcodec': 'mp3',\n",
"        }]\n",
"    }\n",
"\n",
"    with yt_dlp.YoutubeDL(ydl_opts) as ydl:\n",
"        info_dict = ydl.extract_info(url, download=True)\n",
"    video_title = info_dict['title']\n",
"\n",
"    # The name written to disk is not assumed to equal the raw title, so the\n",
"    # downloaded file is located by comparing cleaned names.\n",
"    expected_key = cleanString(video_title + \".mp3\")\n",
"    new_local_link = cleanString(video_title) + \".mp3\"\n",
"    file_path = os.path.join(path_to_folder_audio_mp3, new_local_link)\n",
"\n",
"    found = False\n",
"    for filename in os.listdir(path_to_folder_audio_mp3):\n",
"        if cleanString(filename) == expected_key:\n",
"            # os.replace overwrites an existing target on every platform.\n",
"            os.replace(os.path.join(path_to_folder_audio_mp3, filename), file_path)\n",
"            found = True\n",
"\n",
"    # Fail here with a clear message instead of an opaque error while decoding.\n",
"    if not found:\n",
"        raise FileNotFoundError(\n",
"            f\"No downloaded mp3 matching '{video_title}' found in {path_to_folder_audio_mp3}\"\n",
"        )\n",
"\n",
"    return file_path, read_audio(file_path)\n",
"\n",
"# Gradio interface\n",
"url_input = gr.Textbox(label=\"YouTube Video URL\")\n",
"file_output = gr.File(label=\"Output Audio File\")\n",
"audio_output = gr.Audio(label=\"Play Audio\", show_download_button=False, format=\"mp3\")\n",
"\n",
"# One URL field in; the mp3 file and an audio player out.\n",
"iface = gr.Interface(\n",
"    fn=download_audio,\n",
"    inputs=url_input,\n",
"    outputs=[file_output, audio_output],\n",
"    allow_flagging=\"never\",\n",
"    title=\"YouTube Video to Audio (mp3)\",\n",
"    description=\"Just paste any YouTube video url and get its corresponding audio file in mp3.\",\n",
")\n",
"iface.launch()"
],
"metadata": {
"id": "9YvB5hBloP1f"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"# END"
],
"metadata": {
"id": "u9QYxqjtnzCD"
}
}
]
} |