# Hugging Face Space (running on ZeroGPU) for audio super-resolution with AudioSR.
import os
import tempfile
from typing import List

import gradio as gr
import librosa
import numpy as np
import soundfile as sf
import spaces
from audiosr import super_resolution
from audiosr_utils import load_audiosr

# Load the AudioSR model once at startup (audiosr_utils is presumably a
# helper module bundled with this Space).
audiosr_model = load_audiosr()


def split_audio_to_chunks(y, sr=48000, chunk_duration=5.12) -> List[str]:
    # Calculate the number of samples per chunk
    chunk_samples = int(chunk_duration * sr)
    # Split the audio into chunks (the last chunk may be shorter)
    chunks = [y[i : i + chunk_samples] for i in range(0, len(y), chunk_samples)]
    # Save each chunk to its own temporary WAV file
    temp_files = []
    for chunk in chunks:
        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
        temp_files.append(temp_file.name)
        sf.write(temp_file.name, chunk, sr)
    return temp_files
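

# Worked example: at 48 kHz, a 5.12 s chunk is int(5.12 * 48000) = 245760
# samples, so a 60 s clip yields 12 chunks (the last one shorter). The 5.12 s
# window is presumably chosen to match the segment length AudioSR processes.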


# ZeroGPU Spaces attach a GPU only inside functions decorated with
# @spaces.GPU, so the diffusion loop is wrapped here.
@spaces.GPU
def run_audiosr(
    chunks: List[str], guidance_scale: float, ddim_steps: int
) -> np.ndarray:
    waveforms = []
    for i, chunk in enumerate(chunks):
        print(f"Processing chunk {i + 1}/{len(chunks)}")
        waveform = super_resolution(
            audiosr_model,
            chunk,
            guidance_scale=guidance_scale,
            ddim_steps=ddim_steps,
        )
        waveforms.append(waveform)
    # Each chunk comes back as (1, 1, N); join them along the sample axis
    waveform = np.concatenate(waveforms, axis=-1)
    waveform = waveform.squeeze()
    return waveform


def audiosr_infer(audio: str) -> str:
    guidance_scale = 3.5
    ddim_steps = 100
    # Load (and resample) the input at 48 kHz
    y, sr = librosa.load(audio, sr=48000)
    if len(y) > 60 * sr:
        y = y[: 60 * sr]
        gr.Info("Audio is too long, only the first 60 seconds will be processed")
    chunk_files = split_audio_to_chunks(y, sr=sr, chunk_duration=5.12)
    print(f"Split audio chunks: {chunk_files}")
    waveform = run_audiosr(chunk_files, guidance_scale, ddim_steps)
    sr = 44100  # sample rate used for the saved output
    # Remove the temporary chunk files now that inference is done
    for chunk_file in chunk_files:
        os.remove(chunk_file)
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as f:
        sf.write(f.name, waveform, sr)
        return f.name
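

# Note: audiosr_infer writes MP3 through soundfile, which requires a
# libsndfile build with MP3 support (libsndfile >= 1.1.0).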


models = {
    "AudioSR": audiosr_infer,
}


def infer(audio: str, model: str, sr: int) -> str:
    if sr > 0:
        # resample audio
        y, _ = librosa.load(audio, sr=sr)
        # save resampled audio
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
            sf.write(f.name, y, sr)
            return models[model](f.name)
    else:
        return models[model](audio)
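

# Example (with a hypothetical input file): infer("speech.wav", "AudioSR", 8000)
# first resamples the input down to 8 kHz and then runs AudioSR on it,
# presumably useful for hearing how the model restores a deliberately
# band-limited signal.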


with gr.Blocks() as app:
    with open(os.path.join(os.path.dirname(__file__), "README.md"), "r") as f:
        README = f.read()
    # remove the YAML front matter (everything between the first two "---")
    blocks = README.split("---")
    if len(blocks) > 1:
        README = "---".join(blocks[2:])
    gr.Markdown(README)
    with gr.Row():
        with gr.Column():
            gr.Markdown("## Upload an audio file")
            audio = gr.Audio(label="Upload an audio file", type="filepath")
            sr = gr.Slider(
                value=0,
                label="Resample audio to this rate before inference (0 = no resampling)",
                minimum=0,
                maximum=48000,
                step=1000,
            )
            with gr.Row():
                model = gr.Radio(
                    label="Select a model",
                    choices=list(models.keys()),
                    value="AudioSR",
                )
                btn = gr.Button("Infer")
    with gr.Row():
        with gr.Column():
            out = gr.Audio(
                label="Output", format="mp3", type="filepath", interactive=False
            )
    btn.click(
        fn=infer,
        inputs=[audio, model, sr],
        outputs=[out],
        api_name="infer",
    )

app.launch(show_error=True)
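
# Because btn.click registers api_name="infer", the Space can also be driven
# programmatically. A minimal client-side sketch, assuming the gradio_client
# package is installed and using a placeholder Space id:
#
#     from gradio_client import Client, handle_file
#
#     client = Client("user/audio-super-resolution")  # hypothetical Space id
#     result = client.predict(
#         handle_file("input.wav"),  # hypothetical local file
#         "AudioSR",                 # model choice
#         0,                         # 0 = no resampling before inference
#         api_name="/infer",
#     )
#     print(result)  # filepath of the upsampled MP3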