import gradio as gr
import os
import shutil
import spaces
import sys

# Dependency setup: uncomment the lines below to install the pinned packages at startup.
# NOTE: still fixing bugs, not an official release yet, do not try :) !
# os.system('pip install -r qa_mdt/requirements.txt')
# os.system('pip install xformers==0.0.26.post1')
# os.system('pip install torchlibrosa==0.0.9 librosa==0.9.2')
# os.system('pip install -q pytorch_lightning==2.1.3 torchlibrosa==0.0.9 librosa==0.9.2 ftfy==6.1.1 braceexpand')
# os.system('pip install torch==2.3.0+cu121 torchvision==0.18.0+cu121 torchaudio==2.3.0 --index-url https://download.pytorch.org/whl/cu121')
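# A safer alternative to os.system for startup installs (a minimal sketch; assumes
# pip is available in the Space image and uses the same pinned requirements file):
# import subprocess
# subprocess.run([sys.executable, "-m", "pip", "install", "-r", "qa_mdt/requirements.txt"], check=True)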

# only then import the necessary modules from qa_mdt
from qa_mdt.pipeline import MOSDiffusionPipeline


# Instantiate the pipeline once at module load so model weights are read a single time per process.
pipe = MOSDiffusionPipeline()

# Run the pipeline on the user's prompt; the pipeline saves its output as './awesome.wav'.
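# The @spaces.GPU decorator requests ZeroGPU hardware for each call, up to the given duration in seconds.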
@spaces.GPU(duration=120)
def generate_waveform(description):
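    # Prepend a quality tag to nudge generation toward higher-quality audio.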
    high_quality_description = "high quality " + description
    pipe(high_quality_description)

    generated_file_path = "./awesome.wav"

    if os.path.exists(generated_file_path):
        # Render an animated waveform video to display alongside the audio.
        waveform_video = gr.make_waveform(
            audio=generated_file_path,
            bg_color="#000000",
            bars_color="#00FF00",
            bar_count=100,
            bar_width=1.5,
            animate=True,
        )
        return waveform_video, generated_file_path
    else:
        # Raise instead of returning a bare error string: the interface declares
        # two outputs, so a single string return value would not match.
        raise gr.Error("Failed to generate the waveform.")

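# Headless smoke test (a minimal sketch; assumes a context where @spaces.GPU-decorated
# calls can run, and that the pipeline writes ./awesome.wav as described above):
# video, audio = generate_waveform("calm piano over soft rain")
# print("Saved audio at:", audio)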

intro = """
# 🎶 OpenMusic: Diffusion That Plays Music 🎧 🎹

Welcome to **OpenMusic**, a next-gen diffusion model designed to generate high-quality music audio from text descriptions! 

Simply enter a few words describing the vibe, and watch as the model generates a unique track for your input. 

Powered by the QA-MDT model, introduced in the research paper linked below.

- [GitHub Repo](https://github.com/ivcylc/qa-mdt) by [@changli](https://github.com/ivcylc) 🎓.
- [Paper](https://arxiv.org/pdf/2405.15863)
- [Hugging Face model](https://huggingface.co/jadechoghari/qa_mdt) by [@jadechoghari](https://github.com/jadechoghari) 🤗.

Note: The music generation process takes about 1-2 minutes 🎶

---

"""

# Gradio interface: textbox prompt in, waveform video and audio file out.
iface = gr.Interface(
    fn=generate_waveform,
    inputs=gr.Textbox(lines=2, placeholder="Enter a music description here..."),
    outputs=[gr.Video(label="Watch the Waveform 🎼"), gr.Audio(label="Download the Music 🎶")],
    description=intro,
    examples=[
        ["A modern synthesizer creating futuristic soundscapes."],
        ["Acoustic ballad with heartfelt lyrics and soft piano."],
    ],
    cache_examples=True,
)

# Launch the Gradio app
if __name__ == "__main__":
    iface.launch()
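# Optional: for long-running GPU jobs a request queue helps, and a public link can
# be exposed. A minimal sketch using standard Gradio options, not part of the app:
# iface.queue().launch(share=True)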