mimbres committed on
Commit
93dfb16
1 Parent(s): ed38133

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -2
app.py CHANGED
@@ -123,7 +123,12 @@ def play_video(youtube_url):
123
 
124
 
125
  AUDIO_EXAMPLES = glob.glob('examples/*.*', recursive=True)
126
- YOUTUBE_EXAMPLES = ["https://www.youtube.com/watch?v=vMboypSkj3c"]
 
 
 
 
 
127
 
128
  # theme = 'gradio/dracula_revamped' #'Insuz/Mocha' #gr.themes.Soft()
129
  # with gr.Blocks(theme=theme) as demo:
@@ -157,9 +162,18 @@ with gr.Blocks(theme=theme, css=css) as demo:
157
  gr.Markdown(
158
  """
159
  ## 🎶YourMT3+: Multi-instrument Music Transcription with Enhanced Transformer Architectures and Cross-dataset Stem Augmentation
 
 
 
 
 
 
 
 
 
160
  #### Caution:
161
  - Currently running on CPU, and it takes longer than 3 minutes for a 30-second input.
162
- - For acadmic reproduction purpose, we strongly recommend to use or [Colab Demo](https://colab.research.google.com/drive/1AgOVEBfZknDkjmSRA7leoa81a2vrnhBG?usp=sharing) with multiple checkpoints.
163
  ### [Paper](https://arxiv.org/abs/2407.04822) [Code](https://github.com/mimbres/YourMT3)
164
  """)
165
 
 
123
 
124
 
125
  AUDIO_EXAMPLES = glob.glob('examples/*.*', recursive=True)
126
+ YOUTUBE_EXAMPLES = ["https://www.youtube.com/watch?v=vMboypSkj3c",
127
+ "https://youtu.be/OXXRoa1U6xU?si=nhJ6lzGenCmk4P7R",
128
+ "https://youtu.be/EOJ0wH6h3rE?si=a99k6BnSajvNmXcn",
129
+ "https://youtu.be/7mjQooXt28o?si=qqmMxCxwqBlLPDI2",
130
+ "https://youtu.be/bnS-HK_lTHA?si=PQLVAab3QHMbv0S3", "https://youtu.be/zJB0nnOc7bM?si=EA1DN8nHWJcpQWp_",
131
+ "https://youtu.be/mIWYTg55h10?si=WkbtKfL6NlNquvT8"]
132
 
133
  # theme = 'gradio/dracula_revamped' #'Insuz/Mocha' #gr.themes.Soft()
134
  # with gr.Blocks(theme=theme) as demo:
 
162
  gr.Markdown(
163
  """
164
  ## 🎶YourMT3+: Multi-instrument Music Transcription with Enhanced Transformer Architectures and Cross-dataset Stem Augmentation
165
+ ### Model card:
166
+ - Model name: `YPTF.MoE+Multi`
167
+ - Encoder backbone: Perceiver-TF + Mixture of Experts (2/8)
168
+ - Decoder backbone: Multi-channel T5-small
169
+ - Tokenizer: MT3 tokens with Singing extension
170
+ - Dataset: YourMT3 dataset
171
+ - Augmentation strategy: Intra-/Cross dataset stem augment, No Pitch-shifting
172
+ - FP Precision: BF16-mixed for training, FP16 for inference
173
+
174
  #### Caution:
175
  - Currently running on CPU, and it takes longer than 3 minutes for a 30-second input.
176
+ - For academic reproduction purposes, we strongly recommend using the [Colab Demo](https://colab.research.google.com/drive/1AgOVEBfZknDkjmSRA7leoa81a2vrnhBG?usp=sharing) with multiple checkpoints.
177
  ### [Paper](https://arxiv.org/abs/2407.04822) [Code](https://github.com/mimbres/YourMT3)
178
  """)
179