Imagen-POP-Music-Medley-Diffusion-Transformer

Sleeping

App Files Community

asigalov61 committed on Jun 26

Commit

d26af00

•

1 Parent(s): b7b4e7f

Update app.py

Browse files

Files changed (1) hide show

app.py +11 -145

app.py CHANGED Viewed

@@ -1,4 +1,4 @@
-import os.path
 import time as reqtime
 import datetime
@@ -23,15 +23,15 @@ in_space = os.getenv("SYSTEM") == "spaces"
 # =================================================================================================
 @spaces.GPU
-def GenerateAccompaniment(input_midi, input_num_tokens, input_conditioning_type, input_strip_notes):
     print('=' * 70)
     print('Req start time: {:%Y-%m-%d %H:%M:%S}'.format(datetime.datetime.now(PDT)))
     start_time = reqtime.time()
     print('Loading model...')
-    SEQ_LEN = 8192 # Models seq len
-    PAD_IDX = 707 # Models pad index
     DEVICE = 'cuda' # 'cuda'
     # instantiate the model
@@ -39,7 +39,7 @@ def GenerateAccompaniment(input_midi, input_num_tokens, input_conditioning_type,
     model = TransformerWrapper(
         num_tokens = PAD_IDX+1,
         max_seq_len = SEQ_LEN,
-        attn_layers = Decoder(dim = 2048, depth = 4, heads = 16, attn_flash = True)
         )
     model = AutoregressiveWrapper(model, ignore_index = PAD_IDX)
@@ -50,7 +50,7 @@ def GenerateAccompaniment(input_midi, input_num_tokens, input_conditioning_type,
     print('Loading model checkpoint...')
     model.load_state_dict(
-        torch.load('Chords_Progressions_Transformer_Small_2048_Trained_Model_12947_steps_0.9316_loss_0.7386_acc.pth',
                    map_location=DEVICE))
     print('=' * 70)
@@ -59,145 +59,15 @@ def GenerateAccompaniment(input_midi, input_num_tokens, input_conditioning_type,
     if DEVICE == 'cpu':
         dtype = torch.bfloat16
     else:
-        dtype = torch.float16
     ctx = torch.amp.autocast(device_type=DEVICE, dtype=dtype)
     print('Done!')
     print('=' * 70)
-    fn = os.path.basename(input_midi.name)
-    fn1 = fn.split('.')[0]
-    input_num_tokens = max(4, min(128, input_num_tokens))
-    print('-' * 70)
-    print('Input file name:', fn)
-    print('Req num toks:', input_num_tokens)
-    print('Conditioning type:', input_conditioning_type)
-    print('Strip notes:', input_strip_notes)
     print('-' * 70)
-    #===============================================================================
-    raw_score = TMIDIX.midi2single_track_ms_score(input_midi.name)
-    #===============================================================================
-    # Enhanced score notes
-    escore_notes = TMIDIX.advanced_score_processor(raw_score, return_enhanced_score_notes=True)[0]
-    no_drums_escore_notes = [e for e in escore_notes if e[6] < 80]
-    if len(no_drums_escore_notes) > 0:
-        #=======================================================
-        # PRE-PROCESSING
-        #===============================================================================
-        # Augmented enhanced score notes
-        no_drums_escore_notes = TMIDIX.augment_enhanced_score_notes(no_drums_escore_notes)
-        cscore = TMIDIX.chordify_score([1000, no_drums_escore_notes])
-        clean_cscore = []
-        for c in cscore:
-            pitches = []
-            cho = []
-            for cc in c:
-                if cc[4] not in pitches:
-                    cho.append(cc)
-                    pitches.append(cc[4])
-            clean_cscore.append(cho)
-        #=======================================================
-        # FINAL PROCESSING
-        melody_chords = []
-        chords = []
-        times = [0]
-        durs = []
-        #=======================================================
-        # MAIN PROCESSING CYCLE
-        #=======================================================
-        pe = clean_cscore[0][0]
-        first_chord = True
-        for c in clean_cscore:
-            # Chords
-            c.sort(key=lambda x: x[4], reverse=True)
-            tones_chord = sorted(set([cc[4] % 12 for cc in c]))
-            try:
-                chord_token = TMIDIX.ALL_CHORDS_SORTED.index(tones_chord)
-            except:
-                checked_tones_chord = TMIDIX.check_and_fix_tones_chord(tones_chord)
-                chord_token = TMIDIX.ALL_CHORDS_SORTED.index(checked_tones_chord)
-            melody_chords.extend([chord_token+384])
-            if input_strip_notes:
-              if len(tones_chord) > 1:
-                chords.extend([chord_token+384])
-            else:
-              chords.extend([chord_token+384])
-            if first_chord:
-                    melody_chords.extend([0])
-                    first_chord = False
-            for e in c:
-                #=======================================================
-                # Timings...
-                time = e[1]-pe[1]
-                dur = e[2]
-                if time != 0 and time % 2 != 0:
-                    time += 1
-                if dur % 2 != 0:
-                    dur += 1
-                delta_time = int(max(0, min(255, time)) / 2)
-                # Durations
-                dur = int(max(0, min(255, dur)) / 2)
-                # Pitches
-                ptc = max(1, min(127, e[4]))
-                #=======================================================
-                # FINAL NOTE SEQ
-                # Writing final note asynchronously
-                if delta_time != 0:
-                    melody_chords.extend([delta_time, dur+128, ptc+256])
-                    if input_strip_notes:
-                      if len(c) > 1:
-                        times.append(delta_time)
-                        durs.append(dur+128)
-                    else:
-                        times.append(delta_time)
-                        durs.append(dur+128)
-                else:
-                    melody_chords.extend([dur+128, ptc+256])
-                pe = e
     #==================================================================
     print('=' * 70)
@@ -368,11 +238,8 @@ if __name__ == "__main__":
         gr.Markdown(
             "![Visitors](https://api.visitorbadge.io/api/visitors?path=asigalov61.Melody2Song-Seq2Seq-Music-Transformer&style=flat)\n\n")
-        input_midi = gr.File(label="Input MIDI", file_types=[".midi", ".mid", ".kar"])
-        input_num_tokens = gr.Slider(4, 128, value=32, step=1, label="Number of composition chords to generate progression for")
-        input_conditioning_type = gr.Radio(["Chords", "Chords-Times", "Chords-Times-Durations"], label="Conditioning type")
-        input_strip_notes = gr.Checkbox(label="Strip notes from the composition")
         run_btn = gr.Button("generate", variant="primary")
         gr.Markdown("## Generation results")
@@ -383,8 +250,7 @@ if __name__ == "__main__":
         output_plot = gr.Plot(label="Output MIDI score plot")
         output_midi = gr.File(label="Output MIDI file", file_types=[".mid"])
-        run_event = run_btn.click(GenerateAccompaniment, [input_midi, input_num_tokens, input_conditioning_type, input_strip_notes],
                                   [output_midi_title, output_midi_summary, output_midi, output_audio, output_plot])
         app.queue().launch()

+# https://huggingface.co/spaces/asigalov61/Melody2Song-Seq2Seq-Music-Transformer
 import time as reqtime
 import datetime
 # =================================================================================================
 @spaces.GPU
+def GenerateSong(input_melody_seed_number):
     print('=' * 70)
     print('Req start time: {:%Y-%m-%d %H:%M:%S}'.format(datetime.datetime.now(PDT)))
     start_time = reqtime.time()
     print('Loading model...')
+    SEQ_LEN = 2560
+    PAD_IDX = 514
     DEVICE = 'cuda' # 'cuda'
     # instantiate the model
     model = TransformerWrapper(
         num_tokens = PAD_IDX+1,
         max_seq_len = SEQ_LEN,
+        attn_layers = Decoder(dim = 1024, depth = 24, heads = 16, attn_flash = True)
         )
     model = AutoregressiveWrapper(model, ignore_index = PAD_IDX)
     print('Loading model checkpoint...')
     model.load_state_dict(
+        torch.load('Melody2Song_Seq2Seq_Music_Transformer_Trained_Model_28482_steps_0.719_loss_0.7865_acc.pth',
                    map_location=DEVICE))
     print('=' * 70)
     if DEVICE == 'cpu':
         dtype = torch.bfloat16
     else:
+        dtype = torch.bfloat16
     ctx = torch.amp.autocast(device_type=DEVICE, dtype=dtype)
     print('Done!')
     print('=' * 70)
+    print('Input melody seed number:', input_melody_seed_number)
     print('-' * 70)
     #==================================================================
     print('=' * 70)
         gr.Markdown(
             "![Visitors](https://api.visitorbadge.io/api/visitors?path=asigalov61.Melody2Song-Seq2Seq-Music-Transformer&style=flat)\n\n")
+        input_melody_seed_number = gr.Slider(0, 200000, value=0, step=1, label="Select seed melody number")
         run_btn = gr.Button("generate", variant="primary")
         gr.Markdown("## Generation results")
         output_plot = gr.Plot(label="Output MIDI score plot")
         output_midi = gr.File(label="Output MIDI file", file_types=[".mid"])
+        run_event = run_btn.click(GenerateSong, [input_melody_seed_number],
                                   [output_midi_title, output_midi_summary, output_midi, output_audio, output_plot])
         app.queue().launch()