File size: 2,356 Bytes
e775f6d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1a5d300
e775f6d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
import torch
import piano_transcription_inference
import numpy as np
import os
import sys
sys.path.append('../../')
from src.music.utils import get_out_path, load_audio
from src.music.config import CHKPT_PATH_TRANSCRIPTION, FPS, MIN_LEN, CROP_LEN
# import librosa
# Select the torch device once at import time: CUDA when torch reports it as
# available, CPU otherwise.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# Module-level transcriber built from the configured checkpoint; it is created
# a single time on import and reused by every audio2midi() call.
TRANSCRIPTOR = piano_transcription_inference.PianoTranscription(
    checkpoint_path=CHKPT_PATH_TRANSCRIPTION,
    device=device,
)

def audio2midi(audio_path, midi_path=None, crop=CROP_LEN, random_crop=True, verbose=False, level=0):
    """Transcribe an mp3 file to a MIDI file using the module-level TRANSCRIPTOR.

    If the target MIDI file already exists, nothing is transcribed and the
    existing path is returned.

    Args:
        audio_path: Path of the audio file; must end with '.mp3'.
        midi_path: Destination of the MIDI file. When None, it is derived from
            ``audio_path`` through ``get_out_path`` (in_word='audio',
            out_word='midi', out_extension='.mid').
        crop: Maximum duration, in seconds, of audio to transcribe. Audio
            longer than ``FPS * crop`` samples is cut down to that size. Only
            int values trigger cropping; any other value (e.g. None) leaves
            the audio uncropped.
        random_crop: When cropping, take the window at a uniformly random
            offset if True, or from the start of the audio if False.
        verbose: Print progress messages.
        level: Number of spaces used to indent the printed messages.

    Returns:
        A ``(midi_path, error_msg)`` tuple: ``(midi_path, '')`` on success or
        when the MIDI file already existed, and ``(None, error_msg)`` when any
        step failed, where ``error_msg`` names the step that was in progress.

    Raises:
        AssertionError: If ``audio_path`` does not end with '.mp3'.
    """
    # `crop` is allowed to be a non-int (see the isinstance check below), so do
    # not compare None against a number just to emit a warning.
    if verbose and crop is not None and crop < MIN_LEN + 2:
        print('crop is inferior to the minimal length of a tune')
    assert '.mp3' == audio_path[-4:]
    if midi_path is None:
        midi_path, _, _ = get_out_path(in_path=audio_path, in_word='audio', out_word='midi', out_extension='.mid')

    if verbose:
        print(' ' * level + f'Transcribing {audio_path}.')
    if os.path.exists(midi_path):
        if verbose:
            print(' ' * (level + 2) + 'Midi file already exists.')
        return midi_path, ''

    # The message is extended before each step so that, on failure, its tail
    # identifies the step that was running when the exception was raised.
    error_msg = 'Error in transcription. '
    try:
        error_msg += 'Maybe in audio loading?'
        (audio, _) = load_audio(audio_path,
                                sr=FPS,
                                mono=True)
        error_msg += ' Nope. Cropping?'
        if isinstance(crop, int) and len(audio) > FPS * crop:
            rc_str = ' (random crop)' if random_crop else ' (start crop)'
            if verbose:
                print(' ' * (level + 2) + f'Cropping the song to {crop}s before transcription{rc_str}. ')
            size_crop = FPS * crop
            if random_crop:
                # Valid start offsets are 0 .. len(audio) - size_crop inclusive;
                # randint's upper bound is exclusive, hence the + 1. (The former
                # "- 1" raised ValueError when the audio was exactly one sample
                # longer than the crop.)
                index_begining = np.random.randint(len(audio) - size_crop + 1)
            else:
                index_begining = 0
            audio = audio[index_begining: index_begining + size_crop]
        error_msg += ' Nope. Transcription?'
        TRANSCRIPTOR.transcribe(audio, midi_path)
        error_msg += ' Nope.'
        extra = f' Saved to {midi_path}' if midi_path else ''
        if verbose:
            print(' ' * (level + 2) + f'Success! {extra}')
        return midi_path, ''
    except Exception:
        # Deliberately broad: any failure is reported through the returned
        # message rather than raised. KeyboardInterrupt / SystemExit are not
        # Exception subclasses and therefore still propagate.
        if verbose:
            print(' ' * (level + 2) + 'Transcription failed.')
        # Do not leave a partially written MIDI file behind, otherwise the
        # "already exists" shortcut above would treat it as a valid result.
        if os.path.exists(midi_path):
            os.remove(midi_path)
        return None, error_msg + ' Yes.'