import csv
import json
import os
import shutil

import numpy as np

from tasks.utils import event_codec
from tasks.utils.event_codec import Event, EventRange

ec = event_codec.Codec(
    max_shift_steps=1000,  # shift values 0, 1, ..., 1000
    steps_per_second=100,
    event_ranges=[
        EventRange('pitch', min_value=0, max_value=127),
        EventRange('velocity', min_value=0, max_value=1),
        EventRange('tie', min_value=0, max_value=0),
        EventRange('program', min_value=0, max_value=127),
        EventRange('drum', min_value=0, max_value=127),
    ],
)
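
# Sanity check on the token layout. Assuming the MT3-style codec (shift tokens
# come first, then each EventRange in declaration order), shift occupies ids
# 0..1000, so pitch 60 should land at 1001 + 60 = 1061.
print(ec.encode_event(Event(type='pitch', value=60)))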

events = [
    Event(type='shift', value=0),     # zero-length shift; not actually needed
    Event(type='shift', value=1),     # 10 ms shift
    Event(type='shift', value=1000),  # 10 s shift
    Event(type='pitch', value=0),     # lowest pitch, 8.18 Hz
    Event(type='pitch', value=60),    # C4, 261.63 Hz
    Event(type='pitch', value=127),   # highest pitch, G9 or 12543.85 Hz
    Event(type='velocity', value=0),  # velocity 0 (note off)
    Event(type='velocity', value=1),  # velocity 1 (note on)
    Event(type='tie', value=0),       # tie
    Event(type='program', value=0),   # lowest program
    Event(type='program', value=127), # highest program
    Event(type='drum', value=0),      # lowest drum pitch
    Event(type='drum', value=127),    # highest drum pitch
]

events = events * 100  # 13 distinct events x 100 = 1300 events
tokens = [ec.encode_event(e) for e in events]
tokens = np.array(tokens, dtype=np.int16)
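
# Size check: 1300 int16 tokens is ~2.6 KB per segment
print(len(tokens), tokens.nbytes)  # 1300 2600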

# Save events to a CSV file
with open('events.csv', 'w', newline='') as file:
    writer = csv.writer(file)
    for event in events:
        writer.writerow([event.type, event.value])

# Load events from a CSV file
with open('events.csv', 'r') as file:
    reader = csv.reader(file)
    events2 = [Event(row[0], int(row[1])) for row in reader]
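
# Round-trip check: assuming Event is a dataclass (so equality compares
# fields), the CSV round trip should reproduce the original events exactly.
assert events2 == events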


# Save events to a JSON file
with open('events.json', 'w') as file:
    json.dump([event.__dict__ for event in events], file)

# Load events from a JSON file
with open('events.json', 'r') as file:
    events = [Event(**event_dict) for event_dict in json.load(file)]




"""----------------------------"""
# Write the tokens to a npy file
import numpy as np
np.save('tokens.npy', tokens)

def t_npy():
    t = np.load('tokens.npy', allow_pickle=True)  # allow_pickle doesn't affect load speed here

os.makedirs('temp', exist_ok=True)
for i in range(2400):
    np.save(f'temp/tokens{i}.npy', tokens)

def t_npy2400():
    for i in range(2400):
        t = np.load(f'temp/tokens{i}.npy')

def t_npy2400_take200():
    for i in range(200):
        t = np.load(f'temp/tokens{i}.npy')


# Write 2400 copies of the token array to a single .npy file
# (np.save pickles non-array objects such as this dict)
data = dict()
for i in range(2400):
    data[f'arr{i}'] = tokens.copy()
np.save('tokens_2400x.npy', data)

def t_npy2400single():
    t = np.load('tokens_2400x.npy', allow_pickle=True).item()

def t_mmap2400single():
    # Note: this raises, since mmap_mode cannot load pickled (object) arrays
    t = np.load('tokens_2400x.npy', mmap_mode='r')

# Write the tokens to a .npz file
np.savez('tokens.npz', arr0=tokens)
def t_npz():
    npz_file = np.load('tokens.npz')
    tt = npz_file['arr0']
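
# NpzFile reads lazily (one zip member per access) and supports the context-
# manager protocol, which closes the underlying file handle deterministically:
def t_npz_ctx():
    with np.load('tokens.npz') as npz_file:
        tt = npz_file['arr0']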

data = dict()
for i in range(2400):
    data[f'arr{i}'] = tokens
np.savez('tokens_2400x.npz', **data)  # separate file so tokens.npz stays single-array

def t_npz2400():
    npz_file = np.load('tokens_2400x.npz')
    for i in range(2400):
        tt = npz_file[f'arr{i}']

def t_npz2400_take200():
    npz_file = np.load('tokens_2400x.npz')
    # npz_file.files lists the member names
    for i in range(200):
        tt = npz_file[f'arr{i}']


# Write the tokens to a txt file
with open('tokens.txt', 'w') as file:
    file.write(' '.join(map(str, tokens)))

def t_txt():
    # Read the tokens from the file
    with open('tokens.txt', 'r') as file:
        t = list(map(int, file.read().split()))
    t = np.array(t)


# Write the tokens to a CSV file
with open('tokens.csv', 'w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(tokens)

def t_csv():
    # Read the tokens from the CSV file
    with open('tokens.csv', 'r') as file:
        reader = csv.reader(file)
        t = list(map(int, next(reader)))
        t = np.array(t)


# Write the tokens to a JSON file
with open('tokens.json', 'w') as file:
    json.dump(tokens.tolist(), file)  # ndarray / np.int16 are not JSON serializable

def t_json():
    # Read the tokens from the JSON file
    with open('tokens.json', 'r') as file:
        t = json.load(file)
        t = np.array(t)

with open('tokens_2400x.json', 'w') as file:
    json.dump({k: v.tolist() for k, v in data.items()}, file)  # convert arrays for JSON

def t_json2400single():
    # Read the tokens from the JSON file
    with open('tokens_2400x.json', 'r') as file:
        t = json.load(file)      

def t_mmap():
    t = np.load('tokens.npy', mmap_mode='r')
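
# What mmap buys you: slicing reads only the touched pages, not the whole
# file, so random access into a large array stays cheap.
def t_mmap_slice():
    t = np.load('tokens.npy', mmap_mode='r')
    first = np.array(t[:10])  # materialize just the first 10 tokens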

# Write the tokens to a raw bytes file
# (a minimal sketch: tobytes/frombuffer round trip, dtype must be known)
with open('tokens.bin', 'wb') as file:
    file.write(tokens.tobytes())

def t_bytes():
    with open('tokens.bin', 'rb') as file:
        t = np.frombuffer(file.read(), dtype=np.int16)
np.savetxt('tokens.ntxt', tokens)
def t_ntxt():
    t = np.loadtxt('tokens.ntxt').astype(np.int32)
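
# np.savetxt writes '%.18e' floats by default (hence the astype above);
# an integer format shrinks the file and skips the float round trip:
np.savetxt('tokens_int.ntxt', tokens, fmt='%d')
def t_ntxt_int():
    t = np.loadtxt('tokens_int.ntxt', dtype=np.int32)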

%timeit t_npz()   # 139 us
%timeit t_mmap()  # 3.12 ms
%timeit t_npy()   # 87.8 us
%timeit t_txt()   # 109-152 us
%timeit t_csv()   # 145-190 us
%timeit t_json()  # 72.8-119 us
%timeit t_ntxt()  # 878 us

%timeit t_npy2400()  # 212 ms; 2400 files in a folder
%timeit t_npz2400()  # 296 ms; uncompressed 2400 arrays in a single file

%timeit t_npy2400_take200()  # 17.4 ms; 25 MB
%timeit t_npz2400_take200()  # 28.8 ms; 3.72 ms for 10 arrays; 25 MB
%timeit t_npy2400single()  # 4 ms; pickled dict of 2400 int16 arrays; 6.4 MB
# %timeit t_mmap2400single()  # raises: mmap_mode cannot load a pickled dict
%timeit t_json2400single()  # 175 ms; 17 MB
# 2400 arrays = one per 100 ms hop over a 4-minute recording

# Clean up the 2400 temporary per-file arrays now that benchmarking is done
shutil.rmtree('temp', ignore_errors=True)
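
# The %timeit magics above require IPython. If you run this as a plain
# script, the stdlib timeit module gives comparable numbers (minimal sketch):
import timeit
print(timeit.timeit(t_npy, number=1000) / 1000, 's per call')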