File size: 662 Bytes
c6070db |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 |
import os
import numpy as np
# Randomly split a '|'-delimited filelist into a training filelist and a
# validation filelist written next to the input file.
filename = 'E:/uma_voice/output.txt'
split = '|'

# Read every line as a list of fields; surrounding whitespace (including the
# trailing newline) is stripped before splitting.
with open(filename, encoding='utf-8') as f:
    filepaths_and_text = [line.strip().split(split) for line in f]

# Build the output paths from the input path minus its extension.
# os.path.splitext removes only the final extension, so a dot in a directory
# name (or earlier in the file name) does not truncate the path.
base_name = os.path.splitext(filename)[0]
train_filename = base_name + '_train' + '.txt'
val_filename = base_name + '_val' + '.txt'

# Probability that any given line is assigned to the training file. The draw
# is made independently per line, so the resulting split is only
# approximately this ratio.
train_split_ratio = 0.99

# Open both outputs in a single `with` so they are flushed and closed even if
# writing fails part-way through.
with open(train_filename, 'w', encoding='utf-8') as train_f, \
        open(val_filename, 'w', encoding='utf-8') as val_f:
    for fields in filepaths_and_text:
        # np.random.rand() returns a uniform sample in [0, 1).
        target_f = train_f if np.random.rand() < train_split_ratio else val_f
        target_f.write('|'.join(fields) + '\n')