Spaces:
Running
Running
import os | |
import random | |
import shutil | |
def extract_images(source_folder, destination_folder):
    """Copy every JPG/PNG image under ``source_folder`` into one flat folder.

    The source tree is walked recursively. Each image is copied into
    ``destination_folder`` under a zero-padded sequential name
    (``00000.jpg``, ``00001.png``, ...) that keeps the original extension.

    Args:
        source_folder: Root directory to search for images.
        destination_folder: Directory that receives the renamed copies.
            It is created if it does not exist yet.

    Returns:
        The number of images copied.
    """
    # Compare real extensions (with the dot, case-insensitively) so that
    # names such as "notjpg" are rejected and "PAGE.JPG" is accepted.
    image_extensions = {".jpg", ".png"}

    os.makedirs(destination_folder, exist_ok=True)
    destination_abs = os.path.abspath(destination_folder)

    count = 0
    for root, dirs, files in os.walk(source_folder):
        # Editing ``dirs`` in place steers os.walk: sort for a reproducible
        # numbering and skip the destination so copies are never re-copied
        # when it is nested inside the source tree.
        dirs[:] = sorted(
            d for d in dirs
            if os.path.abspath(os.path.join(root, d)) != destination_abs
        )
        for file in sorted(files):
            extension = os.path.splitext(file)[1]
            if extension.lower() not in image_extensions:
                continue
            src_path = os.path.join(root, file)
            dst_path = os.path.join(destination_folder, f"{count:05d}{extension}")
            shutil.copy(src_path, dst_path)
            count += 1
    return count
def split_data(data_folder, train_ratio=0.7, validation_ratio=0.2, seed=None):
    """Randomly move the files in ``data_folder`` into train/validation/test.

    Creates ``train``, ``validation`` and ``test`` sub-folders inside
    ``data_folder`` (if missing), shuffles the regular files found directly
    in ``data_folder`` and moves them into the three sub-folders.

    Args:
        data_folder: Directory whose files are to be split.
        train_ratio: Fraction of files moved to ``train`` (default 0.7).
        validation_ratio: Fraction of files moved to ``validation``
            (default 0.2). Whatever is left over goes to ``test``.
        seed: Optional seed for a reproducible shuffle. When ``None`` the
            module-level ``random`` generator is used.

    Raises:
        ValueError: If a ratio is negative or the two ratios sum to more
            than 1.
    """
    if train_ratio < 0 or validation_ratio < 0 or train_ratio + validation_ratio > 1:
        raise ValueError(
            "train_ratio and validation_ratio must be non-negative and sum to at most 1"
        )

    train_folder = os.path.join(data_folder, "train")
    validation_folder = os.path.join(data_folder, "validation")
    test_folder = os.path.join(data_folder, "test")
    for folder in (train_folder, validation_folder, test_folder):
        os.makedirs(folder, exist_ok=True)

    # os.listdir order is arbitrary; sort first so that a seeded shuffle
    # yields the same split on every machine and run.
    image_files = sorted(
        f for f in os.listdir(data_folder)
        if os.path.isfile(os.path.join(data_folder, f))
    )
    rng = random if seed is None else random.Random(seed)
    rng.shuffle(image_files)

    total_images = len(image_files)
    # int() truncates, so rounding leftovers end up in the test split.
    train_end = int(train_ratio * total_images)
    validation_end = train_end + int(validation_ratio * total_images)

    assignments = (
        (train_folder, image_files[:train_end]),
        (validation_folder, image_files[train_end:validation_end]),
        (test_folder, image_files[validation_end:]),
    )
    for target_folder, names in assignments:
        for name in names:
            shutil.move(os.path.join(data_folder, name), target_folder)
if __name__ == "__main__":
    # Stage 1: flatten the nested manga image tree into sequentially
    # numbered files inside the data folder.
    source_folder, destination_folder = "manga/", "data/"
    extract_images(source_folder, destination_folder)

    # Stage 2: distribute the flattened images across the
    # train/validation/test sub-folders of that same data folder.
    data_folder = "data/"
    split_data(data_folder)