"""Batch-convert images found under ``in_dir`` into resized JPEGs in ``out_dir``.

Usage: ``script.py in_dir out_dir``

Every ``jpg``/``jpeg``/``png``/``svg`` file (lower- or upper-case extension)
located exactly one directory level below ``in_dir`` is passed to ImageMagick's
``convert``. Conversions run on 32 threads. A source file is DELETED once its
conversion exits with status 0. A tally of the exit codes is printed at the end.
"""
import glob
import itertools
import os
import subprocess
from argparse import ArgumentParser
from collections import Counter

from joblib import Parallel, delayed

parser = ArgumentParser()
parser.add_argument("in_dir")
parser.add_argument("out_dir")
args = parser.parse_args()

os.makedirs(args.out_dir, exist_ok=True)

# Extensions to pick up; each one is globbed in lower case and in upper case.
IMAGE_EXTENSIONS = ("jpg", "jpeg", "png", "svg")

# Lazy stream of input paths: <in_dir>/<any subdirectory>/<file>.<ext>
files = itertools.chain.from_iterable(
    glob.iglob(f"{args.in_dir}/*/*.{variant}")
    for ext in IMAGE_EXTENSIONS
    for variant in (ext, ext.upper())
)


def process_file(path):
    """Convert one image to a resized JPEG and delete the source on success.

    The output is written to ``<out_dir>/<stem>.jpg`` where ``<stem>`` is the
    input's base name without its extension.

    Args:
        path: Path of the image to convert.

    Returns:
        The exit status of ``convert``; ``-1`` if the conversion exceeded the
        10 second timeout; ``127`` if ``convert`` could not be started at all
        (the status a shell reports for a missing command).
    """
    stem = os.path.splitext(os.path.basename(path))[0]
    # NOTE: the output name ignores the source directory and extension, so
    # inputs sharing a stem (e.g. a/x.png and b/x.jpg) overwrite each other.
    out_path = os.path.join(args.out_dir, f"{stem}.jpg")

    # An argument list (no shell) keeps paths containing spaces, quotes or
    # shell metacharacters from breaking or hijacking the command.
    command = [
        "convert",
        path,
        # ImageMagick geometry flags: "^" treats 224 as a minimum size,
        # ">" only shrinks images that are larger than that.
        "-resize",
        "224^>",
        "-colorspace",
        "RGB",
        # NOTE(review): -density follows the input file here; confirm that
        # this is intended if it is meant to affect SVG rasterization.
        "-density",
        "1200",
        out_path,
    ]

    try:
        rcode = subprocess.run(command, timeout=10).returncode
    except subprocess.TimeoutExpired:
        print("conversion timeout expired")
        rcode = -1
    except OSError as exc:
        # Without a shell, a missing or non-executable `convert` raises
        # instead of yielding an exit status; report it in the tally.
        print(f"could not run convert: {exc}")
        rcode = 127

    # Only drop the original once the conversion is known to have succeeded.
    if rcode == 0:
        os.remove(path)
    return rcode


codes = Parallel(n_jobs=32, prefer="threads", verbose=1)(
    delayed(process_file)(f) for f in files
)
print(Counter(codes))