Skip to content

Instantly share code, notes, and snippets.

@diaoenmao
Last active July 14, 2020 20:35
Show Gist options
  • Select an option

  • Save diaoenmao/2e3f8abed4b0e690c25944d292ce375c to your computer and use it in GitHub Desktop.

Select an option

Save diaoenmao/2e3f8abed4b0e690c25944d292ce375c to your computer and use it in GitHub Desktop.
Image resizer for Deep Learning image dataset
from PIL import Image
from argparse import ArgumentParser
import os
from multiprocessing import Pool
# Maps each supported algorithm name (lower case) to the matching PIL
# resampling filter constant.
alg_dict = dict(
    lanczos=Image.LANCZOS,
    nearest=Image.NEAREST,
    bilinear=Image.BILINEAR,
    bicubic=Image.BICUBIC,
    hamming=Image.HAMMING,
    box=Image.BOX,
)
def parse_arguments():
    """Parse the command-line options of the resizer script.

    Returns:
        A tuple ``(in_dir, out_dir, algorithm, size, recurrent, every_nth,
        processes)`` where ``in_dir``/``out_dir``/``algorithm`` are strings,
        ``size``/``every_nth``/``processes`` are ints and ``recurrent`` is a
        bool.
    """
    parser = ArgumentParser()
    parser.add_argument('-i', '--in_dir', help="Input directory with source images", required=True)
    parser.add_argument('-o', '--out_dir', help="Output directory for resized images", required=True)
    parser.add_argument('-s', '--size', help="Size of an output image (e.g. 32 results in (32x32) image)",
                        default=128, type=int)
    parser.add_argument('-a', '--algorithm', help="Algorithm used for resampling: lanczos, nearest,"
                                                  " bilinear, bicubic, box, hamming",
                        default='bilinear')
    # Sub-folder processing is on by default, so '-r' alone could never turn
    # it off. '-r' is kept for backward compatibility and '--no_recurrent'
    # (same destination) makes the single-folder mode selectable.
    parser.add_argument('-r', '--recurrent', help="Process all subfolders in this folder (1 lvl deep); "
                                                  "this is the default",
                        dest='recurrent', action='store_true', default=True)
    parser.add_argument('--no_recurrent', help="Process only the images directly inside in_dir",
                        dest='recurrent', action='store_false')
    parser.add_argument('-e', '--every_nth', help="Use if you don't want to take all classes, "
                                                  "if -e 10 then takes every 10th class",
                        default=1, type=int)
    parser.add_argument('-j', '--processes', help="Number of sub-processes that run different folders "
                                                  "in the same time ",
                        default=8, type=int)
    args = parser.parse_args()
    return args.in_dir, args.out_dir, args.algorithm, args.size, args.recurrent, args.every_nth, args.processes
def str2alg(str):
    """Return the resampling filter registered in ``alg_dict`` for a name.

    The name is lower-cased before the lookup; ``None`` is returned when no
    entry matches.
    """
    return alg_dict.get(str.lower())
# python resizer.py -i base/ILSVRC2012_img_train -o 32/ILSVRC2012_img_train -s 32
def resize_img_folder(in_dir, out_dir, size, alg):
    """Resize every image in ``in_dir`` to ``size`` x ``size`` and save it as PNG.

    Args:
        in_dir: Path of the folder holding the source images.
        out_dir: Path of the folder the resized files are written to; it is
            created when missing.
        size: Edge length in pixels of the square output image.
        alg: Name of the resampling algorithm, resolved through ``str2alg``.

    Returns:
        None. When ``alg`` does not name a known algorithm a message is
        printed and no file is processed.
    """
    print('Folder %s' % in_dir)
    alg_val = str2alg(alg)
    if alg_val is None:
        print("Sorry but this algorithm (%s) is not available, use help for more info." % alg)
        return

    # Create the output folder if needed; exist_ok tolerates one that is
    # already there.
    os.makedirs(out_dir, exist_ok=True)

    for filename in os.listdir(in_dir):
        src_path = os.path.join(in_dir, filename)
        try:
            # The context manager closes the source file once the resized
            # copy has been produced.
            with Image.open(src_path) as source:
                # Convert grayscale (and any other non-RGB) images into 3 channels
                rgb = source if source.mode == "RGB" else source.convert(mode="RGB")
                # Pass the selected filter; without it the requested
                # algorithm would be ignored.
                im_resized = rgb.resize((size, size), resample=alg_val)
        except OSError as err:
            # Raised when the entry is not a readable image (or is a
            # directory); skip it so the rest of the folder is still processed.
            print("Skipping %s: could not be read as an image (%s)" % (src_path, err))
            continue

        # Get rid of extension (.jpg or other)
        stem = os.path.splitext(filename)[0]
        im_resized.save(os.path.join(out_dir, stem + '.png'))
if __name__ == '__main__':
    # Script entry point: resize either every selected sub-folder of in_dir
    # (in parallel worker processes) or just in_dir itself.
    in_dir, out_dir, alg, size, recurrent, every_nth, processes = parse_arguments()
    print('Starting ...')
    print('Using algorithm %s ...' % alg)
    if recurrent:
        print('Recurrent for all folders in in_dir:\n %s' % in_dir)
        folders = [name for name in sorted(os.listdir(in_dir))
                   if os.path.isdir(os.path.join(in_dir, name))]
        pool = Pool(processes=processes)
        pending = []
        for i, folder in enumerate(folders):
            # Value comparison: 'is 0' tests identity, not equality.
            if i % every_nth == 0:
                result = pool.apply_async(
                    func=resize_img_folder,
                    args=[os.path.join(in_dir, folder), os.path.join(out_dir, folder), size, alg])
                pending.append((folder, result))
        pool.close()
        pool.join()
        # Exceptions raised in a worker are only re-raised by get(); report
        # them instead of letting failed folders go unnoticed.
        for folder, result in pending:
            try:
                result.get()
            except Exception as err:
                print('Folder %s failed: %r' % (os.path.join(in_dir, folder), err))
    else:
        print('For folder %s' % in_dir)
        # size is a required argument of resize_img_folder.
        resize_img_folder(in_dir=in_dir, out_dir=out_dir, size=size, alg=alg)
    print("Finished.")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment