import json
import os
import random
import shutil

# Directory the source images are read from (COCO train2017 images).
source_folder = "/mnt/petrelfs/zhuchenglin/diffusion/coco/images/train2017"

# Root directory the renamed image copies are written under; the copy loop
# creates numbered subfolders inside it.
target_folder = "/mnt/petrelfs/zhuchenglin/LLaVA/playground/data/LLaVA-Pretrain/images"

# Parent dataset directory of `target_folder`.
# NOTE(review): not referenced in the visible part of the script; presumably
# the destination of the generated annotation file -- confirm.
target_anno_folder = "/mnt/petrelfs/zhuchenglin/LLaVA/playground/data/LLaVA-Pretrain"

# JSON file with the COCO train2017 caption annotations.
annotations_coco_path = (
    "/mnt/petrelfs/zhuchenglin/diffusion/coco/annotations/captions_train2017.json"
)

# Parse the whole caption annotation file into memory. The result is a dict;
# the script reads the list stored under its "annotations" key.
# The encoding is given explicitly so decoding does not depend on the
# platform's locale default.
with open(annotations_coco_path, "r", encoding="utf-8") as f:
    annotations = json.load(f)

# Output annotation records.
# NOTE(review): nothing in the visible part of the script appends to this
# list; presumably it is filled further down -- confirm.
new_annotations = []

# Only the first MAX_ANNOTATIONS entries of the annotation list are processed.
MAX_ANNOTATIONS = 200000
# Number of the first output subfolder; later subfolders count up from here.
FIRST_FOLDER_INDEX = 680
# How many images go into one output subfolder before the next one is started.
IMAGES_PER_FOLDER = 10000

# Copy the image referenced by each annotation entry to a new, sequentially
# numbered file name: <5-digit folder>/<5-digit folder><4-digit position>.jpg
for index, annotation in enumerate(annotations["annotations"][:MAX_ANNOTATIONS]):
    print(index)

    folder_index = FIRST_FOLDER_INDEX + (index // IMAGES_PER_FOLDER)
    target_subfolder = f"{folder_index:05d}"

    # The file name repeats the folder number, followed by the entry's
    # position inside that folder.
    target_image_name = f"{target_subfolder}{index % IMAGES_PER_FOLDER:04d}.jpg"
    target_subfolder_path = os.path.join(target_folder, target_subfolder)
    target_image_path = os.path.join(target_subfolder_path, target_image_name)

    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(target_subfolder_path, exist_ok=True)

    # Source files are named by the zero-padded 12-digit image id.
    source_image_path = os.path.join(
        source_folder, f"{annotation['image_id']:012d}.jpg"
    )

    # Best effort: entries whose source image is missing are skipped silently.
    if os.path.exists(source_image_path):
        shutil.copy(source_image_path, target_image_path)
