glenn-jocher
committed on
Commit
•
54652fe
1
Parent(s):
801b469
Objects365 update
Browse files
- data/scripts/get_objects365.py +21 -9
- utils/general.py +3 -3
data/scripts/get_objects365.py
CHANGED
@@ -7,22 +7,34 @@
|
|
7 |
# /images
|
8 |
# /labels
|
9 |
|
|
|
10 |
from pycocotools.coco import COCO
|
11 |
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
for
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
catIds = coco.getCatIds(catNms=[cat])
|
18 |
imgIds = coco.getImgIds(catIds=catIds)
|
19 |
-
print(cat)
|
20 |
-
# Create a subfolder in this directory called "labels". This is where the annotations will be saved in YOLO format
|
21 |
for im in coco.loadImgs(imgIds):
|
22 |
width, height = im["width"], im["height"]
|
23 |
-
path = im["file_name"]
|
24 |
try:
|
25 |
-
with open(
|
26 |
annIds = coco.getAnnIds(imgIds=im["id"], catIds=catIds, iscrowd=None)
|
27 |
for a in coco.loadAnns(annIds):
|
28 |
x, y, w, h = a['bbox'] # bounding box in xywh (xy top-left corner)
|
|
|
7 |
# /images
|
8 |
# /labels
|
9 |
|
10 |
+
|
11 |
from pycocotools.coco import COCO
|
12 |
|
13 |
+
from utils.general import download, Path
|
14 |
+
|
15 |
+
# Make Directories
|
16 |
+
dir = Path('../datasets/objects365') # dataset directory
|
17 |
+
for p in 'images', 'labels':
|
18 |
+
(dir / p).mkdir(parents=True, exist_ok=True)
|
19 |
+
for q in 'train', 'val':
|
20 |
+
(dir / p / q).mkdir(parents=True, exist_ok=True)
|
21 |
+
|
22 |
+
# Download
|
23 |
+
url = "https://dorc.ks3-cn-beijing.ksyun.com/data-set/2020Objects365%E6%95%B0%E6%8D%AE%E9%9B%86/train/"
|
24 |
+
download(url + 'zhiyuan_objv2_train.tar.gz', dir=dir, threads=8) # annotations json
|
25 |
+
download([url + f for f in [f'patch{i}.tar.gz' for i in range(51)]], dir=dir / 'images' / 'train', threads=8)
|
26 |
+
|
27 |
+
# Labels
|
28 |
+
coco = COCO(dir / 'zhiyuan_objv2_train.json')
|
29 |
+
names = [x["name"] for x in coco.loadCats(coco.getCatIds())]
|
30 |
+
for categoryId, cat in enumerate(names):
|
31 |
catIds = coco.getCatIds(catNms=[cat])
|
32 |
imgIds = coco.getImgIds(catIds=catIds)
|
|
|
|
|
33 |
for im in coco.loadImgs(imgIds):
|
34 |
width, height = im["width"], im["height"]
|
35 |
+
path = Path(im["file_name"]) # image filename
|
36 |
try:
|
37 |
+
with open(dir / 'labels' / 'train' / path.with_suffix('.txt').name, 'a') as file:
|
38 |
annIds = coco.getAnnIds(imgIds=im["id"], catIds=catIds, iscrowd=None)
|
39 |
for a in coco.loadAnns(annIds):
|
40 |
x, y, w, h = a['bbox'] # bounding box in xywh (xy top-left corner)
|
utils/general.py
CHANGED
@@ -183,7 +183,7 @@ def check_dataset(dict):
|
|
183 |
raise Exception('Dataset not found.')
|
184 |
|
185 |
|
186 |
-
def download(url, dir='.',
|
187 |
# Multi-threaded file download and unzip function
|
188 |
def download_one(url, dir):
|
189 |
# Download 1 file
|
@@ -200,8 +200,8 @@ def download(url, dir='.', multi_thread=False):
|
|
200 |
|
201 |
dir = Path(dir)
|
202 |
dir.mkdir(parents=True, exist_ok=True) # make directory
|
203 |
-
if
|
204 |
-
ThreadPool(
|
205 |
else:
|
206 |
for u in tuple(url) if isinstance(url, str) else url:
|
207 |
download_one(u, dir)
|
|
|
183 |
raise Exception('Dataset not found.')
|
184 |
|
185 |
|
186 |
+
def download(url, dir='.', threads=1):
|
187 |
# Multi-threaded file download and unzip function
|
188 |
def download_one(url, dir):
|
189 |
# Download 1 file
|
|
|
200 |
|
201 |
dir = Path(dir)
|
202 |
dir.mkdir(parents=True, exist_ok=True) # make directory
|
203 |
+
if threads > 1:
|
204 |
+
ThreadPool(threads).imap(lambda x: download_one(*x), zip(url, repeat(dir))) # multi-threaded
|
205 |
else:
|
206 |
for u in tuple(url) if isinstance(url, str) else url:
|
207 |
download_one(u, dir)
|