glenn-jocher committed on
Commit
54652fe
1 Parent(s): 801b469

Objects365 update

Browse files
Files changed (2) hide show
  1. data/scripts/get_objects365.py +21 -9
  2. utils/general.py +3 -3
data/scripts/get_objects365.py CHANGED
@@ -7,22 +7,34 @@
7
  # /images
8
  # /labels
9
 
 
10
  from pycocotools.coco import COCO
11
 
12
- coco = COCO("zhiyuan_objv2_train.json")
13
- cats = coco.loadCats(coco.getCatIds())
14
- nms = [cat["name"] for cat in cats]
15
- print("COCO categories: \n{}\n".format(" ".join(nms)))
16
- for categoryId, cat in enumerate(nms):
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  catIds = coco.getCatIds(catNms=[cat])
18
  imgIds = coco.getImgIds(catIds=catIds)
19
- print(cat)
20
- # Create a subfolder in this directory called "labels". This is where the annotations will be saved in YOLO format
21
  for im in coco.loadImgs(imgIds):
22
  width, height = im["width"], im["height"]
23
- path = im["file_name"].split("/")[-1] # image filename
24
  try:
25
- with open("labels/train/" + path.replace(".jpg", ".txt"), "a+") as file:
26
  annIds = coco.getAnnIds(imgIds=im["id"], catIds=catIds, iscrowd=None)
27
  for a in coco.loadAnns(annIds):
28
  x, y, w, h = a['bbox'] # bounding box in xywh (xy top-left corner)
 
7
  # /images
8
  # /labels
9
 
10
+
11
  from pycocotools.coco import COCO
12
 
13
+ from utils.general import download, Path
14
+
15
+ # Make Directories
16
+ dir = Path('../datasets/objects365') # dataset directory
17
+ for p in 'images', 'labels':
18
+ (dir / p).mkdir(parents=True, exist_ok=True)
19
+ for q in 'train', 'val':
20
+ (dir / p / q).mkdir(parents=True, exist_ok=True)
21
+
22
+ # Download
23
+ url = "https://dorc.ks3-cn-beijing.ksyun.com/data-set/2020Objects365%E6%95%B0%E6%8D%AE%E9%9B%86/train/"
24
+ download(url + 'zhiyuan_objv2_train.tar.gz', dir=dir, threads=8) # annotations json
25
+ download([url + f for f in [f'patch{i}.tar.gz' for i in range(51)]], dir=dir / 'images' / 'train', threads=8)
26
+
27
+ # Labels
28
+ coco = COCO(dir / 'zhiyuan_objv2_train.json')
29
+ names = [x["name"] for x in coco.loadCats(coco.getCatIds())]
30
+ for categoryId, cat in enumerate(names):
31
  catIds = coco.getCatIds(catNms=[cat])
32
  imgIds = coco.getImgIds(catIds=catIds)
 
 
33
  for im in coco.loadImgs(imgIds):
34
  width, height = im["width"], im["height"]
35
+ path = Path(im["file_name"]) # image filename
36
  try:
37
+ with open(dir / 'labels' / 'train' / path.with_suffix('.txt').name, 'a') as file:
38
  annIds = coco.getAnnIds(imgIds=im["id"], catIds=catIds, iscrowd=None)
39
  for a in coco.loadAnns(annIds):
40
  x, y, w, h = a['bbox'] # bounding box in xywh (xy top-left corner)
utils/general.py CHANGED
@@ -183,7 +183,7 @@ def check_dataset(dict):
183
  raise Exception('Dataset not found.')
184
 
185
 
186
- def download(url, dir='.', multi_thread=False):
187
  # Multi-threaded file download and unzip function
188
  def download_one(url, dir):
189
  # Download 1 file
@@ -200,8 +200,8 @@ def download(url, dir='.', multi_thread=False):
200
 
201
  dir = Path(dir)
202
  dir.mkdir(parents=True, exist_ok=True) # make directory
203
- if multi_thread:
204
- ThreadPool(8).imap(lambda x: download_one(*x), zip(url, repeat(dir))) # 8 threads
205
  else:
206
  for u in tuple(url) if isinstance(url, str) else url:
207
  download_one(u, dir)
 
183
  raise Exception('Dataset not found.')
184
 
185
 
186
+ def download(url, dir='.', threads=1):
187
  # Multi-threaded file download and unzip function
188
  def download_one(url, dir):
189
  # Download 1 file
 
200
 
201
  dir = Path(dir)
202
  dir.mkdir(parents=True, exist_ok=True) # make directory
203
+ if threads > 1:
204
+ ThreadPool(threads).imap(lambda x: download_one(*x), zip(url, repeat(dir))) # multi-threaded
205
  else:
206
  for u in tuple(url) if isinstance(url, str) else url:
207
  download_one(u, dir)