Ayush Chaurasia
committed on
Commit
•
3067429
1
Parent(s):
ec8979f
Add support for list-of-directory data format for wandb (#2719)
Browse files
utils/wandb_logging/wandb_utils.py
CHANGED
@@ -57,14 +57,14 @@ def process_wandb_config_ddp_mode(opt):
|
|
57 |
with open(opt.data) as f:
|
58 |
data_dict = yaml.load(f, Loader=yaml.SafeLoader) # data dict
|
59 |
train_dir, val_dir = None, None
|
60 |
-
if data_dict['train'].startswith(WANDB_ARTIFACT_PREFIX):
|
61 |
api = wandb.Api()
|
62 |
train_artifact = api.artifact(remove_prefix(data_dict['train']) + ':' + opt.artifact_alias)
|
63 |
train_dir = train_artifact.download()
|
64 |
train_path = Path(train_dir) / 'data/images/'
|
65 |
data_dict['train'] = str(train_path)
|
66 |
|
67 |
-
if data_dict['val'].startswith(WANDB_ARTIFACT_PREFIX):
|
68 |
api = wandb.Api()
|
69 |
val_artifact = api.artifact(remove_prefix(data_dict['val']) + ':' + opt.artifact_alias)
|
70 |
val_dir = val_artifact.download()
|
@@ -158,7 +158,7 @@ class WandbLogger():
|
|
158 |
return data_dict
|
159 |
|
160 |
def download_dataset_artifact(self, path, alias):
|
161 |
-
if path and path.startswith(WANDB_ARTIFACT_PREFIX):
|
162 |
dataset_artifact = wandb.use_artifact(remove_prefix(path, WANDB_ARTIFACT_PREFIX) + ":" + alias)
|
163 |
assert dataset_artifact is not None, "'Error: W&B dataset artifact doesn\'t exist'"
|
164 |
datadir = dataset_artifact.download()
|
@@ -229,7 +229,9 @@ class WandbLogger():
|
|
229 |
def create_dataset_table(self, dataset, class_to_id, name='dataset'):
|
230 |
# TODO: Explore multiprocessing to split this loop in parallel | This is essential for speeding up the logging
|
231 |
artifact = wandb.Artifact(name=name, type="dataset")
|
232 |
-
|
|
|
|
|
233 |
if Path(img_file).is_dir():
|
234 |
artifact.add_dir(img_file, name='data/images')
|
235 |
labels_path = 'labels'.join(dataset.path.rsplit('images', 1))
|
|
|
57 |
with open(opt.data) as f:
|
58 |
data_dict = yaml.load(f, Loader=yaml.SafeLoader) # data dict
|
59 |
train_dir, val_dir = None, None
|
60 |
+
if isinstance(data_dict['train'], str) and data_dict['train'].startswith(WANDB_ARTIFACT_PREFIX):
|
61 |
api = wandb.Api()
|
62 |
train_artifact = api.artifact(remove_prefix(data_dict['train']) + ':' + opt.artifact_alias)
|
63 |
train_dir = train_artifact.download()
|
64 |
train_path = Path(train_dir) / 'data/images/'
|
65 |
data_dict['train'] = str(train_path)
|
66 |
|
67 |
+
if isinstance(data_dict['val'], str) and data_dict['val'].startswith(WANDB_ARTIFACT_PREFIX):
|
68 |
api = wandb.Api()
|
69 |
val_artifact = api.artifact(remove_prefix(data_dict['val']) + ':' + opt.artifact_alias)
|
70 |
val_dir = val_artifact.download()
|
|
|
158 |
return data_dict
|
159 |
|
160 |
def download_dataset_artifact(self, path, alias):
|
161 |
+
if isinstance(path, str) and path.startswith(WANDB_ARTIFACT_PREFIX):
|
162 |
dataset_artifact = wandb.use_artifact(remove_prefix(path, WANDB_ARTIFACT_PREFIX) + ":" + alias)
|
163 |
assert dataset_artifact is not None, "'Error: W&B dataset artifact doesn\'t exist'"
|
164 |
datadir = dataset_artifact.download()
|
|
|
229 |
def create_dataset_table(self, dataset, class_to_id, name='dataset'):
|
230 |
# TODO: Explore multiprocessing to split this loop in parallel | This is essential for speeding up the logging
|
231 |
artifact = wandb.Artifact(name=name, type="dataset")
|
232 |
+
img_files = tqdm([dataset.path]) if isinstance(dataset.path, str) and Path(dataset.path).is_dir() else None
|
233 |
+
img_files = tqdm(dataset.img_files) if not img_files else img_files
|
234 |
+
for img_file in img_files:
|
235 |
if Path(img_file).is_dir():
|
236 |
artifact.add_dir(img_file, name='data/images')
|
237 |
labels_path = 'labels'.join(dataset.path.rsplit('images', 1))
|