LoCo / dataset /concat_dataset.py
Pusheen's picture
Upload 139 files
281df87 verified
from .catalog import DatasetCatalog
from ldm.util import instantiate_from_config
import torch
class ConCatDataset():
def __init__(self, dataset_name_list, ROOT, which_embedder, train=True, repeats=None):
self.datasets = []
cul_previous_dataset_length = 0
offset_map = []
which_dataset = []
if repeats is None:
repeats = [1] * len(dataset_name_list)
else:
assert len(repeats) == len(dataset_name_list)
Catalog = DatasetCatalog(ROOT, which_embedder)
for dataset_idx, (dataset_name, yaml_params) in enumerate(dataset_name_list.items()):
repeat = repeats[dataset_idx]
dataset_dict = getattr(Catalog, dataset_name)
target = dataset_dict['target']
params = dataset_dict['train_params'] if train else dataset_dict['val_params']
if yaml_params is not None:
params.update(yaml_params)
dataset = instantiate_from_config( dict(target=target, params=params) )
self.datasets.append(dataset)
for _ in range(repeat):
offset_map.append( torch.ones(len(dataset))*cul_previous_dataset_length )
which_dataset.append( torch.ones(len(dataset))*dataset_idx )
cul_previous_dataset_length += len(dataset)
offset_map = torch.cat(offset_map, dim=0).long()
self.total_length = cul_previous_dataset_length
self.mapping = torch.arange(self.total_length) - offset_map
self.which_dataset = torch.cat(which_dataset, dim=0).long()
def total_images(self):
count = 0
for dataset in self.datasets:
print(dataset.total_images())
count += dataset.total_images()
return count
def __getitem__(self, idx):
dataset = self.datasets[ self.which_dataset[idx] ]
return dataset[ self.mapping[idx] ]
def __len__(self):
return self.total_length