""" This is a high-level pseudo code for grounding net. This class needs to tokenize grounding input into gronding tokens which will be used in GatedAttenion layers. class PositionNet(nn.Module): def __init__(self, **kwargs): super().__init__() kwargs should be defined by model.grounding_tokenizer in config yaml file. def forward(self, **kwargs): kwargs should be the output of grounding_tokenizer_input network return grounding_tokens # with shape: Batch * Num_Of_Token* Token_Channel_Dimension """ # = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = # """ This is a high-level pseudo code for downsampler. This class needs to process input and output a spatial feature such that it will be fed into the first conv layer. class GroundingDownsampler(nn.Module): def __init__(self, **kwargs): super().__init__() kwargs should be defined by model.grounding_downsampler in config yaml file. you MUST define self.out_dim such that Unet knows add how many extra layers def forward(self, **kwargs): kwargs should be the output of grounding_downsampler_input network return spatial_feature # with shape: Batch * self.out_dim * H *W (64*64 for SD) """