Spaces:

kxic
/

EscherNet

Running on Zero

EscherNet / croco /models /head_downstream.py

kxhit

update

5f093a6 6 months ago

2.65 kB

	# Copyright (C) 2022-present Naver Corporation. All rights reserved.
	# Licensed under CC BY-NC-SA 4.0 (non-commercial use only).

	# --------------------------------------------------------
	# Heads for downstream tasks
	# --------------------------------------------------------

	"""
	A head is a module where the __init__ defines only the head hyperparameters.
	A method setup(croconet) takes a CroCoNet and sets all layers according to the head and croconet attributes.
	The forward takes the features as well as a dictionary img_info containing the keys 'width' and 'height'
	"""

	import torch
	import torch.nn as nn
	from .dpt_block import DPTOutputAdapter


	class PixelwiseTaskWithDPT(nn.Module):
	    """DPT head for pixelwise downstream tasks on top of CroCo.

	    ``__init__`` only records the head hyperparameters; the actual layers are
	    created by ``setup(croconet)``, which must be called before ``forward``.

	    By default (``hooks_idx=None``), the hooked layers are chosen in ``setup``:
	      * for encoder-only: 4 equally spread layers
	      * for encoder+decoder: last encoder + 3 equally spread layers of the decoder
	    """

	    def __init__(self, *, hooks_idx=None, layer_dims=[96,192,384,768],
	                 output_width_ratio=1, num_channels=1, postprocess=None, **kwargs):
	        """Store the head hyperparameters (keyword-only).

	        Args:
	            hooks_idx: indices of the backbone blocks to hook, or None to let
	                ``setup`` pick them from the backbone depth.
	            layer_dims: forwarded to ``DPTOutputAdapter`` as ``layer_dims``.
	            output_width_ratio: forwarded to ``DPTOutputAdapter``.
	            num_channels: forwarded to ``DPTOutputAdapter``.
	            postprocess: optional callable applied to the DPT output in
	                ``forward``; skipped when falsy.
	            **kwargs: accepted and ignored.
	        """
	        super().__init__()
	        self.return_all_blocks = True  # backbone needs to return all layers
	        self.postprocess = postprocess
	        self.output_width_ratio = output_width_ratio
	        self.num_channels = num_channels
	        self.hooks_idx = hooks_idx
	        # Copy into a fresh list: the default argument is a single list object
	        # shared by every call, so storing it directly would let one instance's
	        # in-place edits leak into all other instances (and future defaults).
	        self.layer_dims = list(layer_dims)

	    def setup(self, croconet):
	        """Build the DPT adapter from this head's settings and ``croconet``.

	        Reads ``croconet.enc_depth`` and ``croconet.enc_embed_dim``; when a hook
	        index is at or beyond ``enc_depth`` (or hooks are auto-selected for a
	        model exposing ``dec_blocks``) it also reads ``croconet.dec_depth`` and
	        ``croconet.dec_embed_dim``.

	        Raises:
	            KeyError: if hooks are auto-selected for an encoder+decoder model
	                whose ``dec_depth`` is not one of 8, 12 or 24.
	        """
	        dpt_args = {'output_width_ratio': self.output_width_ratio, 'num_channels': self.num_channels}
	        if self.hooks_idx is None:
	            if hasattr(croconet, 'dec_blocks'):  # encoder + decoder
	                # Spacing between hooked layers, tabulated per decoder depth.
	                step = {8: 3, 12: 4, 24: 8}[croconet.dec_depth]
	                last_idx = croconet.dec_depth + croconet.enc_depth - 1
	            else:  # encoder only
	                step = croconet.enc_depth // 4
	                last_idx = croconet.enc_depth - 1
	            # Four hooks in ascending order, ending at the last block.
	            self.hooks_idx = [last_idx - i * step for i in range(3, -1, -1)]
	            print(f' PixelwiseTaskWithDPT: automatically setting hook_idxs={self.hooks_idx}')
	        dpt_args['hooks'] = self.hooks_idx
	        dpt_args['layer_dims'] = self.layer_dims
	        self.dpt = DPTOutputAdapter(**dpt_args)
	        # Token width per hook: encoder width for hooks inside the encoder,
	        # decoder width for the others.
	        dim_tokens = [
	            croconet.enc_embed_dim if hook < croconet.enc_depth else croconet.dec_embed_dim
	            for hook in self.hooks_idx
	        ]
	        dpt_init_args = {'dim_tokens_enc': dim_tokens}
	        self.dpt.init(**dpt_init_args)

	    def forward(self, x, img_info):
	        """Run the DPT adapter on ``x`` and apply ``postprocess`` if set.

	        Args:
	            x: features passed unchanged to the DPT adapter.
	            img_info: mapping with keys 'height' and 'width', forwarded as
	                ``image_size=(height, width)``.

	        Returns:
	            The adapter output, passed through ``self.postprocess`` when it is
	            truthy.
	        """
	        out = self.dpt(x, image_size=(img_info['height'], img_info['width']))
	        if self.postprocess:
	            out = self.postprocess(out)
	        return out