# hertz-dev/utils/interp.py (commit 824afbf)
import torch as T
import os
def rank0():
    """Tell whether the current process counts as rank 0.

    The decision is based on the ``RANK`` environment variable.

    Returns:
        bool: ``True`` when ``RANK`` is unset or is exactly the string
        ``'0'``; ``False`` for any other value.
    """
    return os.environ.get('RANK') in (None, '0')
def print_colored(message, color='reset', bold=False, **kwargs):
    """Print ``message`` wrapped in ANSI color/bold escape sequences.

    Args:
        message: Object to print; formatted with ``str()`` via an f-string.
        color: Case-insensitive color name. One of the keys of the table
            below; unknown names fall back to ``'reset'`` (terminal default).
        bold: When true, the bold attribute is enabled for the message.
        **kwargs: Forwarded unchanged to the built-in ``print`` (for example
            ``end``, ``file``, ``flush``).

    The output always ends with the reset sequence so that styling does not
    leak into subsequent terminal output.
    """
    color_dict = {
        'bold': '\033[1m',
        'green': '\033[92m',
        'yellow': '\033[93m',
        'red': '\033[91m',
        'blue': '\033[94m',
        'grey': '\033[90m',
        'white': '\033[97m',
        'reset': '\033[0m',
    }
    color_code = color_dict.get(color.lower(), color_dict['reset'])
    # The bold code must follow the color code: the 'reset' sequence (used
    # for the default and for unknown colors) clears every attribute,
    # including bold, so emitting bold first would silently cancel it.
    bold_code = color_dict['bold'] if bold else ''
    print(f"{color_code}{bold_code}{message}{color_dict['reset']}", **kwargs)
def print0_colored(*args, **kwargs):
    """Call ``print_colored`` with the given arguments only when ``rank0()`` is true.

    When ``rank0()`` is false the call is a no-op. All positional and
    keyword arguments are forwarded to ``print_colored`` unchanged.
    """
    if not rank0():
        return
    print_colored(*args, **kwargs)
def param_count(module):
    """Build a text report of trainable parameter counts.

    Args:
        module: Object exposing ``parameters()`` and ``named_children()``
            (the methods used here match ``torch.nn.Module``).

    Returns:
        str: Newline-joined report. The first line is the total for
        ``module``, the second a separator, followed by one line per direct
        child. Only parameters with ``requires_grad`` set are counted.
    """
    def trainable(model):
        # Accumulate element counts of parameters that require gradients.
        count = 0
        for p in model.parameters():
            if p.requires_grad:
                count += p.numel()
        return count

    lines = [
        f'Total model parameters: {trainable(module):,}',
        '---------------------------',
    ]
    lines.extend(
        f'{child_name} parameters: {trainable(child):,}'
        for child_name, child in module.named_children()
    )
    return '\n'.join(lines)
def model_size_estimation(module):
    """Build a text report of the memory taken by parameters and buffers.

    Args:
        module: Object exposing ``parameters()``, ``buffers()`` and
            ``named_children()`` (the methods used here match
            ``torch.nn.Module``).

    Returns:
        str: Newline-joined report. The first line is the total size of
        ``module`` in MB (bytes divided by 1024**2, two decimals), the second
        a separator, followed by one line per direct child. Sizes are
        ``nelement() * element_size()`` summed over all parameters and
        buffers, regardless of ``requires_grad``.
    """
    bytes_per_mb = 1024**2

    def footprint(model):
        # Byte count of every parameter tensor, then every buffer tensor.
        n_bytes = 0
        for tensor in model.parameters():
            n_bytes += tensor.nelement() * tensor.element_size()
        for tensor in model.buffers():
            n_bytes += tensor.nelement() * tensor.element_size()
        return n_bytes

    report = [
        f'Total model size: {footprint(module) / bytes_per_mb:.2f} MB',
        '---------------------------',
    ]
    for child_name, child in module.named_children():
        report.append(f'{child_name} size: {footprint(child) / bytes_per_mb:.2f} MB')
    return '\n'.join(report)
def layer_param_distribution(module):
    """Build a text report of trainable parameters grouped by layer class.

    Args:
        module: Object exposing ``parameters()`` and ``named_modules()``
            (the methods used here match ``torch.nn.Module``).

    Returns:
        str: Newline-joined report. The first line is the total trainable
        parameter count, the second a separator, followed by one line per
        layer class name with its parameter count and share of the total,
        ordered from largest to smallest count. Only parameters owned
        directly by a submodule (``recurse=False``) are attributed to its
        class, and classes with no trainable parameters are omitted.
    """
    total_params = sum(
        p.numel() for p in module.parameters() if p.requires_grad
    )

    # Class name -> trainable parameters held directly by modules of that class.
    per_type = {}
    for _, submodule in module.named_modules():
        own = 0
        for p in submodule.parameters(recurse=False):
            if p.requires_grad:
                own += p.numel()
        if own > 0:
            type_name = submodule.__class__.__name__
            per_type[type_name] = per_type.get(type_name, 0) + own

    report = [
        f'Total trainable parameters: {total_params:,}',
        '---------------------------',
    ]
    ranked = sorted(per_type.items(), key=lambda item: item[1], reverse=True)
    for type_name, count in ranked:
        share = (count / total_params) * 100
        report.append(f'{type_name}: {count:,} ({share:.2f}%)')
    return '\n'.join(report)