import os

import torch as T


# ANSI SGR escape sequences understood by print_colored.
_ANSI_CODES = {
    'bold': '\033[1m',
    'green': '\033[92m',
    'yellow': '\033[93m',
    'red': '\033[91m',
    'blue': '\033[94m',
    'grey': '\033[90m',
    'white': '\033[97m',
    'reset': '\033[0m',
}

_BYTES_PER_MB = 1024 ** 2


def _count_trainable(model):
    """Return the number of elements in *model*'s trainable parameters."""
    return sum(p.numel() for p in model.parameters() if p.requires_grad)


def _estimate_bytes(model):
    """Return the combined byte size of *model*'s parameters and buffers."""
    param_bytes = sum(p.nelement() * p.element_size() for p in model.parameters())
    buffer_bytes = sum(b.nelement() * b.element_size() for b in model.buffers())
    return param_bytes + buffer_bytes


def rank0():
    """Return True when the ``RANK`` environment variable is unset or '0'.

    Any other value of ``RANK`` yields False.
    """
    rank = os.environ.get('RANK')
    return rank is None or rank == '0'


def print_colored(message, color='reset', bold=False, **kwargs):
    """Print *message* wrapped in ANSI color (and optionally bold) codes.

    Args:
        message: Object to print; formatted with an f-string.
        color: Case-insensitive color name ('green', 'yellow', 'red', 'blue',
            'grey', 'white', 'bold', 'reset'). Unknown names fall back to the
            reset sequence.
        bold: When true, the bold sequence is emitted as well.
        **kwargs: Forwarded unchanged to the built-in ``print``.
    """
    color_code = _ANSI_CODES.get(color.lower(), _ANSI_CODES['reset'])
    # Bold must come AFTER the color code: for the default/unknown color the
    # color code is the reset sequence, which would cancel a preceding bold.
    bold_code = _ANSI_CODES['bold'] if bold else ''
    print(f"{color_code}{bold_code}{message}{_ANSI_CODES['reset']}", **kwargs)


def print0_colored(*args, **kwargs):
    """Call ``print_colored`` with the given arguments only if ``rank0()``."""
    if rank0():
        print_colored(*args, **kwargs)


def param_count(module):
    """Return a text report of trainable parameter counts.

    The first line holds the count for *module* as a whole, followed by a
    separator and one line per direct child (``module.named_children()``).
    Only parameters with ``requires_grad`` set are counted.
    """
    total_params = _count_trainable(module)
    output = [
        f'Total model parameters: {total_params:,}',
        '---------------------------',
    ]
    output.extend(
        f'{name} parameters: {_count_trainable(child):,}'
        for name, child in module.named_children()
    )
    return '\n'.join(output)


def model_size_estimation(module):
    """Return a text report of the size of *module* in MB (1024**2 bytes).

    Sizes sum ``nelement() * element_size()`` over all parameters and buffers,
    regardless of ``requires_grad``. The first line is the total, followed by
    a separator and one line per direct child.
    """
    total_size = _estimate_bytes(module)
    output = [
        f'Total model size: {total_size / _BYTES_PER_MB:.2f} MB',
        '---------------------------',
    ]
    output.extend(
        f'{name} size: {_estimate_bytes(child) / _BYTES_PER_MB:.2f} MB'
        for name, child in module.named_children()
    )
    return '\n'.join(output)


def layer_param_distribution(module):
    """Return a text report of trainable parameters grouped by layer class.

    Each sub-module's own (non-recursive) trainable parameters are added to
    the bucket named after its class. Buckets are listed in descending order
    of parameter count together with their share of the total.

    A parameter shared by several modules (weight tying) is attributed only to
    the first module that owns it in ``named_modules()`` order, so the bucket
    counts add up to the reported total.
    """
    total_params = _count_trainable(module)

    layer_types = {}
    seen_params = set()  # ids of parameters already attributed to a bucket
    for _, submodule in module.named_modules():
        params = 0
        for p in submodule.parameters(recurse=False):
            if not p.requires_grad or id(p) in seen_params:
                continue
            seen_params.add(id(p))
            params += p.numel()
        if params > 0:
            layer_type = submodule.__class__.__name__
            layer_types[layer_type] = layer_types.get(layer_type, 0) + params

    output = [
        f'Total trainable parameters: {total_params:,}',
        '---------------------------',
    ]
    # layer_types is non-empty only when total_params > 0, so the division
    # below cannot hit zero.
    ranked = sorted(layer_types.items(), key=lambda item: item[1], reverse=True)
    for layer_type, count in ranked:
        percentage = (count / total_params) * 100
        output.append(f'{layer_type}: {count:,} ({percentage:.2f}%)')
    return '\n'.join(output)