File size: 7,880 Bytes
8abcf2d ef8c30b 8abcf2d ef8c30b 8abcf2d 5d70faf ef8c30b 5d70faf ef8c30b 5d70faf ef8c30b 5d70faf dcb01bb 5d70faf ef8c30b 4483569 dcb01bb 5d70faf dcb01bb 4483569 8abcf2d 4483569 ee7c71e dcb01bb 8abcf2d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 |
import gradio as gr
import math
from transformers import AutoConfig # Required for Hugging Face integration
# ---- Helper Functions ---- #
def convert_params(params):
    """Format a parameter count as a short human-readable string.

    The value is scaled by a power of 1000 and suffixed with the matching
    unit ("K", "M", "B", "T", "P", "E", "Z", "Y").

    Args:
        params: Number of parameters (int or float). May be negative or
            fractional; the sign is preserved and fractions use no suffix.

    Returns:
        "0" when ``params`` is zero, otherwise ``"<value> <suffix>"`` with the
        value rounded to two decimals. Values below 1000 have an empty suffix,
        so the result ends with a trailing space (e.g. ``"500.0 "``).
    """
    if params == 0:
        return "0"
    size_name = ("", "K", "M", "B", "T", "P", "E", "Z", "Y")
    last = len(size_name) - 1
    # math.log(x, 1000) returns 0.9999999999999998 for x == 1000, which selects
    # the wrong unit; log10 does not have that problem for powers of ten.
    # abs() keeps negative inputs from raising a math domain error.
    i = int(math.floor(math.log10(abs(params)) / 3))
    # Clamp the index: magnitudes below 1 would otherwise index from the end
    # of the tuple, and very large ones would run past it.
    i = max(0, min(i, last))
    s = round(params / math.pow(1000, i), 2)
    # Rounding can push the scaled value up to 1000.0 (e.g. 999999 -> 1000.0 K);
    # promote to the next unit when there is one.
    if abs(s) >= 1000 and i < last:
        i += 1
        s = round(params / math.pow(1000, i), 2)
    return "%s %s" % (s, size_name[i])
# Get Hugging Face model configuration and update the parameters
def get_hf_model_args(hf_model_name_or_path, num_layers, hidden_size, num_attention_heads, vocab_size, sequence_length):
    """Resolve model dimensions, preferring a Hugging Face config when one is named.

    Args:
        hf_model_name_or_path: Model id or path handed to
            ``AutoConfig.from_pretrained``; a falsy value skips the lookup.
        num_layers, hidden_size, num_attention_heads, vocab_size,
        sequence_length: Fallback values, used as-is when no model is named
            and for any key the fetched config does not contain.

    Returns:
        A ``(args, error)`` pair. On success ``args`` is a dict with the keys
        ``num_layers``, ``hidden_size``, ``num_attention_heads``,
        ``vocab_size`` and ``sequence_length`` and ``error`` is ``None``.
        If loading the config raises, ``args`` is ``None`` and ``error`` is a
        message describing the failure.
    """
    resolved = {
        "num_layers": num_layers,
        "hidden_size": hidden_size,
        "num_attention_heads": num_attention_heads,
        "vocab_size": vocab_size,
        "sequence_length": sequence_length,
    }
    if not hf_model_name_or_path:
        return resolved, None

    try:
        # NOTE(review): trust_remote_code=True permits code shipped with the
        # referenced repository to run during loading; the name comes from a
        # user-facing textbox, so confirm this is intended.
        hf_config = AutoConfig.from_pretrained(hf_model_name_or_path, trust_remote_code=True).to_dict()
    except Exception as e:
        return None, f"Error fetching Hugging Face model: {str(e)}"

    # (result key, Hugging Face config key); the caller-supplied value stays
    # in place when the config lacks the key.
    key_map = (
        ("num_layers", "num_hidden_layers"),
        ("hidden_size", "hidden_size"),
        ("num_attention_heads", "num_attention_heads"),
        ("vocab_size", "vocab_size"),
        ("sequence_length", "max_position_embeddings"),
    )
    for arg_name, config_key in key_map:
        resolved[arg_name] = hf_config.get(config_key, resolved[arg_name])
    return resolved, None
# ---- Memory Calculation ---- #
def calc_mem(hf_model_name_or_path, num_gpus, tensor_parallel_size, pipeline_parallel_size, batch_size_per_gpu, sequence_length, vocab_size, hidden_size, num_attention_heads, num_layers, ffn_expansion_factor, is_mixed_precision, misc_mem_gib, kv_size_ratio=1.0, num_mlp_linears=2):
    """Estimate per-GPU memory for the model weights plus a fixed overhead.

    The parameter count is the sum of embedding, positional, layernorm,
    attention and MLP parameters. Its size in bytes is divided across the
    tensor- and pipeline-parallel ranks and ``misc_mem_gib`` is added on top.

    Args:
        hf_model_name_or_path: Optional Hugging Face model id or path; when
            set, its config overrides ``num_layers``, ``hidden_size``,
            ``vocab_size`` and ``sequence_length``.
        num_gpus: Accepted for interface compatibility; not used in the
            estimate.
        tensor_parallel_size: Tensor-parallel degree (must be positive).
        pipeline_parallel_size: Pipeline-parallel degree (must be positive).
        batch_size_per_gpu: Accepted for interface compatibility; not used in
            the estimate.
        sequence_length: Number of positions for the positional embedding.
        vocab_size: Vocabulary size.
        hidden_size: Model hidden dimension.
        num_attention_heads: Accepted for interface compatibility; not used
            in the estimate.
        num_layers: Number of transformer layers.
        ffn_expansion_factor: Ratio of the MLP inner size to ``hidden_size``.
        is_mixed_precision: Use 2 bytes per parameter when true, else 4.
        misc_mem_gib: Constant overhead in GiB added to the result.
        kv_size_ratio: Ratio of key/value projection size to the query
            projection size; 1.0 corresponds to standard multi-head attention.
        num_mlp_linears: Number of linear layers in each MLP block.

    Returns:
        A result string with the estimate in GiB, or an error message string
        when the Hugging Face lookup fails or a parallel size is not positive.
    """
    model_params, hf_error = get_hf_model_args(hf_model_name_or_path, num_layers, hidden_size, num_attention_heads, vocab_size, sequence_length)
    if hf_error:
        return hf_error
    num_layers = model_params["num_layers"]
    hidden_size = model_params["hidden_size"]
    vocab_size = model_params["vocab_size"]
    sequence_length = model_params["sequence_length"]

    # Reject empty (None), zero or negative degrees up front rather than
    # letting the division below raise.
    if min(tensor_parallel_size or 0, pipeline_parallel_size or 0) <= 0:
        return "Error: Tensor Parallel Size and Pipeline Parallel Size must both be positive"
    model_parallel_size = tensor_parallel_size * pipeline_parallel_size

    embed_params = 2 * vocab_size * hidden_size
    positional_params = hidden_size * sequence_length
    ln_params = 8 * hidden_size * num_layers + (2 * hidden_size)
    # Attention size depends on the KV ratio, not the FFN expansion factor,
    # and the MLP count includes every linear layer in the block; these are
    # the same formulas calc_params uses.
    attention_params = int(2 * (1 + kv_size_ratio) * num_layers * hidden_size * hidden_size)
    mlp_params = num_mlp_linears * ffn_expansion_factor * num_layers * hidden_size * hidden_size
    total_params = embed_params + positional_params + ln_params + attention_params + mlp_params

    bytes_per_param = 2 if is_mixed_precision else 4
    model_mem = total_params * bytes_per_param
    per_gpu_mem_gib = (model_mem / model_parallel_size) / 1024**3 + misc_mem_gib
    return f"Per-GPU Memory Required for Training: {per_gpu_mem_gib:.2f} GiB"
# ---- Parameter Calculation ---- #
def calc_params(vocab_size, tied_embeddings, hidden_size, sequence_length, num_layers, moe, num_experts, expert_interval, topk, ffn_expansion_factor, num_mlp_linears, kv_size_ratio):
    """Count the parameters of a transformer and format a short breakdown.

    Args:
        vocab_size: Vocabulary size.
        tied_embeddings: When true the embedding matrix is counted once,
            otherwise twice.
        hidden_size: Model hidden dimension.
        sequence_length: Number of positions for the positional embedding.
        num_layers: Number of transformer layers.
        moe: Whether to count some layers as mixture-of-experts layers.
        num_experts: Experts per MoE layer.
        expert_interval: One MoE layer every this many layers; must be
            positive when ``moe`` is true.
        topk: Accepted for interface compatibility; not used in the count.
        ffn_expansion_factor: Ratio of the MLP inner size to ``hidden_size``.
        num_mlp_linears: Number of linear layers in each MLP block.
        kv_size_ratio: Ratio of key/value projection size to the query
            projection size.

    Returns:
        A multi-line string listing embedding, attention and FFN parameters
        (plus gating parameters for MoE) and the total. The total also
        includes position-embedding and layernorm parameters, which are not
        listed separately. An error message string is returned instead when
        MoE is enabled with a non-positive or missing expert interval.
    """
    # Without this guard a zero or empty interval fails in the division below.
    if moe and (not expert_interval or expert_interval <= 0):
        return "Error: Expert Interval must be a positive number when MoE is enabled"

    if tied_embeddings:
        embedding_params = hidden_size * vocab_size
    else:
        embedding_params = 2 * hidden_size * vocab_size
    position_embedding_params = hidden_size * sequence_length
    attention_params = int(2 * (1 + kv_size_ratio) * num_layers * hidden_size * hidden_size)
    layernorm_params = 13 * num_layers * hidden_size

    gating_params = 0
    if moe:
        num_expert_layers = num_layers / expert_interval
        ffn_expert_params = num_mlp_linears * ffn_expansion_factor * num_expert_layers * num_experts * hidden_size * hidden_size
        ffn_dense_params = num_mlp_linears * ffn_expansion_factor * (num_layers - num_expert_layers) * hidden_size * hidden_size
        ffn_params = ffn_expert_params + ffn_dense_params
        gating_params = num_expert_layers * hidden_size * num_experts
    else:
        ffn_params = num_mlp_linears * ffn_expansion_factor * num_layers * hidden_size * hidden_size

    # gating_params is 0 for dense models, so it can always be added.
    total_params = embedding_params + attention_params + ffn_params + position_embedding_params + layernorm_params
    total_params += gating_params

    gating_line = "Gating parameters: " + convert_params(gating_params) if moe else ""
    # The leading and trailing empty entries reproduce the newlines that
    # open and close the original report text.
    report_lines = [
        "",
        f"Embedding parameters: {convert_params(embedding_params)}",
        f"Attention parameters: {convert_params(attention_params)}",
        f"FFN parameters: {convert_params(ffn_params)}",
        gating_line,
        f"Total Params in the Model: {convert_params(total_params)}",
        "",
    ]
    return "\n".join(report_lines)
# ---- Gradio Interface ---- #
# Two-tab UI: each tab collects its inputs, shows a read-only result box and
# wires a button to one of the calculator functions defined above.
with gr.Blocks() as demo:
    with gr.Tabs():
        # Memory Calculation Tab
        with gr.TabItem("Memory Calculation"):
            hf_model_name_or_path = gr.Textbox(label="HuggingFace Model Name or Path (optional)", value="")
            num_gpus = gr.Number(label="Number of GPUs", value=1)
            tensor_parallel_size = gr.Number(label="Tensor Parallel Size", value=1)
            pipeline_parallel_size = gr.Number(label="Pipeline Parallel Size", value=1)
            batch_size_per_gpu = gr.Number(label="Batch Size per GPU", value=8)
            sequence_length = gr.Number(label="Sequence Length", value=2048)
            vocab_size = gr.Number(label="Vocab Size", value=51200)
            hidden_size = gr.Number(label="Hidden Size", value=6144)
            num_attention_heads = gr.Number(label="Number of Attention Heads", value=64)
            num_layers = gr.Number(label="Number of Layers", value=44)
            ffn_expansion_factor = gr.Number(label="FFN Expansion Factor", value=4)
            is_mixed_precision = gr.Checkbox(label="Mixed Precision", value=True)
            misc_mem_gib = gr.Number(label="Misc Memory Overhead (GiB)", value=5)
            memory_result = gr.Textbox(label="Memory Calculation Result", interactive=False)
            calc_memory_button = gr.Button("Calculate Memory")
            # Input values are handed to calc_mem positionally, so this list
            # must follow the order of calc_mem's signature.
            calc_memory_button.click(calc_mem,
            inputs=[hf_model_name_or_path, num_gpus, tensor_parallel_size, pipeline_parallel_size, batch_size_per_gpu, sequence_length, vocab_size, hidden_size, num_attention_heads, num_layers, ffn_expansion_factor, is_mixed_precision, misc_mem_gib],
            outputs=memory_result)
        # Parameter Calculation Tab
        # NOTE: vocab_size, hidden_size, sequence_length, num_layers and
        # ffn_expansion_factor are rebound to new components below. The click
        # handler registered above already holds the first tab's component
        # objects in its inputs list, so it is unaffected by the rebinding.
        with gr.TabItem("Parameter Calculation"):
            vocab_size = gr.Number(label="Vocab Size", value=51200)
            tied_embeddings = gr.Checkbox(label="Tied Embeddings", value=False)
            hidden_size = gr.Number(label="Hidden Size", value=6144)
            sequence_length = gr.Number(label="Sequence Length", value=2048)
            num_layers = gr.Number(label="Number of Layers", value=44)
            ffn_expansion_factor = gr.Number(label="FFN Expansion Factor", value=4)
            num_mlp_linears = gr.Number(label="Number of Linear Layers per MLP Block", value=2)
            kv_size_ratio = gr.Number(label="KV Size Ratio", value=1.0)
            # MoE options sit in an accordion that starts closed.
            with gr.Accordion("MoE Parameters", open=False):
                moe = gr.Checkbox(label="MoE", value=False)
                num_experts = gr.Number(label="Number of Experts", value=8)
                expert_interval = gr.Number(label="Expert Interval", value=1)
                topk = gr.Number(label="Top k Routing", value=1)
            param_result = gr.Textbox(label="Parameter Calculation Result", interactive=False)
            calc_param_button = gr.Button("Calculate Parameters")
            # Input values are handed to calc_params positionally, so this
            # list must follow the order of calc_params's signature.
            calc_param_button.click(calc_params,
            inputs=[vocab_size, tied_embeddings, hidden_size, sequence_length, num_layers, moe, num_experts, expert_interval, topk, ffn_expansion_factor, num_mlp_linears, kv_size_ratio],
            outputs=param_result)
# Launch the app; this runs whenever the module is executed or imported.
demo.launch()
|