File size: 7,039 Bytes
8abcf2d ef8c30b 834b1c6 8abcf2d ef8c30b 8abcf2d 5d70faf d65669a 5d70faf dcb01bb d65669a dcb01bb d65669a dcb01bb d65669a ef8c30b 4483569 dcb01bb 5d70faf dcb01bb d65669a dcb01bb 4483569 8abcf2d 4483569 ee7c71e dcb01bb 8abcf2d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 |
import gradio as gr
from transformers import AutoConfig # Required for Hugging Face integration
from calc_params import calc_params # Import calc_params from the new file
# ---- Helper Functions ---- #
def convert_params(params):
    """Format a parameter count with a decimal magnitude suffix.

    The value is scaled by the largest power of 1000 that does not exceed its
    absolute value, rounded to two decimals, and followed by a space and one
    of "", "K", "M", "B", "T", "P", "E", "Z", "Y". For values below 1000 the
    suffix is empty, so the result ends with the separating space.

    Args:
        params: The count to format (int or float).

    Returns:
        "0" when ``params`` is zero, otherwise a string such as "1.5 K".
    """
    if params == 0:
        return "0"
    size_name = ("", "K", "M", "B", "T", "P", "E", "Z", "Y")
    # Pick the suffix by comparing against exact integer thresholds rather
    # than via a floating-point logarithm: this needs no extra import, keeps
    # values below 1 at index 0 (a negative index would wrap around to "Y"),
    # accepts negative values, and clamps huge values to the last suffix
    # instead of indexing past the end of the tuple.
    magnitude = abs(params)
    i = 0
    limit = 1000
    while magnitude >= limit and i < len(size_name) - 1:
        limit *= 1000
        i += 1
    s = round(params / (1000 ** i), 2)
    return "%s %s" % (s, size_name[i])
# Fetch a Hugging Face model configuration and extract the architecture hyperparameters
def get_hf_model_args(hf_model_name_or_path):
    """Load a Hugging Face model config and pull out architecture sizes.

    Args:
        hf_model_name_or_path: Hub model id or local path passed to
            ``AutoConfig.from_pretrained``.

    Returns:
        A ``(params, error)`` pair. On success ``params`` is a dict with the
        keys "num_layers", "hidden_size", "num_attention_heads", "vocab_size"
        and "sequence_length" (each ``None`` when the config has no matching
        entry) and ``error`` is ``None``. On failure ``params`` is ``None``
        and ``error`` is a human-readable message.
    """
    try:
        # NOTE(security): trust_remote_code=True allows the referenced
        # repository to execute its own Python code while the config loads.
        # The name comes from a user-editable text box, so review whether
        # this should stay enabled for a publicly reachable deployment.
        config = AutoConfig.from_pretrained(hf_model_name_or_path, trust_remote_code=True).to_dict()
    except Exception as e:
        # Any failure (unknown model, network error, bad path) is reported as
        # text so the UI can display it instead of crashing the callback.
        return None, f"Error fetching Hugging Face model: {str(e)}"

    # to_dict() exposes each architecture's native key names, so families
    # that do not use the canonical names (GPT-2 style n_layer/n_embd/n_head/
    # n_positions, T5 style num_layers/d_model/num_heads) would otherwise
    # yield None for every field. Aliases are tried in order; the canonical
    # name always wins when present.
    key_aliases = {
        "num_layers": ("num_hidden_layers", "n_layer", "num_layers"),
        "hidden_size": ("hidden_size", "n_embd", "d_model"),
        "num_attention_heads": ("num_attention_heads", "n_head", "num_heads"),
        "vocab_size": ("vocab_size",),
        "sequence_length": ("max_position_embeddings", "n_positions"),
    }

    def first_present(keys):
        # Return the first non-None config value among the candidate keys.
        for key in keys:
            value = config.get(key)
            if value is not None:
                return value
        return None

    return {name: first_present(keys) for name, keys in key_aliases.items()}, None
# ---- Memory Calculation ---- #
def calc_mem(hf_model_name_or_path, num_gpus, tensor_parallel_size, pipeline_parallel_size, batch_size_per_gpu, sequence_length, vocab_size, hidden_size, num_attention_heads, num_layers, ffn_expansion_factor, is_mixed_precision, misc_mem_gib):
    """Estimate the per-GPU memory footprint and format it for display.

    When ``hf_model_name_or_path`` is non-empty, the model config is fetched
    and any value it provides overrides the corresponding manual input. The
    estimate is the parameter count times bytes per parameter (2 with mixed
    precision, 4 otherwise), divided across tensor * pipeline parallel
    shards, plus ``misc_mem_gib``.

    ``num_gpus``, ``batch_size_per_gpu`` and ``num_attention_heads`` are
    accepted for interface compatibility but do not enter the computation.

    Returns:
        A result string "Per-GPU Memory Required for Training: X GiB", or an
        error message string when the config lookup fails or an input is
        missing or invalid.
    """
    if hf_model_name_or_path:
        model_params, hf_error = get_hf_model_args(hf_model_name_or_path)
    else:
        model_params, hf_error = None, None
    if hf_error:
        return hf_error
    if model_params:
        # A missing (None) config entry falls back to the manual input.
        num_layers = model_params["num_layers"] or num_layers
        hidden_size = model_params["hidden_size"] or hidden_size
        num_attention_heads = model_params["num_attention_heads"] or num_attention_heads
        vocab_size = model_params["vocab_size"] or vocab_size
        sequence_length = model_params["sequence_length"] or sequence_length

    # A cleared number field arrives as None; report it as text (like the
    # config-lookup error above) rather than failing with a TypeError.
    required = {
        "Tensor Parallel Size": tensor_parallel_size,
        "Pipeline Parallel Size": pipeline_parallel_size,
        "Sequence Length": sequence_length,
        "Vocab Size": vocab_size,
        "Hidden Size": hidden_size,
        "Number of Layers": num_layers,
        "FFN Expansion Factor": ffn_expansion_factor,
        "Misc Memory Overhead (GiB)": misc_mem_gib,
    }
    missing = [label for label, value in required.items() if value is None]
    if missing:
        return f"Error: missing value for {', '.join(missing)}"
    # The shard count is a divisor below, so it must be strictly positive.
    if tensor_parallel_size <= 0 or pipeline_parallel_size <= 0:
        return "Error: Tensor Parallel Size and Pipeline Parallel Size must be greater than 0"
    model_shards = tensor_parallel_size * pipeline_parallel_size

    embed_params = 2 * vocab_size * hidden_size
    positional_params = hidden_size * sequence_length
    ln_params = 8 * hidden_size * num_layers + (2 * hidden_size)
    # NOTE(review): the attention term scales with ffn_expansion_factor,
    # which looks unusual for an attention block - confirm the intended
    # formula. Kept as written so results do not change.
    attention_params = int(2 * (1 + ffn_expansion_factor) * num_layers * hidden_size * hidden_size)
    mlp_params = ffn_expansion_factor * num_layers * hidden_size * hidden_size
    total_params = embed_params + positional_params + ln_params + attention_params + mlp_params

    bytes_per_param = 2 if is_mixed_precision else 4
    model_mem = total_params * bytes_per_param
    per_gpu_mem_gib = (model_mem / model_shards) / 1024**3 + misc_mem_gib
    return f"Per-GPU Memory Required for Training: {per_gpu_mem_gib:.2f} GiB"
# ---- Update Gradio inputs with Hugging Face model config ---- #
def update_from_hf_model(hf_model_name_or_path):
    """Build Gradio updates that fill the size inputs from a model config.

    Args:
        hf_model_name_or_path: Current content of the model-name text box.

    Returns:
        A 6-tuple: updates for num_layers, hidden_size, num_attention_heads,
        vocab_size and sequence_length (in that order), followed by the text
        for the result box (an error message, or "" when there is none).
    """
    field_names = ("num_layers", "hidden_size", "num_attention_heads", "vocab_size", "sequence_length")

    # This runs on every edit of the text box, including clearing it. A blank
    # name cannot be looked up, so leave the inputs untouched and show no
    # error instead of reporting a failed fetch.
    if not hf_model_name_or_path or not hf_model_name_or_path.strip():
        return (*(gr.update() for _ in field_names), "")

    model_params, hf_error = get_hf_model_args(hf_model_name_or_path)
    if hf_error:
        return (*(gr.update() for _ in field_names), hf_error)

    # Only overwrite an input when the config actually supplied a value; a
    # None entry would otherwise blank out what the user had entered.
    updates = [
        gr.update() if model_params[name] is None else gr.update(value=model_params[name])
        for name in field_names
    ]
    return (*updates, "")
# ---- Gradio Interface ---- #
with gr.Blocks() as demo:
    with gr.Tabs():
        # ---- Tab: per-GPU memory estimate ---- #
        with gr.TabItem("Memory Calculation"):
            hf_model_name_or_path = gr.Textbox(
                label="HuggingFace Model Name or Path (optional)",
                value="",
            )
            num_gpus = gr.Number(label="Number of GPUs", value=1)
            tensor_parallel_size = gr.Number(label="Tensor Parallel Size", value=1)
            pipeline_parallel_size = gr.Number(label="Pipeline Parallel Size", value=1)
            batch_size_per_gpu = gr.Number(label="Batch Size per GPU", value=8)
            sequence_length = gr.Number(label="Sequence Length", value=2048)
            vocab_size = gr.Number(label="Vocab Size", value=51200)
            hidden_size = gr.Number(label="Hidden Size", value=6144)
            num_attention_heads = gr.Number(label="Number of Attention Heads", value=64)
            num_layers = gr.Number(label="Number of Layers", value=44)
            ffn_expansion_factor = gr.Number(label="FFN Expansion Factor", value=4)
            is_mixed_precision = gr.Checkbox(label="Mixed Precision", value=True)
            misc_mem_gib = gr.Number(label="Misc Memory Overhead (GiB)", value=5)
            memory_result = gr.Textbox(label="Memory Calculation Result", interactive=False)
            calc_memory_button = gr.Button("Calculate Memory")

            # Component order here must match calc_mem's positional parameters.
            memory_inputs = [
                hf_model_name_or_path,
                num_gpus,
                tensor_parallel_size,
                pipeline_parallel_size,
                batch_size_per_gpu,
                sequence_length,
                vocab_size,
                hidden_size,
                num_attention_heads,
                num_layers,
                ffn_expansion_factor,
                is_mixed_precision,
                misc_mem_gib,
            ]
            calc_memory_button.click(fn=calc_mem, inputs=memory_inputs, outputs=memory_result)

            # Editing the model name refreshes the size inputs from its config;
            # the last output slot carries any lookup error to the result box.
            hf_autofill_outputs = [
                num_layers,
                hidden_size,
                num_attention_heads,
                vocab_size,
                sequence_length,
                memory_result,
            ]
            hf_model_name_or_path.change(
                fn=update_from_hf_model,
                inputs=[hf_model_name_or_path],
                outputs=hf_autofill_outputs,
            )

        # ---- Tab: parameter count ---- #
        # The component names below rebind several names used by the first
        # tab; its event handlers were already wired to the earlier objects.
        with gr.TabItem("Parameter Calculation"):
            vocab_size = gr.Number(label="Vocab Size", value=51200)
            tied_embeddings = gr.Checkbox(label="Tied Embeddings", value=False)
            hidden_size = gr.Number(label="Hidden Size", value=6144)
            sequence_length = gr.Number(label="Sequence Length", value=2048)
            num_layers = gr.Number(label="Number of Layers", value=44)
            ffn_expansion_factor = gr.Number(label="FFN Expansion Factor", value=4)
            num_mlp_linears = gr.Number(label="Number of Linear Layers per MLP Block", value=2)
            kv_size_ratio = gr.Number(label="KV Size Ratio", value=1.0)

            with gr.Accordion("MoE Parameters", open=False):
                moe = gr.Checkbox(label="MoE", value=False)
                num_experts = gr.Number(label="Number of Experts", value=8)
                expert_interval = gr.Number(label="Expert Interval", value=1)
                topk = gr.Number(label="Top k Routing", value=1)

            param_result = gr.Textbox(label="Parameter Calculation Result", interactive=False)
            calc_param_button = gr.Button("Calculate Parameters")

            # Passed positionally to calc_params in exactly this order.
            param_inputs = [
                vocab_size,
                tied_embeddings,
                hidden_size,
                sequence_length,
                num_layers,
                moe,
                num_experts,
                expert_interval,
                topk,
                ffn_expansion_factor,
                num_mlp_linears,
                kv_size_ratio,
            ]
            calc_param_button.click(fn=calc_params, inputs=param_inputs, outputs=param_result)

demo.launch()
|