derek-thomas (HF staff) committed
Commit 3951475
1 Parent(s): 0847403

Update app.py

Files changed (1)
  1. app.py +19 -24
app.py CHANGED
@@ -3,16 +3,6 @@ from transformers import AutoConfig  # Required for Hugging Face integration
 from calc_params import calc_params  # Import calc_params from the new file
 
 # ---- Helper Functions ---- #
-def convert_params(params):
-    if params == 0:
-        return "0"
-    size_name = ("", "K", "M", "B", "T", "P", "E", "Z", "Y")
-    i = int(math.floor(math.log(params, 1000)))
-    p = math.pow(1000, i)
-    s = round(params / p, 2)
-    return "%s %s" % (s, size_name[i])
-
-# Get Hugging Face model configuration and update the parameters
 def get_hf_model_args(hf_model_name_or_path):
     try:
         config = AutoConfig.from_pretrained(hf_model_name_or_path, trust_remote_code=True).to_dict()
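
Reviewer note: the convert_params helper deleted above pretty-printed raw parameter counts with metric suffixes; presumably it became dead code once parameter formatting moved into calc_params. For reference, the deleted helper as a standalone snippet, with the math import it relied on:

import math

def convert_params(params):
    # Format a raw parameter count with a metric suffix, e.g. 1_300_000_000 -> "1.3 B".
    if params == 0:
        return "0"
    size_name = ("", "K", "M", "B", "T", "P", "E", "Z", "Y")
    i = int(math.floor(math.log(params, 1000)))
    p = math.pow(1000, i)
    s = round(params / p, 2)
    return "%s %s" % (s, size_name[i])

print(convert_params(1_300_000_000))  # -> "1.3 B"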
@@ -34,6 +24,20 @@ def get_hf_model_args(hf_model_name_or_path):
         "sequence_length": sequence_length,
     }, None
 
+# ---- Update Gradio inputs with Hugging Face model config ---- #
+def update_from_hf_model(hf_model_name_or_path):
+    model_params, hf_error = get_hf_model_args(hf_model_name_or_path)
+
+    if hf_error:
+        return gr.update(), gr.update(), gr.update(), gr.update(), gr.update(), hf_error
+
+    return (gr.update(value=model_params["num_layers"]),
+            gr.update(value=model_params["hidden_size"]),
+            gr.update(value=model_params["num_attention_heads"]),
+            gr.update(value=model_params["vocab_size"]),
+            gr.update(value=model_params["sequence_length"]),
+            "")
+
 # ---- Memory Calculation ---- #
 def calc_mem(hf_model_name_or_path, num_gpus, tensor_parallel_size, pipeline_parallel_size, batch_size_per_gpu, sequence_length, vocab_size, hidden_size, num_attention_heads, num_layers, ffn_expansion_factor, is_mixed_precision, misc_mem_gib):
     model_params, hf_error = get_hf_model_args(hf_model_name_or_path) if hf_model_name_or_path else (None, None)
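
The relocated update_from_hf_model follows Gradio's pattern of returning one gr.update(...) per bound output component (plus a trailing error string), so its return arity has to match the outputs= list it is wired to. A minimal self-contained sketch of that pattern, using illustrative component names that are not from app.py:

import gradio as gr

def fill_defaults(name):
    # Sketch only: a bare gr.update() leaves a component unchanged,
    # gr.update(value=...) overwrites it; one return value per output.
    if not name:
        return gr.update(), gr.update(), "Error: empty model name"
    return gr.update(value=12), gr.update(value=2048), ""

with gr.Blocks() as demo:
    name = gr.Textbox(label="Model name")
    num_layers = gr.Number(label="Layers")
    seq_len = gr.Number(label="Sequence length")
    status = gr.Markdown()
    # The three return values map positionally onto these three outputs.
    name.change(fn=fill_defaults, inputs=[name], outputs=[num_layers, seq_len, status])

demo.launch()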
@@ -62,20 +66,6 @@ def calc_mem(hf_model_name_or_path, num_gpus, tensor_parallel_size, pipeline_par
 
     return f"Per-GPU Memory Required for Training: {per_gpu_mem_gib:.2f} GiB"
 
-# ---- Update Gradio inputs with Hugging Face model config ---- #
-def update_from_hf_model(hf_model_name_or_path):
-    model_params, hf_error = get_hf_model_args(hf_model_name_or_path)
-
-    if hf_error:
-        return gr.update(), gr.update(), gr.update(), gr.update(), gr.update(), hf_error
-
-    return (gr.update(value=model_params["num_layers"]),
-            gr.update(value=model_params["hidden_size"]),
-            gr.update(value=model_params["num_attention_heads"]),
-            gr.update(value=model_params["vocab_size"]),
-            gr.update(value=model_params["sequence_length"]),
-            "")
-
 # ---- Gradio Interface ---- #
 with gr.Blocks() as demo:
     with gr.Tabs():
@@ -107,6 +97,7 @@ with gr.Blocks() as demo:
 
         # Parameter Calculation Tab
         with gr.TabItem("Parameter Calculation"):
+            hf_model_name_or_path = gr.Textbox(label="HuggingFace Model Name or Path (optional)", value="")
             vocab_size = gr.Number(label="Vocab Size", value=51200)
             tied_embeddings = gr.Checkbox(label="Tied Embeddings", value=False)
             hidden_size = gr.Number(label="Hidden Size", value=6144)
@@ -128,4 +119,8 @@ with gr.Blocks() as demo:
                                inputs=[vocab_size, tied_embeddings, hidden_size, sequence_length, num_layers, moe, num_experts, expert_interval, topk, ffn_expansion_factor, num_mlp_linears, kv_size_ratio],
                                outputs=param_result)
 
+            hf_model_name_or_path.change(fn=update_from_hf_model,
+                                         inputs=[hf_model_name_or_path],
+                                         outputs=[num_layers, hidden_size, num_attention_heads, vocab_size, sequence_length])
+
 demo.launch()
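
One thing worth flagging on this wiring: update_from_hf_model returns six values (five gr.update(...) calls plus an error string), but the new .change binding lists only five outputs, so Gradio will complain about the mismatched return count when the callback fires. Assuming an error display is added for the message (the param_hf_error component below is hypothetical, not part of this commit), the binding would need a sixth output:

param_hf_error = gr.Markdown()  # hypothetical error display, not in this commit
hf_model_name_or_path.change(fn=update_from_hf_model,
                             inputs=[hf_model_name_or_path],
                             outputs=[num_layers, hidden_size, num_attention_heads,
                                      vocab_size, sequence_length, param_hf_error])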
 