henryholloway committed
Commit 9e0a736
Parent: a002825

UI Updates

Files changed (1)
  1. app.py +3 -3
app.py CHANGED
@@ -30,9 +30,7 @@ precision_options = {
 # Streamlit app
 st.title("Memory Usage Calculator for Large Language Models")
 
-# Constants from the paper, now exposed as user inputs
-layers = st.number_input("Number of Layers", value=32, step=1)
-attention_heads = st.number_input("Number of Attention Heads", value=32, step=1)
+
 
 # Taken from "Reducing Activation Recomputation in Large Transformer Models" https://arxiv.org/abs/2205.05198
 def calculate_memory_usage(parameter_count, context_length, data_type, batch_size, vocab_size, precision):
@@ -69,6 +67,8 @@ def calculate_activations(parameter_count, context_length, batch_size, vocab_siz
 
 # User inputs
 parameter_count = st.number_input("Parameter Count (in billions)", value=1, step=1) * 1e9
+layers = st.number_input("Number of Layers", value=32, step=1)
+attention_heads = st.number_input("Number of Attention Heads", value=32, step=1)
 context_length = st.number_input("Context Length (number of tokens)", value=512, step=1)
 data_type = st.selectbox("Data Type", options=list(quantization_bit_sizes.keys()))
 batch_size = st.number_input("Batch Size", value=1, step=1)
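
For context, the layers and attention_heads inputs this commit relocates feed the activation estimate from the cited paper, which gives per-layer fp16 activation memory of roughly s * b * h * (34 + 5 * a * s / h) bytes (s = sequence length, b = batch size, h = hidden size, a = attention heads). The sketch below is a minimal standalone illustration of that formula; hidden_size and the helper name activation_bytes are assumptions for illustration and do not appear in app.py or in this commit.

def activation_bytes(context_length, batch_size, hidden_size, attention_heads, layers):
    """Approximate total fp16 activation memory in bytes, without recomputation."""
    s, b, h, a = context_length, batch_size, hidden_size, attention_heads
    per_layer = s * b * h * (34 + 5 * a * s / h)  # bytes per transformer layer
    return per_layer * layers

# Example with assumed values (not taken from app.py): ~3.6 GB of activations
print(activation_bytes(context_length=512, batch_size=1, hidden_size=4096,
                       attention_heads=32, layers=32) / 1e9, "GB")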