# gcp_allocator / app.py
# (Hugging Face Space page header — "Elfsong's picture / Update app.py /
#  3a60d8d verified" — kept here as a comment so the file parses as Python.)
import numpy as np
import streamlit as st
st.title("GCP Resource Allocator")

# Readme: short primer on which machine family each GPU model requires.
st.subheader("Readme")
_README_NOTES = (
    "Compute Engine provides NVIDIA GPUs for your VMs in passthrough mode so that your VMs have direct control over the GPUs and their associated memory.",
    "* To run NVIDIA H100 80GB GPUs, you must use an A3 accelerator-optimized machine type.",
    "* To run NVIDIA A100 GPUs, you must use the A2 accelerator-optimized machine type.",
    "* To run NVIDIA L4 GPUs, you must use a G2 accelerator-optimized machine type.",
    "* Each A3/A2/G2 machine type has a fixed GPU count, vCPU count, and memory size.",
)
for _note in _README_NOTES:
    st.write(_note)

# Reference table of GPU specs and hourly prices (markdown, rendered as-is).
st.markdown("""
| GPU | Memory | FP64 | FP32 | Price | Interconnect | Best used for |
| --------- | ------------------------- | --------- | ----------| --------- | ----------------------------- | --------------------------------------------------------------------------------- |
| H100 80GB | 80 GB HBM3 @ 3.35 TBps | 34 | 67 | 12.11 | NVLink Full Mesh @ 900 GBps | Large models with massive data tables for ML Training, Inference, HPC, BERT, DLRM |
| A100 80GB | 80 GB HBM2e @ 1.9 TBps | 9.7 | 19.5 | 2.61 | NVLink Full Mesh @ 600 GBps | Large models with massive data tables for ML Training, Inference, HPC, BERT, DLRM |
| A100 40GB | 40 GB HBM2 @ 1.6 TBps | 9.7 | 19.5 | 1.67 | NVLink Full Mesh @ 600 GBps | ML Training, Inference, HPC |
| L4 | 24 GB GDDR6 @ 300 GBps | 0.5 | 30.3 | 0.28 | N/A | ML Inference, Training, Remote Visualization Workstations, Video Transcoding, HPC |
| T4 | 16 GB GDDR6 @ 320 GBps | 0.25 | 8.1 | 0.15 | N/A | ML Inference, Training, Remote Visualization Workstations, Video Transcoding |
| V100 | 16 GB HBM2 @ 900 GBps | 7.8 | 15.7 | 0.99 | NVLink Ring @ 300 GBps | ML Training, Inference, HPC |
| P4 | 8 GB GDDR5 @ 192 GBps | 0.2 | 5.5 | 0.30 | N/A | Remote Visualization Workstations, ML Inference, and Video Transcoding |
| P100 | 16 GB HBM2 @ 732 GBps | 4.7 | 9.3 | 0.58 | N/A | ML Training, Inference, HPC, Remote Visualization Workstations |
""")
st.subheader("Configuration")

# GPU Type — the model chosen here drives every downstream option
# (count, machine family, vCPU and memory choices, pricing).
_GPU_TYPES = (
    'H100 80GB',
    'A100 80GB',
    'A100 40GB',
    'V100 16GB',
    'P100 16GB',
    'L4 24GB',
    'T4 16GB',
    'P4 8GB',
)
gpu_type = st.selectbox('GPU Type', _GPU_TYPES)
# Number of GPUs — valid attachment counts per GPU model.
_VALID_GPU_COUNTS = {
    'H100 80GB': [8],            # A3 is offered only as an 8-GPU machine
    'A100 80GB': [1, 2, 4, 8],
    'A100 40GB': [1, 2, 4, 8, 16],
    'V100 16GB': [1, 2, 4, 8],
    'P100 16GB': [1, 2, 4],
    'L4 24GB': [1, 2, 4, 8],
    'T4 16GB': [1, 2, 4],
    'P4 8GB': [1, 2, 4],
}
gpu_number = st.selectbox('Number of GPUs', _VALID_GPU_COUNTS[gpu_type])
# Instance Type — machine families that can host each GPU model.
# A3/A2/G2 are accelerator-optimized; N1 and CUSTOM attach GPUs separately.
_MACHINE_FAMILIES = {
    'H100 80GB': ["A3"],
    'A100 80GB': ["A2"],
    'A100 40GB': ["A2"],
    'V100 16GB': ["N1", "CUSTOM"],
    'P100 16GB': ["N1", "CUSTOM"],
    'L4 24GB': ["G2", "CUSTOM"],
    'T4 16GB': ["N1", "CUSTOM"],
    'P4 8GB': ["N1", "CUSTOM"],
}
instance_type = st.selectbox('Instance Type', _MACHINE_FAMILIES[gpu_type])
# CPU Cores
# Valid vCPU counts per machine family. Accelerator-optimized families pin
# the count to the GPU number; CUSTOM allows 1 vCPU or any even count <= 96.
cpu_cores_mapping = {
    "A3": [208],                     # A3 ships with a fixed 208 vCPUs
    "A2": [12 * gpu_number],         # A2: 12 vCPUs per attached A100
    "G2": [12 * gpu_number] if gpu_number > 1 else [4, 8, 12, 16, 32],
    "N1": [1, 2, 4, 8, 16, 32, 96],
    "CUSTOM": [1] + [i for i in range(2, 96 + 1, 2)],
}
# BUG FIX: the original tested `gpu_type != "CUSTOM"`, but gpu_type only
# holds GPU model names ('H100 80GB', ...), never "CUSTOM" — so the slider
# branch was unreachable. The widget choice depends on the machine family.
if instance_type != "CUSTOM":
    cpu_cores = st.selectbox('Cores (vCPU)', cpu_cores_mapping[instance_type])
else:
    cpu_cores = st.select_slider('Cores (vCPU)', cpu_cores_mapping[instance_type])
# Memory Size
# Valid memory sizes (GB) per machine family. Fixed ratios for the
# accelerator-optimized families; CUSTOM ranges from 1x to ~6.5x vCPUs.
memory_size_mapping = {
    "A3": [1872],                    # fixed for the 208-vCPU A3 machine
    "A2": [170 * gpu_number],        # 170 GB per attached A100
    "G2": [4 * cpu_cores] if gpu_number > 1 else [48 * gpu_number],
    "N1": [cpu_cores * 3.75],        # N1 standard ratio: 3.75 GB per vCPU
    "CUSTOM": [i for i in np.arange(cpu_cores, cpu_cores * 6.5 + 1, 1)],
}
# BUG FIX: as with the vCPU picker, the original compared gpu_type (never
# "CUSTOM") instead of instance_type, making the slider branch dead code.
if instance_type != "CUSTOM":
    memory_size = st.selectbox('Memory (GB)', memory_size_mapping[instance_type])
else:
    memory_size = st.select_slider('Memory (GB)', memory_size_mapping[instance_type])
# Balanced persistent disk, selectable in 10 GB steps.
balanced_disk_size = st.select_slider('Balanced Disk (GB)', list(range(10, 65536, 10)))

# Local SSD comes in 375 GB units; only these unit counts are attachable.
ssd_disk_size = st.select_slider('SSD Disk (GB)', [units * 375 for units in (0, 1, 2, 3, 4, 5, 6, 7, 8, 16, 24)])

# Run duration in hours, capped at one week (168 h).
hours = st.select_slider('Duration (Hours)', list(range(1, 169)))
# Pricing Estimate
# Hourly unit rates in SGD: Core/RAM rates are per vCPU-hour / per GB-hour and
# vary by machine family; GPU rates are per accelerator-hour; disk rates are
# derived from a monthly price (divided by 30 days * 24 h).
# FIX: renamed from the misspelled `serivces_mapping` (only used in this
# section, so the rename is self-contained).
services_mapping = {
    "Core": {  # SGD per vCPU-hour
        "A3": 0.029917642,
        "A2": 0.017880447,
        "G2": 0.016626389,
        "N1": 0.007834495,
        "CUSTOM": 0.00782101,
    },
    "RAM": {  # SGD per GB-hour
        "A3": 0.002605197,
        "A2": 0.002396196,
        "G2": 0.00194851,
        "N1": 0.001049094,
        "CUSTOM": 0.001047746,
    },
    "GPU": {  # SGD per GPU-hour
        'H100 80GB': 12.112232328,
        'A100 80GB': 2.61383548,
        'A100 40GB': 1.67288707,
        'V100 16GB': 0.997853,
        'P100 16GB': 0.5798335,
        'L4 24GB': 0.279501996,
        'T4 16GB': 0.1483295,
        'P4 8GB': 0.29800745,
    },
    # NOTE(review): this PD monthly rate is numerically identical to the
    # T4 GPU hourly rate above — looks like a copy-paste; confirm against
    # the GCP price sheet before relying on balanced-disk estimates.
    "PD": 0.1483295 / 30 / 24,
    "SSD": 0.108550225 / 30 / 24,
}

# Per-component hourly costs for the selected configuration.
core_price = services_mapping['Core'][instance_type] * cpu_cores
memory_price = services_mapping['RAM'][instance_type] * memory_size
gpu_price = services_mapping['GPU'][gpu_type] * gpu_number
balanced_disk_price = services_mapping['PD'] * balanced_disk_size
ssd_disk_price = services_mapping['SSD'] * ssd_disk_size

# Totals: hourly sum, then scaled by the selected duration.
duration_total_price = core_price + memory_price + gpu_price + balanced_disk_price + ssd_disk_price
total_price = duration_total_price * hours
st.subheader("Hourly estimate")

# Render each cost line in a uniform "Label: SGD <amount>" format; the
# amount is highlighted blue via Streamlit's markdown color syntax.
_cost_lines = (
    ("Core", core_price),
    ("Memory", memory_price),
    ("GPU", gpu_price),
    ("Balance Disk", balanced_disk_price),
    ("SSD Disk", ssd_disk_price),
    ("Hourly Total", duration_total_price),
    ("Duration Total", total_price),
)
for _label, _amount in _cost_lines:
    st.write(f"{_label}: SGD :blue[{_amount:.3f}]")