# GCP Resource Allocator — Streamlit app (originally hosted on Hugging Face Spaces)
import numpy as np | |
import streamlit as st | |
# Page header and read-me section: explains which GCP machine families are
# required for each GPU model, plus a markdown comparison table.
st.title("GCP Resource Allocator")
st.subheader("Readme")
st.write("Compute Engine provides NVIDIA GPUs for your VMs in passthrough mode so that your VMs have direct control over the GPUs and their associated memory.")
st.write("* To run NVIDIA H100 80GB GPUs, you must use an A3 accelerator-optimized machine type.")
st.write("* To run NVIDIA A100 GPUs, you must use the A2 accelerator-optimized machine type.")
st.write("* To run NVIDIA L4 GPUs, you must use a G2 accelerator-optimized machine type.")
st.write("* Each A3/A2/G2 machine type has a fixed GPU count, vCPU count, and memory size.")
# GPU comparison table (FP64/FP32 in TFLOPS, price in SGD per GPU-hour).
st.markdown("""
| GPU       | Memory                    | FP64      | FP32      | Price     | Interconnect                  | Best used for                                                                     |
| --------- | ------------------------- | --------- | ----------| --------- | ----------------------------- | --------------------------------------------------------------------------------- |
| H100 80GB | 80 GB HBM3 @ 3.35 TBps    | 34        | 67        | 12.11     | NVLink Full Mesh @ 900 GBps   | Large models with massive data tables for ML Training, Inference, HPC, BERT, DLRM |
| A100 80GB | 80 GB HBM2e @ 1.9 TBps    | 9.7       | 19.5      | 2.61      | NVLink Full Mesh @ 600 GBps   | Large models with massive data tables for ML Training, Inference, HPC, BERT, DLRM |
| A100 40GB | 40 GB HBM2 @ 1.6 TBps     | 9.7       | 19.5      | 1.67      | NVLink Full Mesh @ 600 GBps   | ML Training, Inference, HPC                                                       |
| L4        | 24 GB GDDR6 @ 300 GBps    | 0.5       | 30.3      | 0.28      | N/A                           | ML Inference, Training, Remote Visualization Workstations, Video Transcoding, HPC |
| T4        | 16 GB GDDR6 @ 320 GBps    | 0.25      | 8.1       | 0.15      | N/A                           | ML Inference, Training, Remote Visualization Workstations, Video Transcoding      |
| V100      | 16 GB HBM2 @ 900 GBps     | 7.8       | 15.7      | 0.99      | NVLink Ring @ 300 GBps        | ML Training, Inference, HPC                                                       |
| P4        | 8 GB GDDR5 @ 192 GBps     | 0.2       | 5.5       | 0.30      | N/A                           | Remote Visualization Workstations, ML Inference, and Video Transcoding            |
| P100      | 16 GB HBM2 @ 732 GBps     | 4.7       | 9.3       | 0.58      | N/A                           | ML Training, Inference, HPC, Remote Visualization Workstations                    |
""")
st.subheader("Configuration")
# GPU Type — the selected model drives every downstream widget (GPU count,
# machine family, vCPUs, memory) via the mapping dicts below.
gpu_type = st.selectbox(
    'GPU Type',
    (
        'H100 80GB',
        'A100 80GB',
        'A100 40GB',
        'V100 16GB',
        'P100 16GB',
        'L4 24GB',
        'T4 16GB',
        'P4 8GB',
    )
)
# Number of GPUs
# Valid GPU counts per model (e.g. H100 is only sold 8-at-a-time on A3).
gpu_number_mapping = {
    'H100 80GB': [8],
    'A100 80GB': [1, 2, 4, 8],
    'A100 40GB': [1, 2, 4, 8, 16],
    'V100 16GB': [1, 2, 4, 8],
    'P100 16GB': [1, 2, 4],
    'L4 24GB': [1, 2, 4, 8],
    'T4 16GB': [1, 2, 4],
    'P4 8GB': [1, 2, 4],
}
gpu_number = st.selectbox('Number of GPUs', gpu_number_mapping[gpu_type])
# Instance Type
# Machine families compatible with each GPU model. A3/A2/G2 are
# accelerator-optimized; N1 and CUSTOM attach GPUs to general-purpose VMs.
gpu_type_mapping = {
    'H100 80GB': ["A3"],
    'A100 80GB': ["A2"],
    'A100 40GB': ["A2"],
    'V100 16GB': ["N1", "CUSTOM"],
    'P100 16GB': ["N1", "CUSTOM"],
    'L4 24GB': ["G2", "CUSTOM"],
    'T4 16GB': ["N1", "CUSTOM"],
    'P4 8GB': ["N1", "CUSTOM"],
}
instance_type = st.selectbox('Instance Type', gpu_type_mapping[gpu_type])
# CPU Cores
# Allowed vCPU counts per machine family. A3 is fixed at 208; A2 scales with
# the GPU count; G2 offers a choice only for single-GPU configurations.
cpu_cores_mapping = {
    "A3": [208],
    "A2": [12 * gpu_number],
    "G2": [12 * gpu_number] if gpu_number > 1 else [4, 8, 12, 16, 32],
    "N1": [1, 2, 4, 8, 16, 32, 96],
    "CUSTOM": [1] + [i for i in range(2, 96 + 1, 2)],  # 1, then even counts up to 96
}
# BUG FIX: the original tested `gpu_type != "CUSTOM"`, which is always true
# because "CUSTOM" is an *instance* type, never a GPU type — so the slider
# branch for custom machines was unreachable. Test instance_type instead.
if instance_type != "CUSTOM":
    cpu_cores = st.selectbox('Cores (vCPU)', cpu_cores_mapping[instance_type])
else:
    cpu_cores = st.select_slider('Cores (vCPU)', cpu_cores_mapping[instance_type])
# Memory Size
# Allowed memory (GB) per machine family, derived from the chosen core/GPU
# counts. NOTE(review): the G2 single-GPU arm yields 48 GB while multi-GPU
# uses 4 GB per vCPU — confirm against current G2 machine shapes.
memory_size_mapping = {
    "A3": [1872],
    "A2": [170 * gpu_number],
    "G2": [4 * cpu_cores] if gpu_number > 1 else [48 * gpu_number],
    "N1": [cpu_cores * 3.75],
    # Custom machines allow roughly 1–6.5 GB per vCPU in 1 GB steps.
    "CUSTOM": [i for i in np.arange(cpu_cores, cpu_cores * 6.5 + 1, 1)],
}
# BUG FIX: same as the vCPU widget — `gpu_type` can never be "CUSTOM";
# the machine instance_type is what distinguishes custom shapes.
if instance_type != "CUSTOM":
    memory_size = st.selectbox('Memory (GB)', memory_size_mapping[instance_type])
else:
    memory_size = st.select_slider('Memory (GB)', memory_size_mapping[instance_type])
# Balanced Disk — 10 GB steps up to just under 64 TB.
balanced_disk_size = st.select_slider('Balanced Disk (GB)', list(range(10, 65536, 10)))
# SSD Disk — local SSD is attached in 375 GB units; only these multiples are valid.
ssd_disk_size = st.select_slider('SSD Disk (GB)', [i * 375 for i in [0, 1, 2, 3, 4, 5, 6, 7, 8, 16, 24]])
# Hours — rental duration, up to one week (168 h).
hours = st.select_slider('Duration (Hours)', list(range(1, 168 + 1)))
# Pricing Estimate
# Hourly unit prices in SGD: per vCPU ("Core"), per GB of RAM, per GPU, and
# per GB of disk (monthly disk rates converted to hourly via /30/24).
# (Renamed from the original's misspelled `serivces_mapping`; all uses are local.)
services_mapping = {
    "Core": {
        "A3": 0.029917642,
        "A2": 0.017880447,
        "G2": 0.016626389,
        "N1": 0.007834495,
        "CUSTOM": 0.00782101,
    },
    "RAM": {
        "A3": 0.002605197,
        "A2": 0.002396196,
        "G2": 0.00194851,
        "N1": 0.001049094,
        "CUSTOM": 0.001047746,
    },
    "GPU": {
        'H100 80GB': 12.112232328,
        'A100 80GB': 2.61383548,
        'A100 40GB': 1.67288707,
        'V100 16GB': 0.997853,
        'P100 16GB': 0.5798335,
        'L4 24GB': 0.279501996,
        'T4 16GB': 0.1483295,
        'P4 8GB': 0.29800745,
    },
    # NOTE(review): this PD monthly rate equals the T4 GPU hourly rate above
    # (0.1483295) — looks like a possible copy-paste slip; confirm against
    # the balanced-PD price list before relying on it.
    "PD": 0.1483295 / 30 / 24,
    "SSD": 0.108550225 / 30 / 24,
}
# Per-component hourly cost for the selected configuration.
core_price = services_mapping['Core'][instance_type] * cpu_cores
memory_price = services_mapping['RAM'][instance_type] * memory_size
gpu_price = services_mapping['GPU'][gpu_type] * gpu_number
balanced_disk_price = services_mapping['PD'] * balanced_disk_size
ssd_disk_price = services_mapping['SSD'] * ssd_disk_size
duration_total_price = core_price + memory_price + gpu_price + balanced_disk_price + ssd_disk_price
total_price = duration_total_price * hours
# Results: per-component hourly costs, hourly total, and total for the
# chosen duration. ":blue[...]" is Streamlit's colored-text markdown syntax.
st.subheader("Hourly estimate")
st.write(f"Core: SGD :blue[{core_price:.3f}]")
st.write(f"Memory: SGD :blue[{memory_price:.3f}]")
st.write(f"GPU: SGD :blue[{gpu_price:.3f}]")
# Label fixed ("Balance" -> "Balanced") to match the slider label above.
st.write(f"Balanced Disk: SGD :blue[{balanced_disk_price:.3f}]")
st.write(f"SSD Disk: SGD :blue[{ssd_disk_price:.3f}]")
st.write(f"Hourly Total: SGD :blue[{duration_total_price:.3f}]")
st.write(f"Duration Total: SGD :blue[{total_price:.3f}]")