|
gpu,task,model_name,dtype,offload,Greedy,Assisted |
|
3090,OPT: Open Text Generation,1.3B,FP32,0,11.64,10.01 |
|
3090,OPT: Open Text Generation,6.7B,FP32,1,428.47,114.99 |
|
3090,OPT: Open Text Generation,6.7B,FP16,0,19.62,12.44 |
|
3090,OPT: Open Text Generation,6.7B,INT8,0,104.43,40.33 |
|
3090,OPT: Open Text Generation,30B,FP16,1,2616,1099 |
|
3090,OPT: Summarization,1.3B,FP32,0,13.16,10.89 |
|
3090,OPT: Summarization,6.7B,FP32,1,587.8,114.53 |
|
3090,OPT: Summarization,6.7B,FP16,0,25.14,14.56 |
|
3090,OPT: Summarization,30B,FP16,1,2732,331.2 |
|
3090,Whisper: ARS,large-v2,FP32,0,24.81,12.55 |
|
3090,CodeGen: Code Generation,2B,FP32,0,28.90,28.36 |
|
3090,CodeGen: Code Generation,6B,FP32,1,544.11,110.42 |
|
3090,CodeGen: Code Generation,6B,FP16,0,34.36,31.84 |
|
3090,CodeGen: Code Generation,16B,FP16,1,808.69,161.50 |
|
3090,CodeGen: Code Generation,16B,INT8,0,66.69,41.47 |
|
3090,Flan-T5: Summarization,large,FP32,0,21.27,15.76 |
|
3090,Flan-T5: Summarization,xl,FP32,0,25.60,18.94 |
|
3090,Flan-T5: Summarization,xxl,FP32,1,1326.22,580.10 |
|
3090,Flan-T5: Summarization,xxl,FP16,1,52.52,36.07 |
|
3090,Flan-T5: Summarization,xxl,INT8,0,67.13,38.92 |
|
3090,Flan-T5: Summarization,ul2,FP16,1,1185.25,480.11 |
|
|
|
T4,OPT: Open Text Generation,1.3B,FP32,0,24.74,22.37 |
|
T4,OPT: Open Text Generation,6.7B,FP32,1,2863.57,733.32 |
|
T4,OPT: Open Text Generation,6.7B,FP16,0,62.04,29.67 |
|
T4,OPT: Open Text Generation,6.7B,INT8,0,180.59,66.12 |
|
T4,OPT: Summarization,1.3B,FP32,0,32.50,26.58 |
|
T4,OPT: Summarization,6.7B,FP16,1,499.00,67.33 |
|
T4,OPT: Summarization,6.7B,INT8,0,182.98,37.89 |
|
T4,Whisper: ARS,large-v2,FP32,0,62.68,40.74 |
|
T4,CodeGen: Code Generation,2B,FP32,0,73.88,67.62 |
|
T4,CodeGen: Code Generation,6B,FP16,1,682.94,135.99 |
|
T4,CodeGen: Code Generation,6B,INT8,0,117.91,72.40 |
|
T4,Flan-T5: Summarization,large,FP32,0,43.67,36.26 |
|
T4,Flan-T5: Summarization,xl,FP16,0,53.54,42.27 |
|
T4,Flan-T5: Summarization,xxl,FP16,1,2814,1177 |
|
|
|
T4 *2,OPT: Open Text Generation,6.7B,FP32,0,118.42,55.42 |
|
T4 *2,OPT: Open Text Generation,6.7B,FP16,0,61.30,34.76 |
|
T4 *2,OPT: Summarization,6.7B,FP32,1,1238.59,339.34 |
|
T4 *2,OPT: Summarization,6.7B,FP16,0,94.62,34.37 |
|
T4 *2,CodeGen: Code Generation,6B,FP16,0,116.34,72.09 |
|
T4 *2,CodeGen: Code Generation,6B,INT8,0,119.14,79.01 |
|
T4 *2,CodeGen: Code Generation,16B,FP16,1,1509.05,693.01 |
|
T4 *2,CodeGen: Code Generation,16B,INT8,0,200.79,99.00 |
|
T4 *2,Flan-T5: Summarization,xl,FP32,0,59.27,68.70 |
|
T4 *2,Flan-T5: Summarization,xl,FP16,0,51.59,50.56 |
|
T4 *2,Flan-T5: Summarization,xxl,FP16,1,797.7,534.3 |
|
T4 *2,Flan-T5: Summarization,xxl,INT8,0,243.3,143.38 |
|
|
|
A100 (80GB),OPT: Open Text Generation,6.7B,FP32,0,35.34,30.00 |
|
A100 (80GB),OPT: Open Text Generation,30B,FP16,0,54.57,38.27 |
|
A100 (80GB),OPT: Open Text Generation,30B,INT8,0,290.82,135.77 |
|
A100 (80GB),OPT: Open Text Generation,66B,INT8,0,398.49,146.04 |
|
A100 (80GB),OPT: Summarization,6.7B,FP32,0,43.64,27.03 |
|
A100 (80GB),OPT: Summarization,30B,FP16,0,54.94,28.87 |
|
A100 (80GB),OPT: Summarization,30B,INT8,0,291.57,49.42 |
|
A100 (80GB),OPT: Summarization,66B,INT8,0,392.34,82.29 |
|
A100 (80GB),CodeGen: Code Generation,16B,FP32,0,75.56,80.44 |
|
A100 (80GB),CodeGen: Code Generation,16B,FP16,0,70.51,74.79 |
|
A100 (80GB),CodeGen: Code Generation,16B,INT8,0,130.77,90.28 |
|
A100 (80GB),Flan-T5: Summarization,ul2,FP32,0,87.40,59.26 |
|
A100 (80GB),Flan-T5: Summarization,ul2,FP16,0,78.13,42.95 |
|
A100 (80GB),Flan-T5: Summarization,ul2,INT8,0,187.66,81.72 |
|
|