joaogante's picture
joaogante HF staff
working plots
39b15f4
gpu,task,model_name,dtype,offload,Greedy,Assisted
3090,OPT: Open Text Generation,1.3B,FP32,0,11.64,10.01
3090,OPT: Open Text Generation,6.7B,FP32,1,428.47,114.99
3090,OPT: Open Text Generation,6.7B,FP16,0,19.62,12.44
3090,OPT: Open Text Generation,6.7B,INT8,0,104.43,40.33
3090,OPT: Open Text Generation,30B,FP16,1,2616,1099
3090,OPT: Summarization,1.3B,FP32,0,13.16,10.89
3090,OPT: Summarization,6.7B,FP32,1,587.8,114.53
3090,OPT: Summarization,6.7B,FP16,0,25.14,14.56
3090,OPT: Summarization,30B,FP16,1,2732,331.2
3090,Whisper: ARS,large-v2,FP32,0,24.81,12.55
3090,CodeGen: Code Generation,2B,FP32,0,28.90,28.36
3090,CodeGen: Code Generation,6B,FP32,1,544.11,110.42
3090,CodeGen: Code Generation,6B,FP16,0,34.36,31.84
3090,CodeGen: Code Generation,16B,FP16,1,808.69,161.50
3090,CodeGen: Code Generation,16B,INT8,0,66.69,41.47
3090,Flan-T5: Summarization,large,FP32,0,21.27,15.76
3090,Flan-T5: Summarization,xl,FP32,0,25.60,18.94
3090,Flan-T5: Summarization,xxl,FP32,1,1326.22,580.10
3090,Flan-T5: Summarization,xxl,FP16,1,52.52,36.07
3090,Flan-T5: Summarization,xxl,INT8,0,67.13,38.92
3090,Flan-T5: Summarization,ul2,FP16,1,1185.25,480.11
T4,OPT: Open Text Generation,1.3B,FP32,0,24.74,22.37
T4,OPT: Open Text Generation,6.7B,FP32,1,2863.57,733.32
T4,OPT: Open Text Generation,6.7B,FP16,0,62.04,29.67
T4,OPT: Open Text Generation,6.7B,INT8,0,180.59,66.12
T4,OPT: Summarization,1.3B,FP32,0,32.50,26.58
T4,OPT: Summarization,6.7B,FP16,1,499.00,67.33
T4,OPT: Summarization,6.7B,INT8,0,182.98,37.89
T4,Whisper: ARS,large-v2,FP32,0,62.68,40.74
T4,CodeGen: Code Generation,2B,FP32,0,73.88,67.62
T4,CodeGen: Code Generation,6B,FP16,1,682.94,135.99
T4,CodeGen: Code Generation,6B,INT8,0,117.91,72.40
T4,Flan-T5: Summarization,large,FP32,0,43.67,36.26
T4,Flan-T5: Summarization,xl,FP16,0,53.54,42.27
T4,Flan-T5: Summarization,xxl,FP16,1,2814,1177
T4 *2,OPT: Open Text Generation,6.7B,FP32,0,118.42,55.42
T4 *2,OPT: Open Text Generation,6.7B,FP16,0,61.30,34.76
T4 *2,OPT: Summarization,6.7B,FP32,1,1238.59,339.34
T4 *2,OPT: Summarization,6.7B,FP16,0,94.62,34.37
T4 *2,CodeGen: Code Generation,6B,FP16,0,116.34,72.09
T4 *2,CodeGen: Code Generation,6B,INT8,0,119.14,79.01
T4 *2,CodeGen: Code Generation,16B,FP16,1,1509.05,693.01
T4 *2,CodeGen: Code Generation,16B,INT8,0,200.79,99.00
T4 *2,Flan-T5: Summarization,xl,FP32,0,59.27,68.70
T4 *2,Flan-T5: Summarization,xl,FP16,0,51.59,50.56
T4 *2,Flan-T5: Summarization,xxl,FP16,1,797.7,534.3
T4 *2,Flan-T5: Summarization,xxl,INT8,0,243.3,143.38
A100 (80GB),OPT: Open Text Generation,6.7B,FP32,0,35.34,30.00
A100 (80GB),OPT: Open Text Generation,30B,FP16,0,54.57,38.27
A100 (80GB),OPT: Open Text Generation,30B,INT8,0,290.82,135.77
A100 (80GB),OPT: Open Text Generation,66B,INT8,0,398.49,146.04
A100 (80GB),OPT: Summarization,6.7B,FP32,0,43.64,27.03
A100 (80GB),OPT: Summarization,30B,FP16,0,54.94,28.87
A100 (80GB),OPT: Summarization,30B,INT8,0,291.57,49.42
A100 (80GB),OPT: Summarization,66B,INT8,0,392.34,82.29
A100 (80GB),CodeGen: Code Generation,16B,FP32,0,75.56,80.44
A100 (80GB),CodeGen: Code Generation,16B,FP16,0,70.51,74.79
A100 (80GB),CodeGen: Code Generation,16B,INT8,0,130.77,90.28
A100 (80GB),Flan-T5: Summarization,ul2,FP32,0,87.40,59.26
A100 (80GB),Flan-T5: Summarization,ul2,FP16,0,78.13,42.95
A100 (80GB),Flan-T5: Summarization,ul2,INT8,0,187.66,81.72