fxmarty's picture
add experience
6dd016c
[
{
"model_name_or_path": "elastic/distilbert-base-uncased-finetuned-conll03-english",
"task": "token-classification",
"task_args": null,
"dataset": {
"path": "conll2003",
"eval_split": "validation",
"data_keys": {
"primary": "tokens",
"secondary": null
},
"ref_keys": [
"ner_tags"
],
"name": null,
"calibration_split": "train"
},
"quantization_approach": "static",
"operators_to_quantize": [
"Add",
"MatMul"
],
"node_exclusion": [],
"aware_training": false,
"per_channel": false,
"calibration": {
"method": "minmax",
"num_calibration_samples": 100,
"calibration_histogram_percentile": null,
"calibration_moving_average": null,
"calibration_moving_average_constant": null
},
"framework": "onnxruntime",
"framework_args": {
"opset": 11,
"optimization_level": 1
},
"hardware": "Architecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nByte Order: Little Endian\nAddress sizes: 46 bits physical, 48 bits virtual\nCPU(s): 8\nOn-line CPU(s) list: 0-7\nThread(s) per core: 2\nCore(s) per socket: 4\nSocket(s): 1\nNUMA node(s): 1\nVendor ID: GenuineIntel\nCPU family: 6\nModel: 85\nModel name: Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz\nStepping: 7\nCPU MHz: 3100.670\nBogoMIPS: 5000.00\nHypervisor vendor: KVM\nVirtualization type: full\nL1d cache: 128 KiB\nL1i cache: 128 KiB\nL2 cache: 4 MiB\nL3 cache: 35.8 MiB\nNUMA node0 CPU(s): 0-7\nVulnerability Itlb multihit: KVM: Vulnerable\nVulnerability L1tf: Mitigation; PTE Inversion\nVulnerability Mds: Vulnerable: Clear CPU buffers attempted, no microcode; SMT Host state unknown\nVulnerability Meltdown: Mitigation; PTI\nVulnerability Spec store bypass: Vulnerable\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Retpolines, STIBP disabled, RSB filling\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single pti fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid mpx avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves ida arat pku ospke avx512_vnni\n",
"versions": {
"transformers": "4.20.1",
"optimum": "1.2.3.dev0",
"optimum_hash": "5c9af4e5f93c7e9bd523563230732b49603dc4d7"
},
"evaluation": {
"time": [
{
"batch_size": 8,
"input_length": 128,
"baseline": {
"nb_forwards": 40,
"throughput": 2.67,
"latency_mean": 375.10382185000003,
"latency_std": 41.77851887343237,
"latency_50": 402.085788,
"latency_90": 405.6853881,
"latency_95": 406.5309876,
"latency_99": 407.68657795999997,
"latency_999": 407.928640196
},
"optimized": {
"nb_forwards": 67,
"throughput": 4.47,
"latency_mean": 225.09343717910448,
"latency_std": 5.4422008927739745,
"latency_50": 224.468437,
"latency_90": 233.1922012,
"latency_95": 234.06977830000002,
"latency_99": 238.47171616,
"latency_999": 239.621179816
}
},
{
"batch_size": 4,
"input_length": 32,
"baseline": {
"nb_forwards": 226,
"throughput": 15.07,
"latency_mean": 66.56901730973452,
"latency_std": 7.513087298346711,
"latency_50": 71.2563305,
"latency_90": 72.6457545,
"latency_95": 72.8911035,
"latency_99": 73.828916,
"latency_999": 74.4237675
},
"optimized": {
"nb_forwards": 436,
"throughput": 29.07,
"latency_mean": 34.45165110091743,
"latency_std": 1.0538553485588218,
"latency_50": 34.582433,
"latency_90": 34.9882515,
"latency_95": 35.17064425,
"latency_99": 35.965547449999995,
"latency_999": 38.29062908499999
}
},
{
"batch_size": 4,
"input_length": 64,
"baseline": {
"nb_forwards": 126,
"throughput": 8.4,
"latency_mean": 119.99883396031747,
"latency_std": 0.5389393608027111,
"latency_50": 119.924409,
"latency_90": 120.578213,
"latency_95": 120.87888125,
"latency_99": 121.94754125,
"latency_999": 122.377029
},
"optimized": {
"nb_forwards": 260,
"throughput": 17.33,
"latency_mean": 57.854139219230774,
"latency_std": 2.41471729691721,
"latency_50": 57.400247,
"latency_90": 61.3181592,
"latency_95": 62.38365544999999,
"latency_99": 64.46206894,
"latency_999": 65.22337084999998
}
},
{
"batch_size": 1,
"input_length": 64,
"baseline": {
"nb_forwards": 348,
"throughput": 23.2,
"latency_mean": 43.22926725,
"latency_std": 1.1946103356932485,
"latency_50": 43.0492745,
"latency_90": 44.705008299999996,
"latency_95": 45.636852299999994,
"latency_99": 46.63035402,
"latency_999": 46.997215917999995
},
"optimized": {
"nb_forwards": 789,
"throughput": 52.6,
"latency_mean": 19.02440978073511,
"latency_std": 1.1428706688800712,
"latency_50": 18.854472,
"latency_90": 20.772688400000003,
"latency_95": 21.339546999999996,
"latency_99": 21.90764228,
"latency_999": 22.18167628399999
}
},
{
"batch_size": 1,
"input_length": 128,
"baseline": {
"nb_forwards": 204,
"throughput": 13.6,
"latency_mean": 73.54620642647059,
"latency_std": 1.573277335192714,
"latency_50": 73.3753375,
"latency_90": 75.5904331,
"latency_95": 76.15987129999999,
"latency_99": 78.56596293,
"latency_999": 80.276804319
},
"optimized": {
"nb_forwards": 396,
"throughput": 26.4,
"latency_mean": 37.94637206818182,
"latency_std": 0.22863560761755683,
"latency_50": 37.917286,
"latency_90": 38.2418925,
"latency_95": 38.31172375,
"latency_99": 38.762135050000005,
"latency_999": 38.947024510000006
}
},
{
"batch_size": 8,
"input_length": 64,
"baseline": {
"nb_forwards": 70,
"throughput": 4.67,
"latency_mean": 215.9852832857143,
"latency_std": 2.6695382982970624,
"latency_50": 215.4558835,
"latency_90": 219.254347,
"latency_95": 219.83214235,
"latency_99": 222.17290862000002,
"latency_999": 222.546938162
},
"optimized": {
"nb_forwards": 116,
"throughput": 7.73,
"latency_mean": 130.388671,
"latency_std": 0.9538270722224035,
"latency_50": 130.420778,
"latency_90": 131.4585225,
"latency_95": 131.97905225,
"latency_99": 133.03534015,
"latency_999": 133.14044914
}
},
{
"batch_size": 4,
"input_length": 128,
"baseline": {
"nb_forwards": 68,
"throughput": 4.53,
"latency_mean": 222.9005361617647,
"latency_std": 2.9786389600252616,
"latency_50": 222.130681,
"latency_90": 226.3922357,
"latency_95": 227.17735725,
"latency_99": 231.35961219,
"latency_999": 232.82185281900001
},
"optimized": {
"nb_forwards": 112,
"throughput": 7.47,
"latency_mean": 135.05900691964285,
"latency_std": 0.852188728183432,
"latency_50": 135.0183545,
"latency_90": 136.08118190000002,
"latency_95": 136.6358585,
"latency_99": 137.6042464,
"latency_999": 137.777202544
}
},
{
"batch_size": 8,
"input_length": 32,
"baseline": {
"nb_forwards": 127,
"throughput": 8.47,
"latency_mean": 118.38734347244095,
"latency_std": 3.928549187092105,
"latency_50": 118.722872,
"latency_90": 119.61111340000001,
"latency_95": 120.56405670000001,
"latency_99": 122.34993956,
"latency_999": 122.379941486
},
"optimized": {
"nb_forwards": 233,
"throughput": 15.53,
"latency_mean": 64.56411339484978,
"latency_std": 0.8101063203434803,
"latency_50": 64.369473,
"latency_90": 65.578687,
"latency_95": 66.343236,
"latency_99": 67.23023495999999,
"latency_999": 67.842266136
}
},
{
"batch_size": 1,
"input_length": 32,
"baseline": {
"nb_forwards": 314,
"throughput": 20.93,
"latency_mean": 47.871883372611464,
"latency_std": 0.8675741645315053,
"latency_50": 47.86284,
"latency_90": 48.8556855,
"latency_95": 49.34610644999999,
"latency_99": 50.61812615,
"latency_999": 50.940127244
},
"optimized": {
"nb_forwards": 1419,
"throughput": 94.6,
"latency_mean": 10.575771353770262,
"latency_std": 0.6992989868391869,
"latency_50": 10.433279,
"latency_90": 11.4260774,
"latency_95": 12.119523699999998,
"latency_99": 12.74776788,
"latency_999": 13.092563020000012
}
}
],
"others": {
"baseline": {
"precision": 0.9358012339503085,
"recall": 0.9444631437226523,
"f1": 0.9401122372057961,
"accuracy": 0.9882013940267124
},
"optimized": {
"precision": 0.06543578604398588,
"recall": 0.24335240659710536,
"f1": 0.10313837375178317,
"accuracy": 0.35697597445582335
}
}
},
"max_eval_samples": null,
"time_benchmark_args": {
"duration": 15,
"warmup_runs": 5
},
"model_type": "distilbert"
},
{
"model_name_or_path": "elastic/distilbert-base-uncased-finetuned-conll03-english",
"task": "token-classification",
"task_args": null,
"dataset": {
"path": "conll2003",
"eval_split": "validation",
"data_keys": {
"primary": "tokens",
"secondary": null
},
"ref_keys": [
"ner_tags"
],
"name": null,
"calibration_split": "train"
},
"quantization_approach": "static",
"operators_to_quantize": [
"Add",
"MatMul"
],
"node_exclusion": [
"layernorm",
"gelu",
"residual",
"gather",
"softmax"
],
"aware_training": false,
"per_channel": false,
"calibration": {
"method": "minmax",
"num_calibration_samples": 100,
"calibration_histogram_percentile": null,
"calibration_moving_average": null,
"calibration_moving_average_constant": null
},
"framework": "onnxruntime",
"framework_args": {
"opset": 11,
"optimization_level": 1
},
"hardware": "Architecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nByte Order: Little Endian\nAddress sizes: 46 bits physical, 48 bits virtual\nCPU(s): 8\nOn-line CPU(s) list: 0-7\nThread(s) per core: 2\nCore(s) per socket: 4\nSocket(s): 1\nNUMA node(s): 1\nVendor ID: GenuineIntel\nCPU family: 6\nModel: 85\nModel name: Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz\nStepping: 7\nCPU MHz: 3105.038\nBogoMIPS: 5000.00\nHypervisor vendor: KVM\nVirtualization type: full\nL1d cache: 128 KiB\nL1i cache: 128 KiB\nL2 cache: 4 MiB\nL3 cache: 35.8 MiB\nNUMA node0 CPU(s): 0-7\nVulnerability Itlb multihit: KVM: Vulnerable\nVulnerability L1tf: Mitigation; PTE Inversion\nVulnerability Mds: Vulnerable: Clear CPU buffers attempted, no microcode; SMT Host state unknown\nVulnerability Meltdown: Mitigation; PTI\nVulnerability Spec store bypass: Vulnerable\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Retpolines, STIBP disabled, RSB filling\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single pti fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid mpx avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves ida arat pku ospke avx512_vnni\n",
"versions": {
"transformers": "4.20.1",
"optimum": "1.2.3.dev0",
"optimum_hash": "5c9af4e5f93c7e9bd523563230732b49603dc4d7"
},
"evaluation": {
"time": [
{
"batch_size": 1,
"input_length": 32,
"baseline": {
"nb_forwards": 315,
"throughput": 21.0,
"latency_mean": 47.722406904761904,
"latency_std": 0.78575656702049,
"latency_50": 47.657137,
"latency_90": 48.539485,
"latency_95": 49.1951445,
"latency_99": 50.459615660000004,
"latency_999": 51.17226236
},
"optimized": {
"nb_forwards": 1829,
"throughput": 121.93,
"latency_mean": 8.204672595407327,
"latency_std": 0.6281598971222003,
"latency_50": 8.030814,
"latency_90": 9.035519800000001,
"latency_95": 9.1478804,
"latency_99": 9.59396184,
"latency_999": 10.840923620000012
}
},
{
"batch_size": 8,
"input_length": 32,
"baseline": {
"nb_forwards": 140,
"throughput": 9.33,
"latency_mean": 107.46691665714286,
"latency_std": 14.217711617362141,
"latency_50": 118.404271,
"latency_90": 119.217932,
"latency_95": 120.1323987,
"latency_99": 121.88812750999999,
"latency_999": 122.23331556199999
},
"optimized": {
"nb_forwards": 336,
"throughput": 22.4,
"latency_mean": 44.742087273809524,
"latency_std": 1.297316432066614,
"latency_50": 44.5606265,
"latency_90": 46.6604945,
"latency_95": 47.15595925,
"latency_99": 47.76239855,
"latency_999": 48.352806460000004
}
},
{
"batch_size": 4,
"input_length": 128,
"baseline": {
"nb_forwards": 69,
"throughput": 4.6,
"latency_mean": 220.25039004347826,
"latency_std": 1.5005813984244252,
"latency_50": 219.820796,
"latency_90": 222.1719386,
"latency_95": 224.2704662,
"latency_99": 225.16129407999998,
"latency_999": 225.809551408
},
"optimized": {
"nb_forwards": 160,
"throughput": 10.67,
"latency_mean": 94.04712534375,
"latency_std": 4.044440830749728,
"latency_50": 92.7247505,
"latency_90": 100.1824987,
"latency_95": 103.149005,
"latency_99": 106.80153862,
"latency_999": 107.010720374
}
},
{
"batch_size": 1,
"input_length": 64,
"baseline": {
"nb_forwards": 251,
"throughput": 16.73,
"latency_mean": 59.98497594422311,
"latency_std": 1.3002235321937636,
"latency_50": 59.807255,
"latency_90": 61.620392,
"latency_95": 62.9347585,
"latency_99": 63.5797715,
"latency_999": 64.073255
},
"optimized": {
"nb_forwards": 933,
"throughput": 62.2,
"latency_mean": 16.080496909967845,
"latency_std": 0.6562666342873719,
"latency_50": 16.202643,
"latency_90": 16.5627894,
"latency_95": 16.7647474,
"latency_99": 17.10871436,
"latency_999": 17.232195299999997
}
},
{
"batch_size": 4,
"input_length": 32,
"baseline": {
"nb_forwards": 271,
"throughput": 18.07,
"latency_mean": 55.44747836162362,
"latency_std": 1.1058764508579348,
"latency_50": 55.358138,
"latency_90": 56.887965,
"latency_95": 57.3142435,
"latency_99": 58.44232040000001,
"latency_999": 58.78495296
},
"optimized": {
"nb_forwards": 540,
"throughput": 36.0,
"latency_mean": 27.828701338888887,
"latency_std": 0.2830425296733134,
"latency_50": 27.783282,
"latency_90": 28.192628600000003,
"latency_95": 28.425469149999998,
"latency_99": 28.75255937,
"latency_999": 28.907412832000002
}
},
{
"batch_size": 1,
"input_length": 128,
"baseline": {
"nb_forwards": 211,
"throughput": 14.07,
"latency_mean": 71.34490764454976,
"latency_std": 0.8606794549682575,
"latency_50": 71.218379,
"latency_90": 72.307616,
"latency_95": 72.7004855,
"latency_99": 73.44670070000001,
"latency_999": 76.77649755999997
},
"optimized": {
"nb_forwards": 473,
"throughput": 31.53,
"latency_mean": 31.754805171247355,
"latency_std": 0.4300285133323451,
"latency_50": 31.703291,
"latency_90": 32.3621104,
"latency_95": 32.6649158,
"latency_99": 33.04651476,
"latency_999": 33.134080912
}
},
{
"batch_size": 4,
"input_length": 64,
"baseline": {
"nb_forwards": 125,
"throughput": 8.33,
"latency_mean": 120.569009496,
"latency_std": 0.8160222004383323,
"latency_50": 120.371051,
"latency_90": 121.2576836,
"latency_95": 122.412088,
"latency_99": 123.63907148,
"latency_999": 124.379808824
},
"optimized": {
"nb_forwards": 275,
"throughput": 18.33,
"latency_mean": 54.700663727272726,
"latency_std": 0.6805625823558532,
"latency_50": 54.534815,
"latency_90": 55.637867,
"latency_95": 56.2308853,
"latency_99": 57.06877728,
"latency_999": 57.400091958
}
},
{
"batch_size": 8,
"input_length": 128,
"baseline": {
"nb_forwards": 45,
"throughput": 3.0,
"latency_mean": 337.56930973333334,
"latency_std": 42.73015832642524,
"latency_50": 311.940976,
"latency_90": 408.1195232,
"latency_95": 409.32610719999997,
"latency_99": 409.70182116,
"latency_999": 409.771442316
},
"optimized": {
"nb_forwards": 73,
"throughput": 4.87,
"latency_mean": 205.58670958904108,
"latency_std": 17.128151144285876,
"latency_50": 216.043924,
"latency_90": 219.5796966,
"latency_95": 221.6779058,
"latency_99": 224.22378336000003,
"latency_999": 224.454495336
}
},
{
"batch_size": 8,
"input_length": 64,
"baseline": {
"nb_forwards": 70,
"throughput": 4.67,
"latency_mean": 215.1244777,
"latency_std": 1.555883759703903,
"latency_50": 214.6171285,
"latency_90": 216.7817589,
"latency_95": 218.67742330000002,
"latency_99": 220.89366772999998,
"latency_999": 221.10970307300002
},
"optimized": {
"nb_forwards": 173,
"throughput": 11.53,
"latency_mean": 86.89619405780347,
"latency_std": 2.3718385044122723,
"latency_50": 86.535865,
"latency_90": 90.37287020000001,
"latency_95": 91.8051702,
"latency_99": 92.75978772,
"latency_999": 92.817659772
}
}
],
"others": {
"baseline": {
"precision": 0.9358012339503085,
"recall": 0.9444631437226523,
"f1": 0.9401122372057961,
"accuracy": 0.9882013940267124
},
"optimized": {
"precision": 0.9038969616908851,
"recall": 0.9212386401884888,
"f1": 0.912485414235706,
"accuracy": 0.9842295860753086
}
}
},
"max_eval_samples": null,
"time_benchmark_args": {
"duration": 15,
"warmup_runs": 5
},
"model_type": "distilbert"
},
{
"model_name_or_path": "elastic/distilbert-base-uncased-finetuned-conll03-english",
"task": "token-classification",
"task_args": null,
"dataset": {
"path": "conll2003",
"eval_split": "validation",
"data_keys": {
"primary": "tokens",
"secondary": null
},
"ref_keys": [
"ner_tags"
],
"name": null,
"calibration_split": "train"
},
"quantization_approach": "dynamic",
"operators_to_quantize": [
"Add",
"MatMul"
],
"node_exclusion": [],
"aware_training": false,
"per_channel": false,
"calibration": {
"method": "minmax",
"num_calibration_samples": 100,
"calibration_histogram_percentile": null,
"calibration_moving_average": null,
"calibration_moving_average_constant": null
},
"framework": "onnxruntime",
"framework_args": {
"opset": 11,
"optimization_level": 1
},
"hardware": "Architecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nByte Order: Little Endian\nAddress sizes: 46 bits physical, 48 bits virtual\nCPU(s): 8\nOn-line CPU(s) list: 0-7\nThread(s) per core: 2\nCore(s) per socket: 4\nSocket(s): 1\nNUMA node(s): 1\nVendor ID: GenuineIntel\nCPU family: 6\nModel: 85\nModel name: Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz\nStepping: 7\nCPU MHz: 3100.033\nBogoMIPS: 5000.00\nHypervisor vendor: KVM\nVirtualization type: full\nL1d cache: 128 KiB\nL1i cache: 128 KiB\nL2 cache: 4 MiB\nL3 cache: 35.8 MiB\nNUMA node0 CPU(s): 0-7\nVulnerability Itlb multihit: KVM: Vulnerable\nVulnerability L1tf: Mitigation; PTE Inversion\nVulnerability Mds: Vulnerable: Clear CPU buffers attempted, no microcode; SMT Host state unknown\nVulnerability Meltdown: Mitigation; PTI\nVulnerability Spec store bypass: Vulnerable\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Retpolines, STIBP disabled, RSB filling\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single pti fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid mpx avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves ida arat pku ospke avx512_vnni\n",
"versions": {
"transformers": "4.20.1",
"optimum": "1.2.3.dev0",
"optimum_hash": "5c9af4e5f93c7e9bd523563230732b49603dc4d7"
},
"evaluation": {
"time": [
{
"batch_size": 4,
"input_length": 64,
"baseline": {
"nb_forwards": 167,
"throughput": 11.13,
"latency_mean": 90.0069231257485,
"latency_std": 1.7115040048180659,
"latency_50": 89.808392,
"latency_90": 92.1615498,
"latency_95": 93.00187689999998,
"latency_99": 94.87745056,
"latency_999": 96.02146185400001
},
"optimized": {
"nb_forwards": 347,
"throughput": 23.13,
"latency_mean": 43.265280452449566,
"latency_std": 2.7751472818818734,
"latency_50": 44.883167,
"latency_90": 45.35501180000001,
"latency_95": 46.0536215,
"latency_99": 46.73552832,
"latency_999": 46.9292037
}
},
{
"batch_size": 1,
"input_length": 32,
"baseline": {
"nb_forwards": 311,
"throughput": 20.73,
"latency_mean": 48.2705229710611,
"latency_std": 1.1247628435592778,
"latency_50": 48.262398,
"latency_90": 49.675041,
"latency_95": 50.1826075,
"latency_99": 51.5223755,
"latency_999": 52.486126479999996
},
"optimized": {
"nb_forwards": 1955,
"throughput": 130.33,
"latency_mean": 7.675419680306905,
"latency_std": 0.3408992210776522,
"latency_50": 7.708051,
"latency_90": 7.9747872,
"latency_95": 8.22643,
"latency_99": 8.42614896,
"latency_999": 9.247490550000006
}
},
{
"batch_size": 8,
"input_length": 64,
"baseline": {
"nb_forwards": 70,
"throughput": 4.67,
"latency_mean": 214.59417684285717,
"latency_std": 1.7664217544171172,
"latency_50": 214.2422325,
"latency_90": 216.45445769999998,
"latency_95": 218.51388645,
"latency_99": 220.63894481,
"latency_999": 221.498688881
},
"optimized": {
"nb_forwards": 171,
"throughput": 11.4,
"latency_mean": 87.8829150994152,
"latency_std": 0.9834434065965216,
"latency_50": 87.63179,
"latency_90": 88.639136,
"latency_95": 90.3700155,
"latency_99": 91.69530259999999,
"latency_999": 92.0263997
}
},
{
"batch_size": 8,
"input_length": 32,
"baseline": {
"nb_forwards": 170,
"throughput": 11.33,
"latency_mean": 88.68199067647059,
"latency_std": 1.5268934268549699,
"latency_50": 88.7044375,
"latency_90": 90.6080309,
"latency_95": 91.1070495,
"latency_99": 92.72121441,
"latency_999": 94.157151444
},
"optimized": {
"nb_forwards": 347,
"throughput": 23.13,
"latency_mean": 43.326895181556196,
"latency_std": 0.4931306808409643,
"latency_50": 43.189003,
"latency_90": 44.0324068,
"latency_95": 44.3867949,
"latency_99": 45.111986800000004,
"latency_999": 45.838947726
}
},
{
"batch_size": 1,
"input_length": 128,
"baseline": {
"nb_forwards": 205,
"throughput": 13.67,
"latency_mean": 73.52497626829268,
"latency_std": 1.0865490902318493,
"latency_50": 73.373698,
"latency_90": 74.60406520000001,
"latency_95": 74.979193,
"latency_99": 76.63692036,
"latency_999": 80.98970668399998
},
"optimized": {
"nb_forwards": 562,
"throughput": 37.47,
"latency_mean": 26.697691247330958,
"latency_std": 0.33379520623836,
"latency_50": 26.612126,
"latency_90": 27.0071515,
"latency_95": 27.35591295,
"latency_99": 27.96691773,
"latency_999": 28.784246988999993
}
},
{
"batch_size": 4,
"input_length": 32,
"baseline": {
"nb_forwards": 210,
"throughput": 14.0,
"latency_mean": 71.64548409523809,
"latency_std": 0.9364269258806002,
"latency_50": 71.3511745,
"latency_90": 73.0493582,
"latency_95": 73.74027225,
"latency_99": 74.29445254000001,
"latency_999": 76.295386572
},
"optimized": {
"nb_forwards": 700,
"throughput": 46.67,
"latency_mean": 21.43839562,
"latency_std": 0.7747656036456344,
"latency_50": 21.458011,
"latency_90": 22.4099538,
"latency_95": 22.8598522,
"latency_99": 23.88133127,
"latency_999": 24.30084730600001
}
},
{
"batch_size": 1,
"input_length": 64,
"baseline": {
"nb_forwards": 341,
"throughput": 22.73,
"latency_mean": 44.00805451906158,
"latency_std": 2.0146525121369048,
"latency_50": 43.40077,
"latency_90": 46.844243,
"latency_95": 48.01232,
"latency_99": 48.8285812,
"latency_999": 49.91079640000002
},
"optimized": {
"nb_forwards": 1191,
"throughput": 79.4,
"latency_mean": 12.596608948782535,
"latency_std": 0.4604972017819279,
"latency_50": 12.560364,
"latency_90": 13.254538,
"latency_95": 13.4481775,
"latency_99": 13.824474499999997,
"latency_999": 14.306386329999988
}
},
{
"batch_size": 4,
"input_length": 128,
"baseline": {
"nb_forwards": 89,
"throughput": 5.93,
"latency_mean": 170.04430629213482,
"latency_std": 1.7973801840824346,
"latency_50": 169.804268,
"latency_90": 172.3942636,
"latency_95": 172.909571,
"latency_99": 174.41490236,
"latency_999": 174.83032853600002
},
"optimized": {
"nb_forwards": 184,
"throughput": 12.27,
"latency_mean": 81.67628805434782,
"latency_std": 2.2754557516244045,
"latency_50": 81.0874735,
"latency_90": 85.0994149,
"latency_95": 85.74545605,
"latency_99": 88.41751725,
"latency_999": 89.711957766
}
},
{
"batch_size": 8,
"input_length": 128,
"baseline": {
"nb_forwards": 37,
"throughput": 2.47,
"latency_mean": 414.4216835675676,
"latency_std": 3.861346436570963,
"latency_50": 415.141617,
"latency_90": 419.931193,
"latency_95": 421.07318360000005,
"latency_99": 421.86215943999997,
"latency_999": 422.088350644
},
"optimized": {
"nb_forwards": 98,
"throughput": 6.53,
"latency_mean": 154.51860542857142,
"latency_std": 3.911988556596126,
"latency_50": 153.7217105,
"latency_90": 159.4319015,
"latency_95": 161.4452743,
"latency_99": 163.25851040999999,
"latency_999": 163.55056994100002
}
}
],
"others": {
"baseline": {
"precision": 0.9358012339503085,
"recall": 0.9444631437226523,
"f1": 0.9401122372057961,
"accuracy": 0.9882013940267124
},
"optimized": {
"precision": 0.9337560487235108,
"recall": 0.9417704476607203,
"f1": 0.9377461248428991,
"accuracy": 0.9878314707371209
}
}
},
"max_eval_samples": null,
"time_benchmark_args": {
"duration": 15,
"warmup_runs": 5
},
"model_type": "distilbert"
},
{
"model_name_or_path": "elastic/distilbert-base-uncased-finetuned-conll03-english",
"task": "token-classification",
"task_args": null,
"dataset": {
"path": "conll2003",
"eval_split": "validation",
"data_keys": {
"primary": "tokens",
"secondary": null
},
"ref_keys": [
"ner_tags"
],
"name": null,
"calibration_split": "train"
},
"quantization_approach": "dynamic",
"operators_to_quantize": [
"Add"
],
"node_exclusion": [
"layernorm",
"gelu",
"residual",
"gather",
"softmax"
],
"aware_training": false,
"per_channel": false,
"calibration": {
"method": "minmax",
"num_calibration_samples": 100,
"calibration_histogram_percentile": null,
"calibration_moving_average": null,
"calibration_moving_average_constant": null
},
"framework": "onnxruntime",
"framework_args": {
"opset": 11,
"optimization_level": 1
},
"hardware": "Architecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nByte Order: Little Endian\nAddress sizes: 46 bits physical, 48 bits virtual\nCPU(s): 8\nOn-line CPU(s) list: 0-7\nThread(s) per core: 2\nCore(s) per socket: 4\nSocket(s): 1\nNUMA node(s): 1\nVendor ID: GenuineIntel\nCPU family: 6\nModel: 85\nModel name: Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz\nStepping: 7\nCPU MHz: 3125.174\nBogoMIPS: 5000.00\nHypervisor vendor: KVM\nVirtualization type: full\nL1d cache: 128 KiB\nL1i cache: 128 KiB\nL2 cache: 4 MiB\nL3 cache: 35.8 MiB\nNUMA node0 CPU(s): 0-7\nVulnerability Itlb multihit: KVM: Vulnerable\nVulnerability L1tf: Mitigation; PTE Inversion\nVulnerability Mds: Vulnerable: Clear CPU buffers attempted, no microcode; SMT Host state unknown\nVulnerability Meltdown: Mitigation; PTI\nVulnerability Spec store bypass: Vulnerable\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Retpolines, STIBP disabled, RSB filling\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single pti fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid mpx avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves ida arat pku ospke avx512_vnni\n",
"versions": {
"transformers": "4.20.1",
"optimum": "1.2.3.dev0",
"optimum_hash": "5c9af4e5f93c7e9bd523563230732b49603dc4d7"
},
"evaluation": {
"time": [
{
"batch_size": 4,
"input_length": 32,
"baseline": {
"nb_forwards": 210,
"throughput": 14.0,
"latency_mean": 71.72416266666667,
"latency_std": 1.0823165199145606,
"latency_50": 71.4790375,
"latency_90": 72.9353617,
"latency_95": 73.99241959999999,
"latency_99": 75.17633138,
"latency_999": 75.422236178
},
"optimized": {
"nb_forwards": 272,
"throughput": 18.13,
"latency_mean": 55.160912452205885,
"latency_std": 0.6521145589906397,
"latency_50": 55.189684,
"latency_90": 55.7469555,
"latency_95": 55.9487205,
"latency_99": 57.71467353,
"latency_999": 58.06304184299999
}
},
{
"batch_size": 4,
"input_length": 64,
"baseline": {
"nb_forwards": 159,
"throughput": 10.6,
"latency_mean": 94.34125967295597,
"latency_std": 10.03252637987446,
"latency_50": 91.050963,
"latency_90": 120.90638,
"latency_95": 121.39196709999999,
"latency_99": 122.02035808,
"latency_999": 122.622120378
},
"optimized": {
"nb_forwards": 141,
"throughput": 9.4,
"latency_mean": 107.02119182269503,
"latency_std": 0.8366529782537496,
"latency_50": 106.835229,
"latency_90": 108.249955,
"latency_95": 108.628056,
"latency_99": 109.2045378,
"latency_999": 110.26474030000001
}
},
{
"batch_size": 1,
"input_length": 64,
"baseline": {
"nb_forwards": 248,
"throughput": 16.53,
"latency_mean": 60.49658227016129,
"latency_std": 0.8819758121225761,
"latency_50": 60.343132,
"latency_90": 61.6551285,
"latency_95": 62.34768465,
"latency_99": 63.31741764,
"latency_999": 63.487983883999995
},
"optimized": {
"nb_forwards": 503,
"throughput": 33.53,
"latency_mean": 29.869608675944335,
"latency_std": 0.3559872739101281,
"latency_50": 29.794297,
"latency_90": 30.1428484,
"latency_95": 30.4600473,
"latency_99": 30.75200356,
"latency_999": 33.756738917999996
}
},
{
"batch_size": 1,
"input_length": 32,
"baseline": {
"nb_forwards": 445,
"throughput": 29.67,
"latency_mean": 33.73988259325843,
"latency_std": 1.5629102688800236,
"latency_50": 33.480773,
"latency_90": 35.338715799999996,
"latency_95": 35.7559136,
"latency_99": 36.873395439999996,
"latency_999": 47.8180268919997
},
"optimized": {
"nb_forwards": 1019,
"throughput": 67.93,
"latency_mean": 14.732223261040234,
"latency_std": 0.9828893965554253,
"latency_50": 14.401579,
"latency_90": 16.897523,
"latency_95": 17.0312649,
"latency_99": 17.46899586,
"latency_999": 17.893012501999998
}
},
{
"batch_size": 1,
"input_length": 128,
"baseline": {
"nb_forwards": 210,
"throughput": 14.0,
"latency_mean": 71.59536255238095,
"latency_std": 1.13338439776916,
"latency_50": 71.154012,
"latency_90": 73.357109,
"latency_95": 73.92077855,
"latency_99": 74.25820067000001,
"latency_999": 75.003235419
},
"optimized": {
"nb_forwards": 282,
"throughput": 18.8,
"latency_mean": 53.25616293617021,
"latency_std": 4.987232229454594,
"latency_50": 56.9742995,
"latency_90": 57.4831075,
"latency_95": 58.0667945,
"latency_99": 59.20656624,
"latency_999": 62.057965914999976
}
},
{
"batch_size": 8,
"input_length": 128,
"baseline": {
"nb_forwards": 48,
"throughput": 3.2,
"latency_mean": 314.12435689583333,
"latency_std": 4.446221158816441,
"latency_50": 314.1643195,
"latency_90": 319.1971656,
"latency_95": 322.43416895,
"latency_99": 326.67617025,
"latency_999": 327.501665325
},
"optimized": {
"nb_forwards": 47,
"throughput": 3.13,
"latency_mean": 323.93693336170213,
"latency_std": 6.869443185578473,
"latency_50": 323.040004,
"latency_90": 334.16514060000003,
"latency_95": 334.9778985,
"latency_99": 336.97889436,
"latency_999": 337.249830036
}
},
{
"batch_size": 8,
"input_length": 64,
"baseline": {
"nb_forwards": 70,
"throughput": 4.67,
"latency_mean": 216.0601891,
"latency_std": 2.0959098467954616,
"latency_50": 215.485022,
"latency_90": 219.06993409999998,
"latency_95": 219.52531109999998,
"latency_99": 222.29498567000002,
"latency_999": 223.08344266699999
},
"optimized": {
"nb_forwards": 92,
"throughput": 6.13,
"latency_mean": 163.75156455434782,
"latency_std": 7.2184659324399,
"latency_50": 161.7846625,
"latency_90": 176.00411860000003,
"latency_95": 179.22768994999998,
"latency_99": 183.41052014000002,
"latency_999": 188.60359111400004
}
},
{
"batch_size": 8,
"input_length": 32,
"baseline": {
"nb_forwards": 165,
"throughput": 11.0,
"latency_mean": 91.31757051515152,
"latency_std": 2.4793130848747467,
"latency_50": 91.036743,
"latency_90": 93.733273,
"latency_95": 95.67535299999999,
"latency_99": 100.79730819999997,
"latency_999": 102.770634928
},
"optimized": {
"nb_forwards": 163,
"throughput": 10.87,
"latency_mean": 92.08470889570552,
"latency_std": 11.432354704165576,
"latency_50": 84.815059,
"latency_90": 105.0187196,
"latency_95": 105.2827365,
"latency_99": 106.12717995999999,
"latency_999": 106.21855146600001
}
},
{
"batch_size": 4,
"input_length": 128,
"baseline": {
"nb_forwards": 80,
"throughput": 5.33,
"latency_mean": 188.59378125,
"latency_std": 22.91815791529492,
"latency_50": 172.9137385,
"latency_90": 219.50391,
"latency_95": 220.21657405000002,
"latency_99": 221.94066489,
"latency_999": 223.095406389
},
"optimized": {
"nb_forwards": 88,
"throughput": 5.87,
"latency_mean": 171.786525,
"latency_std": 7.080702021982688,
"latency_50": 170.577715,
"latency_90": 184.1455852,
"latency_95": 186.60128294999998,
"latency_99": 190.34417446,
"latency_999": 190.786445746
}
}
],
"others": {
"baseline": {
"precision": 0.9358012339503085,
"recall": 0.9444631437226523,
"f1": 0.9401122372057961,
"accuracy": 0.9882013940267124
},
"optimized": {
"precision": 0.9358012339503085,
"recall": 0.9444631437226523,
"f1": 0.9401122372057961,
"accuracy": 0.9882013940267124
}
}
},
"max_eval_samples": null,
"time_benchmark_args": {
"duration": 15,
"warmup_runs": 5
},
"model_type": "distilbert"
},
{
"model_name_or_path": "elastic/distilbert-base-uncased-finetuned-conll03-english",
"task": "token-classification",
"task_args": null,
"dataset": {
"path": "conll2003",
"eval_split": "validation",
"data_keys": {
"primary": "tokens",
"secondary": null
},
"ref_keys": [
"ner_tags"
],
"name": null,
"calibration_split": "train"
},
"quantization_approach": "dynamic",
"operators_to_quantize": [
"Add"
],
"node_exclusion": [],
"aware_training": false,
"per_channel": false,
"calibration": {
"method": "minmax",
"num_calibration_samples": 100,
"calibration_histogram_percentile": null,
"calibration_moving_average": null,
"calibration_moving_average_constant": null
},
"framework": "onnxruntime",
"framework_args": {
"opset": 11,
"optimization_level": 1
},
"hardware": "Architecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nByte Order: Little Endian\nAddress sizes: 46 bits physical, 48 bits virtual\nCPU(s): 8\nOn-line CPU(s) list: 0-7\nThread(s) per core: 2\nCore(s) per socket: 4\nSocket(s): 1\nNUMA node(s): 1\nVendor ID: GenuineIntel\nCPU family: 6\nModel: 85\nModel name: Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz\nStepping: 7\nCPU MHz: 3102.480\nBogoMIPS: 5000.00\nHypervisor vendor: KVM\nVirtualization type: full\nL1d cache: 128 KiB\nL1i cache: 128 KiB\nL2 cache: 4 MiB\nL3 cache: 35.8 MiB\nNUMA node0 CPU(s): 0-7\nVulnerability Itlb multihit: KVM: Vulnerable\nVulnerability L1tf: Mitigation; PTE Inversion\nVulnerability Mds: Vulnerable: Clear CPU buffers attempted, no microcode; SMT Host state unknown\nVulnerability Meltdown: Mitigation; PTI\nVulnerability Spec store bypass: Vulnerable\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Retpolines, STIBP disabled, RSB filling\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single pti fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid mpx avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves ida arat pku ospke avx512_vnni\n",
"versions": {
"transformers": "4.20.1",
"optimum": "1.2.3.dev0",
"optimum_hash": "5c9af4e5f93c7e9bd523563230732b49603dc4d7"
},
"evaluation": {
"time": [
{
"batch_size": 8,
"input_length": 128,
"baseline": {
"nb_forwards": 37,
"throughput": 2.47,
"latency_mean": 408.1543652972973,
"latency_std": 3.7417217521654447,
"latency_50": 408.204875,
"latency_90": 412.71899360000003,
"latency_95": 415.5526328,
"latency_99": 417.56349056,
"latency_999": 418.43689865600004
},
"optimized": {
"nb_forwards": 47,
"throughput": 3.13,
"latency_mean": 325.0274505106383,
"latency_std": 16.42866903352087,
"latency_50": 323.280477,
"latency_90": 336.7398878,
"latency_95": 340.73717709999994,
"latency_99": 392.46748674,
"latency_999": 395.68078127399997
}
},
{
"batch_size": 4,
"input_length": 32,
"baseline": {
"nb_forwards": 270,
"throughput": 18.0,
"latency_mean": 55.55779645555556,
"latency_std": 1.1666476240332937,
"latency_50": 55.618944,
"latency_90": 57.023193799999994,
"latency_95": 57.38550095,
"latency_99": 58.569932480000006,
"latency_999": 59.960250984
},
"optimized": {
"nb_forwards": 342,
"throughput": 22.8,
"latency_mean": 43.866120190058474,
"latency_std": 1.226638661359772,
"latency_50": 43.698571,
"latency_90": 45.555251,
"latency_95": 46.4943021,
"latency_99": 47.907215019999995,
"latency_999": 48.302261889
}
},
{
"batch_size": 4,
"input_length": 64,
"baseline": {
"nb_forwards": 126,
"throughput": 8.4,
"latency_mean": 119.10938425396826,
"latency_std": 5.242609662991225,
"latency_50": 119.8814875,
"latency_90": 122.013498,
"latency_95": 122.8530875,
"latency_99": 124.503561,
"latency_999": 125.157123625
},
"optimized": {
"nb_forwards": 182,
"throughput": 12.13,
"latency_mean": 82.46068521428572,
"latency_std": 1.8851982998377101,
"latency_50": 82.4162485,
"latency_90": 84.9214402,
"latency_95": 85.51261295,
"latency_99": 87.40860171,
"latency_999": 88.40375641499999
}
},
{
"batch_size": 1,
"input_length": 32,
"baseline": {
"nb_forwards": 448,
"throughput": 29.87,
"latency_mean": 33.48908037053571,
"latency_std": 1.1547087313019704,
"latency_50": 33.380541,
"latency_90": 35.0732339,
"latency_95": 35.621661700000004,
"latency_99": 36.54991294999999,
"latency_999": 37.980162713999995
},
"optimized": {
"nb_forwards": 1059,
"throughput": 70.6,
"latency_mean": 14.167183152974506,
"latency_std": 0.3567793828104339,
"latency_50": 14.113569,
"latency_90": 14.613918199999999,
"latency_95": 14.760972,
"latency_99": 15.203892,
"latency_999": 15.846131796
}
},
{
"batch_size": 4,
"input_length": 128,
"baseline": {
"nb_forwards": 69,
"throughput": 4.6,
"latency_mean": 219.8001221014493,
"latency_std": 1.682059211691071,
"latency_50": 219.207299,
"latency_90": 222.769191,
"latency_95": 223.179612,
"latency_99": 224.12751892,
"latency_999": 225.31481789199998
},
"optimized": {
"nb_forwards": 92,
"throughput": 6.13,
"latency_mean": 163.62187851086955,
"latency_std": 3.9886368861391612,
"latency_50": 163.256997,
"latency_90": 168.68158590000002,
"latency_95": 170.24915575,
"latency_99": 173.29093638,
"latency_999": 175.298208738
}
},
{
"batch_size": 1,
"input_length": 128,
"baseline": {
"nb_forwards": 214,
"throughput": 14.27,
"latency_mean": 70.39102674299066,
"latency_std": 0.645440942501749,
"latency_50": 70.2169265,
"latency_90": 71.08614279999999,
"latency_95": 71.45404645,
"latency_99": 73.1100614,
"latency_999": 74.05629770400002
},
"optimized": {
"nb_forwards": 265,
"throughput": 17.67,
"latency_mean": 56.682904645283024,
"latency_std": 0.29074631764368225,
"latency_50": 56.604756,
"latency_90": 56.9511692,
"latency_95": 57.2699554,
"latency_99": 58.03212468,
"latency_999": 58.197217128
}
},
{
"batch_size": 8,
"input_length": 64,
"baseline": {
"nb_forwards": 85,
"throughput": 5.67,
"latency_mean": 176.69271157647057,
"latency_std": 19.225586228496905,
"latency_50": 166.86136,
"latency_90": 213.846849,
"latency_95": 214.451406,
"latency_99": 216.24402419999998,
"latency_999": 217.66288122
},
"optimized": {
"nb_forwards": 72,
"throughput": 4.8,
"latency_mean": 209.2788515277778,
"latency_std": 1.0661208330184972,
"latency_50": 209.1019465,
"latency_90": 210.5121405,
"latency_95": 211.63171434999998,
"latency_99": 212.82214144,
"latency_999": 212.860057144
}
},
{
"batch_size": 8,
"input_length": 32,
"baseline": {
"nb_forwards": 170,
"throughput": 11.33,
"latency_mean": 88.32521131764706,
"latency_std": 1.6082421836618368,
"latency_50": 88.34603,
"latency_90": 90.24364059999999,
"latency_95": 90.82843125,
"latency_99": 92.49519597,
"latency_999": 94.140928359
},
"optimized": {
"nb_forwards": 160,
"throughput": 10.67,
"latency_mean": 94.18205897499999,
"latency_std": 11.564821917485974,
"latency_50": 103.343016,
"latency_90": 104.0051318,
"latency_95": 104.4116828,
"latency_99": 105.75135414,
"latency_999": 106.152972641
}
},
{
"batch_size": 1,
"input_length": 64,
"baseline": {
"nb_forwards": 331,
"throughput": 22.07,
"latency_mean": 45.34665541087613,
"latency_std": 4.816947937501302,
"latency_50": 43.877047,
"latency_90": 48.211528,
"latency_95": 59.708112,
"latency_99": 60.144242399999996,
"latency_999": 60.57435263
},
"optimized": {
"nb_forwards": 623,
"throughput": 41.53,
"latency_mean": 24.101585462279292,
"latency_std": 0.490914928207836,
"latency_50": 24.033518,
"latency_90": 24.7787182,
"latency_95": 25.0034624,
"latency_99": 25.4011857,
"latency_999": 25.923231378000025
}
}
],
"others": {
"baseline": {
"precision": 0.9358012339503085,
"recall": 0.9444631437226523,
"f1": 0.9401122372057961,
"accuracy": 0.9882013940267124
},
"optimized": {
"precision": 0.9358012339503085,
"recall": 0.9444631437226523,
"f1": 0.9401122372057961,
"accuracy": 0.9882013940267124
}
}
},
"max_eval_samples": null,
"time_benchmark_args": {
"duration": 15,
"warmup_runs": 5
},
"model_type": "distilbert"
},
{
"model_name_or_path": "elastic/distilbert-base-uncased-finetuned-conll03-english",
"task": "token-classification",
"task_args": null,
"dataset": {
"path": "conll2003",
"eval_split": "validation",
"data_keys": {
"primary": "tokens",
"secondary": null
},
"ref_keys": [
"ner_tags"
],
"name": null,
"calibration_split": "train"
},
"quantization_approach": "static",
"operators_to_quantize": [
"Add"
],
"node_exclusion": [
"layernorm",
"gelu",
"residual",
"gather",
"softmax"
],
"aware_training": false,
"per_channel": false,
"calibration": {
"method": "minmax",
"num_calibration_samples": 100,
"calibration_histogram_percentile": null,
"calibration_moving_average": null,
"calibration_moving_average_constant": null
},
"framework": "onnxruntime",
"framework_args": {
"opset": 11,
"optimization_level": 1
},
"hardware": "Architecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nByte Order: Little Endian\nAddress sizes: 46 bits physical, 48 bits virtual\nCPU(s): 8\nOn-line CPU(s) list: 0-7\nThread(s) per core: 2\nCore(s) per socket: 4\nSocket(s): 1\nNUMA node(s): 1\nVendor ID: GenuineIntel\nCPU family: 6\nModel: 85\nModel name: Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz\nStepping: 7\nCPU MHz: 3099.991\nBogoMIPS: 5000.00\nHypervisor vendor: KVM\nVirtualization type: full\nL1d cache: 128 KiB\nL1i cache: 128 KiB\nL2 cache: 4 MiB\nL3 cache: 35.8 MiB\nNUMA node0 CPU(s): 0-7\nVulnerability Itlb multihit: KVM: Vulnerable\nVulnerability L1tf: Mitigation; PTE Inversion\nVulnerability Mds: Vulnerable: Clear CPU buffers attempted, no microcode; SMT Host state unknown\nVulnerability Meltdown: Mitigation; PTI\nVulnerability Spec store bypass: Vulnerable\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Retpolines, STIBP disabled, RSB filling\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single pti fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid mpx avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves ida arat pku ospke avx512_vnni\n",
"versions": {
"transformers": "4.20.1",
"optimum": "1.2.3.dev0",
"optimum_hash": "5c9af4e5f93c7e9bd523563230732b49603dc4d7"
},
"evaluation": {
"time": [
{
"batch_size": 1,
"input_length": 64,
"baseline": {
"nb_forwards": 348,
"throughput": 23.2,
"latency_mean": 43.15195731609195,
"latency_std": 1.3049791783770857,
"latency_50": 43.026682,
"latency_90": 44.93291970000001,
"latency_95": 45.4691658,
"latency_99": 46.60123,
"latency_999": 47.391866047
},
"optimized": {
"nb_forwards": 456,
"throughput": 30.4,
"latency_mean": 32.961429899122805,
"latency_std": 0.22594982626954999,
"latency_50": 32.925405,
"latency_90": 33.210001,
"latency_95": 33.29684425,
"latency_99": 33.561622449999994,
"latency_999": 34.45556356
}
},
{
"batch_size": 1,
"input_length": 32,
"baseline": {
"nb_forwards": 328,
"throughput": 21.87,
"latency_mean": 45.769709847560975,
"latency_std": 5.105158395405133,
"latency_50": 47.564638,
"latency_90": 49.1212201,
"latency_95": 49.496884,
"latency_99": 50.38536208,
"latency_999": 50.559775634000005
},
"optimized": {
"nb_forwards": 790,
"throughput": 52.67,
"latency_mean": 18.99811082658228,
"latency_std": 0.18571276934069156,
"latency_50": 18.955887,
"latency_90": 19.2610981,
"latency_95": 19.371919350000002,
"latency_99": 19.535236910000002,
"latency_999": 19.715454875
}
},
{
"batch_size": 1,
"input_length": 128,
"baseline": {
"nb_forwards": 214,
"throughput": 14.27,
"latency_mean": 70.2763914906542,
"latency_std": 0.2860076644639914,
"latency_50": 70.2829375,
"latency_90": 70.6419829,
"latency_95": 70.73166590000001,
"latency_99": 70.99700557,
"latency_999": 71.13112393099999
},
"optimized": {
"nb_forwards": 240,
"throughput": 16.0,
"latency_mean": 62.699298579166665,
"latency_std": 0.3115197355843105,
"latency_50": 62.624987,
"latency_90": 63.0751223,
"latency_95": 63.22834435,
"latency_99": 63.65015018,
"latency_999": 64.543749036
}
},
{
"batch_size": 8,
"input_length": 64,
"baseline": {
"nb_forwards": 71,
"throughput": 4.73,
"latency_mean": 213.8736818169014,
"latency_std": 1.581524798477253,
"latency_50": 213.656346,
"latency_90": 214.2873,
"latency_95": 214.4610125,
"latency_99": 218.50125879999996,
"latency_999": 225.79765168000006
},
"optimized": {
"nb_forwards": 67,
"throughput": 4.47,
"latency_mean": 224.49603488059702,
"latency_std": 14.179003233192402,
"latency_50": 228.344384,
"latency_90": 228.9939548,
"latency_95": 229.2531382,
"latency_99": 229.51218631999998,
"latency_999": 229.879069232
}
},
{
"batch_size": 4,
"input_length": 64,
"baseline": {
"nb_forwards": 126,
"throughput": 8.4,
"latency_mean": 119.57019325396826,
"latency_std": 0.9318178327933169,
"latency_50": 119.624132,
"latency_90": 120.7031565,
"latency_95": 120.95046825,
"latency_99": 121.62923875,
"latency_999": 122.22802
},
"optimized": {
"nb_forwards": 163,
"throughput": 10.87,
"latency_mean": 92.49704997546013,
"latency_std": 3.5224258875712082,
"latency_50": 91.538022,
"latency_90": 98.7628616,
"latency_95": 99.7127255,
"latency_99": 101.79093066,
"latency_999": 103.67060976999998
}
},
{
"batch_size": 4,
"input_length": 128,
"baseline": {
"nb_forwards": 85,
"throughput": 5.67,
"latency_mean": 177.4090096117647,
"latency_std": 18.424368744880013,
"latency_50": 169.497669,
"latency_90": 219.17437120000002,
"latency_95": 221.619667,
"latency_99": 223.05841704,
"latency_999": 223.330345704
},
"optimized": {
"nb_forwards": 71,
"throughput": 4.73,
"latency_mean": 211.88950738028169,
"latency_std": 29.120931744682288,
"latency_50": 237.172705,
"latency_90": 238.463971,
"latency_95": 238.7496675,
"latency_99": 239.3086489,
"latency_999": 239.40512899
}
},
{
"batch_size": 4,
"input_length": 32,
"baseline": {
"nb_forwards": 272,
"throughput": 18.13,
"latency_mean": 55.23031008455882,
"latency_std": 1.1571386368235503,
"latency_50": 55.2722705,
"latency_90": 56.607078200000004,
"latency_95": 57.48416465,
"latency_99": 58.30250327,
"latency_999": 58.528941114
},
"optimized": {
"nb_forwards": 253,
"throughput": 16.87,
"latency_mean": 59.30528993675889,
"latency_std": 0.2044975324140483,
"latency_50": 59.279888,
"latency_90": 59.5549104,
"latency_95": 59.60868,
"latency_99": 59.898025759999996,
"latency_999": 60.518632308
}
},
{
"batch_size": 8,
"input_length": 128,
"baseline": {
"nb_forwards": 37,
"throughput": 2.47,
"latency_mean": 409.6796750540541,
"latency_std": 3.4623663344659903,
"latency_50": 409.339737,
"latency_90": 413.77022339999996,
"latency_95": 416.1084598,
"latency_99": 419.1264798,
"latency_999": 419.41464377999995
},
"optimized": {
"nb_forwards": 31,
"throughput": 2.07,
"latency_mean": 493.00366906451615,
"latency_std": 3.929536871101732,
"latency_50": 493.144742,
"latency_90": 498.634207,
"latency_95": 498.8949985,
"latency_99": 499.6773647,
"latency_999": 499.91265107
}
},
{
"batch_size": 8,
"input_length": 32,
"baseline": {
"nb_forwards": 173,
"throughput": 11.53,
"latency_mean": 87.05498305780347,
"latency_std": 1.5424139654073576,
"latency_50": 86.877938,
"latency_90": 88.887705,
"latency_95": 89.4437458,
"latency_99": 91.18471404,
"latency_999": 92.01616860000001
},
"optimized": {
"nb_forwards": 135,
"throughput": 9.0,
"latency_mean": 111.36274993333333,
"latency_std": 9.414259569194291,
"latency_50": 115.101582,
"latency_90": 116.2764454,
"latency_95": 116.6387619,
"latency_99": 116.93107282,
"latency_999": 117.27618652400001
}
}
],
"others": {
"baseline": {
"precision": 0.9358012339503085,
"recall": 0.9444631437226523,
"f1": 0.9401122372057961,
"accuracy": 0.9882013940267124
},
"optimized": {
"precision": 0.9087171052631579,
"recall": 0.929821608885897,
"f1": 0.9191482282482116,
"accuracy": 0.9856898095868541
}
}
},
"max_eval_samples": null,
"time_benchmark_args": {
"duration": 15,
"warmup_runs": 5
},
"model_type": "distilbert"
},
{
"model_name_or_path": "elastic/distilbert-base-uncased-finetuned-conll03-english",
"task": "token-classification",
"task_args": null,
"dataset": {
"path": "conll2003",
"eval_split": "validation",
"data_keys": {
"primary": "tokens",
"secondary": null
},
"ref_keys": [
"ner_tags"
],
"name": null,
"calibration_split": "train"
},
"quantization_approach": "static",
"operators_to_quantize": [
"Add"
],
"node_exclusion": [],
"aware_training": false,
"per_channel": false,
"calibration": {
"method": "minmax",
"num_calibration_samples": 100,
"calibration_histogram_percentile": null,
"calibration_moving_average": null,
"calibration_moving_average_constant": null
},
"framework": "onnxruntime",
"framework_args": {
"opset": 11,
"optimization_level": 1
},
"hardware": "Architecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nByte Order: Little Endian\nAddress sizes: 46 bits physical, 48 bits virtual\nCPU(s): 8\nOn-line CPU(s) list: 0-7\nThread(s) per core: 2\nCore(s) per socket: 4\nSocket(s): 1\nNUMA node(s): 1\nVendor ID: GenuineIntel\nCPU family: 6\nModel: 85\nModel name: Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz\nStepping: 7\nCPU MHz: 3119.116\nBogoMIPS: 5000.00\nHypervisor vendor: KVM\nVirtualization type: full\nL1d cache: 128 KiB\nL1i cache: 128 KiB\nL2 cache: 4 MiB\nL3 cache: 35.8 MiB\nNUMA node0 CPU(s): 0-7\nVulnerability Itlb multihit: KVM: Vulnerable\nVulnerability L1tf: Mitigation; PTE Inversion\nVulnerability Mds: Vulnerable: Clear CPU buffers attempted, no microcode; SMT Host state unknown\nVulnerability Meltdown: Mitigation; PTI\nVulnerability Spec store bypass: Vulnerable\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Retpolines, STIBP disabled, RSB filling\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single pti fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid mpx avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves ida arat pku ospke avx512_vnni\n",
"versions": {
"transformers": "4.20.1",
"optimum": "1.2.3.dev0",
"optimum_hash": "5c9af4e5f93c7e9bd523563230732b49603dc4d7"
},
"evaluation": {
"time": [
{
"batch_size": 4,
"input_length": 128,
"baseline": {
"nb_forwards": 90,
"throughput": 6.0,
"latency_mean": 168.29981604444444,
"latency_std": 3.0980387809618484,
"latency_50": 167.5409385,
"latency_90": 170.98883170000002,
"latency_95": 174.5708666,
"latency_99": 179.67467568,
"latency_999": 183.294064668
},
"optimized": {
"nb_forwards": 75,
"throughput": 5.0,
"latency_mean": 201.88371818666667,
"latency_std": 5.1702570236285155,
"latency_50": 201.088276,
"latency_90": 208.22007219999998,
"latency_95": 211.4593359,
"latency_99": 214.37736256000002,
"latency_999": 218.26520815600003
}
},
{
"batch_size": 4,
"input_length": 64,
"baseline": {
"nb_forwards": 128,
"throughput": 8.53,
"latency_mean": 117.345409171875,
"latency_std": 1.0232306892142995,
"latency_50": 117.570859,
"latency_90": 118.4811602,
"latency_95": 118.66072285,
"latency_99": 119.97331652,
"latency_999": 120.173632319
},
"optimized": {
"nb_forwards": 147,
"throughput": 9.8,
"latency_mean": 102.08866921088435,
"latency_std": 4.207024690408841,
"latency_50": 100.900148,
"latency_90": 106.6952282,
"latency_95": 111.67752269999998,
"latency_99": 115.24070447999998,
"latency_999": 122.8603576240001
}
},
{
"batch_size": 4,
"input_length": 32,
"baseline": {
"nb_forwards": 256,
"throughput": 17.07,
"latency_mean": 58.79949217578125,
"latency_std": 7.2389593462092225,
"latency_50": 55.00797,
"latency_90": 71.60057,
"latency_95": 72.5113045,
"latency_99": 72.87857595,
"latency_999": 73.62029899000001
},
"optimized": {
"nb_forwards": 228,
"throughput": 15.2,
"latency_mean": 66.03353574122808,
"latency_std": 0.2155799936674679,
"latency_50": 66.018175,
"latency_90": 66.2633382,
"latency_95": 66.3559738,
"latency_99": 66.66150048,
"latency_999": 67.423175391
}
},
{
"batch_size": 1,
"input_length": 64,
"baseline": {
"nb_forwards": 342,
"throughput": 22.8,
"latency_mean": 44.01321974853801,
"latency_std": 4.990156500118981,
"latency_50": 42.3942795,
"latency_90": 56.7239391,
"latency_95": 57.8124011,
"latency_99": 58.51948686,
"latency_999": 59.132265571
},
"optimized": {
"nb_forwards": 474,
"throughput": 31.6,
"latency_mean": 31.67708501898734,
"latency_std": 1.0256274074298153,
"latency_50": 31.644259,
"latency_90": 33.042136,
"latency_95": 33.4474054,
"latency_99": 34.71017928,
"latency_999": 35.40770895199999
}
},
{
"batch_size": 8,
"input_length": 32,
"baseline": {
"nb_forwards": 129,
"throughput": 8.6,
"latency_mean": 116.96203879069768,
"latency_std": 0.9805415537137262,
"latency_50": 116.711745,
"latency_90": 117.476479,
"latency_95": 119.6996568,
"latency_99": 120.4817116,
"latency_999": 120.693500216
},
"optimized": {
"nb_forwards": 152,
"throughput": 10.13,
"latency_mean": 98.81530088157895,
"latency_std": 2.408744102355572,
"latency_50": 98.4953865,
"latency_90": 101.9166918,
"latency_95": 103.45597070000001,
"latency_99": 105.89102432000001,
"latency_999": 107.434858943
}
},
{
"batch_size": 8,
"input_length": 64,
"baseline": {
"nb_forwards": 72,
"throughput": 4.8,
"latency_mean": 211.162219375,
"latency_std": 0.50332579026598,
"latency_50": 211.161418,
"latency_90": 211.835143,
"latency_95": 211.9189473,
"latency_99": 212.26486029,
"latency_999": 212.469340929
},
"optimized": {
"nb_forwards": 78,
"throughput": 5.2,
"latency_mean": 193.01161867948716,
"latency_std": 4.477161349632226,
"latency_50": 193.023931,
"latency_90": 198.0193413,
"latency_95": 200.14843679999998,
"latency_99": 206.14731525000002,
"latency_999": 211.593862125
}
},
{
"batch_size": 1,
"input_length": 128,
"baseline": {
"nb_forwards": 235,
"throughput": 15.67,
"latency_mean": 63.85546833191489,
"latency_std": 6.973859157506543,
"latency_50": 69.383357,
"latency_90": 70.026538,
"latency_95": 70.3286013,
"latency_99": 70.6866161,
"latency_999": 71.185565742
},
"optimized": {
"nb_forwards": 244,
"throughput": 16.27,
"latency_mean": 61.635230135245905,
"latency_std": 5.810202171663617,
"latency_50": 58.524244,
"latency_90": 70.1898687,
"latency_95": 70.36230454999999,
"latency_99": 70.64778987000001,
"latency_999": 72.10732932400002
}
},
{
"batch_size": 8,
"input_length": 128,
"baseline": {
"nb_forwards": 38,
"throughput": 2.53,
"latency_mean": 397.28297392105264,
"latency_std": 0.6990720798085198,
"latency_50": 397.169161,
"latency_90": 398.30781060000004,
"latency_95": 398.40529275,
"latency_99": 398.80296562,
"latency_999": 398.940152962
},
"optimized": {
"nb_forwards": 38,
"throughput": 2.53,
"latency_mean": 397.74469242105266,
"latency_std": 9.112867850853375,
"latency_50": 396.410306,
"latency_90": 411.3193268,
"latency_95": 412.24714525,
"latency_99": 415.15970197,
"latency_999": 416.37122889700004
}
},
{
"batch_size": 1,
"input_length": 32,
"baseline": {
"nb_forwards": 336,
"throughput": 22.4,
"latency_mean": 44.668419485119045,
"latency_std": 4.092836734909812,
"latency_50": 46.0771455,
"latency_90": 46.8151985,
"latency_95": 47.12449375,
"latency_99": 47.4169311,
"latency_999": 47.675663060000005
},
"optimized": {
"nb_forwards": 800,
"throughput": 53.33,
"latency_mean": 18.77007987125,
"latency_std": 1.3253872815877223,
"latency_50": 18.32753,
"latency_90": 21.5042558,
"latency_95": 21.83777205,
"latency_99": 22.11860088,
"latency_999": 22.523989289000014
}
}
],
"others": {
"baseline": {
"precision": 0.9358012339503085,
"recall": 0.9444631437226523,
"f1": 0.9401122372057961,
"accuracy": 0.9882013940267124
},
"optimized": {
"precision": 0.05001838911364472,
"recall": 0.16021541568495457,
"f1": 0.07623623623623622,
"accuracy": 0.31141700089560376
}
}
},
"max_eval_samples": null,
"time_benchmark_args": {
"duration": 15,
"warmup_runs": 5
},
"model_type": "distilbert"
},
{
"model_name_or_path": "elastic/distilbert-base-uncased-finetuned-conll03-english",
"task": "token-classification",
"task_args": null,
"dataset": {
"path": "conll2003",
"eval_split": "validation",
"data_keys": {
"primary": "tokens",
"secondary": null
},
"ref_keys": [
"ner_tags"
],
"name": null,
"calibration_split": "train"
},
"quantization_approach": "dynamic",
"operators_to_quantize": [
"Add",
"MatMul"
],
"node_exclusion": [
"layernorm",
"gelu",
"residual",
"gather",
"softmax"
],
"aware_training": false,
"per_channel": false,
"calibration": {
"method": "minmax",
"num_calibration_samples": 100,
"calibration_histogram_percentile": null,
"calibration_moving_average": null,
"calibration_moving_average_constant": null
},
"framework": "onnxruntime",
"framework_args": {
"opset": 11,
"optimization_level": 1
},
"hardware": "Architecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nByte Order: Little Endian\nAddress sizes: 46 bits physical, 48 bits virtual\nCPU(s): 8\nOn-line CPU(s) list: 0-7\nThread(s) per core: 2\nCore(s) per socket: 4\nSocket(s): 1\nNUMA node(s): 1\nVendor ID: GenuineIntel\nCPU family: 6\nModel: 85\nModel name: Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz\nStepping: 7\nCPU MHz: 3170.168\nBogoMIPS: 5000.00\nHypervisor vendor: KVM\nVirtualization type: full\nL1d cache: 128 KiB\nL1i cache: 128 KiB\nL2 cache: 4 MiB\nL3 cache: 35.8 MiB\nNUMA node0 CPU(s): 0-7\nVulnerability Itlb multihit: KVM: Vulnerable\nVulnerability L1tf: Mitigation; PTE Inversion\nVulnerability Mds: Vulnerable: Clear CPU buffers attempted, no microcode; SMT Host state unknown\nVulnerability Meltdown: Mitigation; PTI\nVulnerability Spec store bypass: Vulnerable\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Retpolines, STIBP disabled, RSB filling\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single pti fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid mpx avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves ida arat pku ospke avx512_vnni\n",
"versions": {
"transformers": "4.20.1",
"optimum": "1.2.3.dev0",
"optimum_hash": "5c9af4e5f93c7e9bd523563230732b49603dc4d7"
},
"evaluation": {
"time": [
{
"batch_size": 1,
"input_length": 32,
"baseline": {
"nb_forwards": 456,
"throughput": 30.4,
"latency_mean": 32.90015813157895,
"latency_std": 0.9986947055465656,
"latency_50": 32.8186695,
"latency_90": 34.223269,
"latency_95": 34.7373955,
"latency_99": 35.79150405,
"latency_999": 37.31698694000001
},
"optimized": {
"nb_forwards": 2133,
"throughput": 142.2,
"latency_mean": 7.03493227238631,
"latency_std": 0.35354984774223724,
"latency_50": 6.950204,
"latency_90": 7.5335166,
"latency_95": 7.5981322,
"latency_99": 7.7467418,
"latency_999": 7.949874583999998
}
},
{
"batch_size": 4,
"input_length": 64,
"baseline": {
"nb_forwards": 128,
"throughput": 8.53,
"latency_mean": 117.711207453125,
"latency_std": 0.3039802910847197,
"latency_50": 117.713083,
"latency_90": 118.1415121,
"latency_95": 118.31928495,
"latency_99": 118.40991929,
"latency_999": 118.45796044400001
},
"optimized": {
"nb_forwards": 342,
"throughput": 22.8,
"latency_mean": 43.9346932251462,
"latency_std": 1.9823168868846608,
"latency_50": 44.48181,
"latency_90": 44.9406133,
"latency_95": 45.33209005,
"latency_99": 46.44872295999999,
"latency_999": 47.516010738999995
}
},
{
"batch_size": 8,
"input_length": 32,
"baseline": {
"nb_forwards": 142,
"throughput": 9.47,
"latency_mean": 106.45585029577465,
"latency_std": 13.710361788824905,
"latency_50": 115.6649905,
"latency_90": 116.2565268,
"latency_95": 116.41340004999999,
"latency_99": 116.79343428,
"latency_999": 119.25626965200003
},
"optimized": {
"nb_forwards": 355,
"throughput": 23.67,
"latency_mean": 42.35345902253521,
"latency_std": 0.16699971956793372,
"latency_50": 42.330536,
"latency_90": 42.578397200000005,
"latency_95": 42.637379,
"latency_99": 42.80914288,
"latency_999": 43.008794852
}
},
{
"batch_size": 1,
"input_length": 128,
"baseline": {
"nb_forwards": 272,
"throughput": 18.13,
"latency_mean": 55.20024977941176,
"latency_std": 0.9356174611392704,
"latency_50": 55.154456,
"latency_90": 56.3394606,
"latency_95": 56.87050635,
"latency_99": 57.61698746000001,
"latency_999": 58.883503068999985
},
"optimized": {
"nb_forwards": 584,
"throughput": 38.93,
"latency_mean": 25.724440731164385,
"latency_std": 0.1774167175481294,
"latency_50": 25.694553,
"latency_90": 25.9448093,
"latency_95": 26.0568756,
"latency_99": 26.254885369999997,
"latency_999": 26.525804783000005
}
},
{
"batch_size": 4,
"input_length": 32,
"baseline": {
"nb_forwards": 214,
"throughput": 14.27,
"latency_mean": 70.41022843925234,
"latency_std": 0.9857357168050909,
"latency_50": 70.4451575,
"latency_90": 71.6293733,
"latency_95": 71.97065065000001,
"latency_99": 72.92320712,
"latency_999": 73.059142452
},
"optimized": {
"nb_forwards": 662,
"throughput": 44.13,
"latency_mean": 22.66916344410876,
"latency_std": 0.1531962446356845,
"latency_50": 22.644899,
"latency_90": 22.882265699999998,
"latency_95": 22.93566205,
"latency_99": 23.106536690000002,
"latency_999": 23.233784632
}
},
{
"batch_size": 4,
"input_length": 128,
"baseline": {
"nb_forwards": 68,
"throughput": 4.53,
"latency_mean": 220.69089520588233,
"latency_std": 2.3715499250111147,
"latency_50": 220.928956,
"latency_90": 223.564325,
"latency_95": 224.24846325,
"latency_99": 225.59921588999998,
"latency_999": 225.913941489
},
"optimized": {
"nb_forwards": 160,
"throughput": 10.67,
"latency_mean": 94.33282233125,
"latency_std": 1.3440036091128054,
"latency_50": 94.0403795,
"latency_90": 96.10274629999999,
"latency_95": 96.99199729999998,
"latency_99": 98.02360016,
"latency_999": 99.111505656
}
},
{
"batch_size": 8,
"input_length": 64,
"baseline": {
"nb_forwards": 91,
"throughput": 6.07,
"latency_mean": 165.66658007692308,
"latency_std": 9.200627894933877,
"latency_50": 163.47137,
"latency_90": 169.773734,
"latency_95": 172.0753955,
"latency_99": 213.2899759,
"latency_999": 214.50856338999998
},
"optimized": {
"nb_forwards": 172,
"throughput": 11.47,
"latency_mean": 87.71301163953488,
"latency_std": 1.0979033087486965,
"latency_50": 87.4641435,
"latency_90": 89.3678849,
"latency_95": 89.7342365,
"latency_99": 90.56987919,
"latency_999": 90.839936162
}
},
{
"batch_size": 8,
"input_length": 128,
"baseline": {
"nb_forwards": 39,
"throughput": 2.6,
"latency_mean": 391.16164482051283,
"latency_std": 24.77584642803748,
"latency_50": 397.925266,
"latency_90": 401.79618239999996,
"latency_95": 402.9476371,
"latency_99": 403.36617936,
"latency_999": 403.412153736
},
"optimized": {
"nb_forwards": 82,
"throughput": 5.47,
"latency_mean": 183.34596223170732,
"latency_std": 1.9414583808520627,
"latency_50": 182.9643005,
"latency_90": 185.9883817,
"latency_95": 187.12977195,
"latency_99": 188.01645273,
"latency_999": 188.707885173
}
},
{
"batch_size": 1,
"input_length": 64,
"baseline": {
"nb_forwards": 254,
"throughput": 16.93,
"latency_mean": 59.14541264566929,
"latency_std": 1.0597184999316425,
"latency_50": 59.1676845,
"latency_90": 60.564947700000005,
"latency_95": 60.97932805,
"latency_99": 61.52093252,
"latency_999": 61.899637246
},
"optimized": {
"nb_forwards": 1103,
"throughput": 73.53,
"latency_mean": 13.600328429737079,
"latency_std": 0.19450611359009803,
"latency_50": 13.567231,
"latency_90": 13.8727804,
"latency_95": 14.010098300000001,
"latency_99": 14.15573608,
"latency_999": 14.268220878
}
}
],
"others": {
"baseline": {
"precision": 0.9358012339503085,
"recall": 0.9444631437226523,
"f1": 0.9401122372057961,
"accuracy": 0.9882013940267124
},
"optimized": {
"precision": 0.9337560487235108,
"recall": 0.9417704476607203,
"f1": 0.9377461248428991,
"accuracy": 0.9878314707371209
}
}
},
"max_eval_samples": null,
"time_benchmark_args": {
"duration": 15,
"warmup_runs": 5
},
"model_type": "distilbert"
}
]