[ { "model_name_or_path": "elastic/distilbert-base-uncased-finetuned-conll03-english", "task": "token-classification", "task_args": null, "dataset": { "path": "conll2003", "eval_split": "validation", "data_keys": { "primary": "tokens", "secondary": null }, "ref_keys": [ "ner_tags" ], "name": null, "calibration_split": "train" }, "quantization_approach": "static", "operators_to_quantize": [ "Add", "MatMul" ], "node_exclusion": [], "aware_training": false, "per_channel": false, "calibration": { "method": "minmax", "num_calibration_samples": 100, "calibration_histogram_percentile": null, "calibration_moving_average": null, "calibration_moving_average_constant": null }, "framework": "onnxruntime", "framework_args": { "opset": 11, "optimization_level": 1 }, "hardware": "Architecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nByte Order: Little Endian\nAddress sizes: 46 bits physical, 48 bits virtual\nCPU(s): 8\nOn-line CPU(s) list: 0-7\nThread(s) per core: 2\nCore(s) per socket: 4\nSocket(s): 1\nNUMA node(s): 1\nVendor ID: GenuineIntel\nCPU family: 6\nModel: 85\nModel name: Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz\nStepping: 7\nCPU MHz: 3100.670\nBogoMIPS: 5000.00\nHypervisor vendor: KVM\nVirtualization type: full\nL1d cache: 128 KiB\nL1i cache: 128 KiB\nL2 cache: 4 MiB\nL3 cache: 35.8 MiB\nNUMA node0 CPU(s): 0-7\nVulnerability Itlb multihit: KVM: Vulnerable\nVulnerability L1tf: Mitigation; PTE Inversion\nVulnerability Mds: Vulnerable: Clear CPU buffers attempted, no microcode; SMT Host state unknown\nVulnerability Meltdown: Mitigation; PTI\nVulnerability Spec store bypass: Vulnerable\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Retpolines, STIBP disabled, RSB filling\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single pti fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid mpx avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves ida arat pku ospke avx512_vnni\n", "versions": { "transformers": "4.20.1", "optimum": "1.2.3.dev0", "optimum_hash": "5c9af4e5f93c7e9bd523563230732b49603dc4d7" }, "evaluation": { "time": [ { "batch_size": 8, "input_length": 128, "baseline": { "nb_forwards": 40, "throughput": 2.67, "latency_mean": 375.10382185000003, "latency_std": 41.77851887343237, "latency_50": 402.085788, "latency_90": 405.6853881, "latency_95": 406.5309876, "latency_99": 407.68657795999997, "latency_999": 407.928640196 }, "optimized": { "nb_forwards": 67, "throughput": 4.47, "latency_mean": 225.09343717910448, "latency_std": 5.4422008927739745, "latency_50": 224.468437, "latency_90": 233.1922012, "latency_95": 234.06977830000002, "latency_99": 238.47171616, "latency_999": 239.621179816 } }, { "batch_size": 4, "input_length": 32, "baseline": { "nb_forwards": 226, "throughput": 15.07, "latency_mean": 66.56901730973452, "latency_std": 7.513087298346711, "latency_50": 71.2563305, "latency_90": 72.6457545, "latency_95": 72.8911035, "latency_99": 73.828916, "latency_999": 74.4237675 }, "optimized": { "nb_forwards": 436, "throughput": 29.07, "latency_mean": 34.45165110091743, "latency_std": 1.0538553485588218, "latency_50": 34.582433, "latency_90": 34.9882515, "latency_95": 35.17064425, "latency_99": 35.965547449999995, "latency_999": 38.29062908499999 } }, { "batch_size": 4, "input_length": 64, "baseline": { "nb_forwards": 126, "throughput": 8.4, "latency_mean": 119.99883396031747, "latency_std": 0.5389393608027111, "latency_50": 119.924409, "latency_90": 120.578213, "latency_95": 120.87888125, "latency_99": 121.94754125, "latency_999": 122.377029 }, "optimized": { "nb_forwards": 260, "throughput": 17.33, "latency_mean": 57.854139219230774, "latency_std": 2.41471729691721, "latency_50": 57.400247, "latency_90": 61.3181592, "latency_95": 62.38365544999999, "latency_99": 64.46206894, "latency_999": 65.22337084999998 } }, { "batch_size": 1, "input_length": 64, "baseline": { "nb_forwards": 348, "throughput": 23.2, "latency_mean": 43.22926725, "latency_std": 1.1946103356932485, "latency_50": 43.0492745, "latency_90": 44.705008299999996, "latency_95": 45.636852299999994, "latency_99": 46.63035402, "latency_999": 46.997215917999995 }, "optimized": { "nb_forwards": 789, "throughput": 52.6, "latency_mean": 19.02440978073511, "latency_std": 1.1428706688800712, "latency_50": 18.854472, "latency_90": 20.772688400000003, "latency_95": 21.339546999999996, "latency_99": 21.90764228, "latency_999": 22.18167628399999 } }, { "batch_size": 1, "input_length": 128, "baseline": { "nb_forwards": 204, "throughput": 13.6, "latency_mean": 73.54620642647059, "latency_std": 1.573277335192714, "latency_50": 73.3753375, "latency_90": 75.5904331, "latency_95": 76.15987129999999, "latency_99": 78.56596293, "latency_999": 80.276804319 }, "optimized": { "nb_forwards": 396, "throughput": 26.4, "latency_mean": 37.94637206818182, "latency_std": 0.22863560761755683, "latency_50": 37.917286, "latency_90": 38.2418925, "latency_95": 38.31172375, "latency_99": 38.762135050000005, "latency_999": 38.947024510000006 } }, { "batch_size": 8, "input_length": 64, "baseline": { "nb_forwards": 70, "throughput": 4.67, "latency_mean": 215.9852832857143, "latency_std": 2.6695382982970624, "latency_50": 215.4558835, "latency_90": 219.254347, "latency_95": 219.83214235, "latency_99": 222.17290862000002, "latency_999": 222.546938162 }, "optimized": { "nb_forwards": 116, "throughput": 7.73, "latency_mean": 130.388671, "latency_std": 0.9538270722224035, "latency_50": 130.420778, "latency_90": 131.4585225, "latency_95": 131.97905225, "latency_99": 133.03534015, "latency_999": 133.14044914 } }, { "batch_size": 4, "input_length": 128, "baseline": { "nb_forwards": 68, "throughput": 4.53, "latency_mean": 222.9005361617647, "latency_std": 2.9786389600252616, "latency_50": 222.130681, "latency_90": 226.3922357, "latency_95": 227.17735725, "latency_99": 231.35961219, "latency_999": 232.82185281900001 }, "optimized": { "nb_forwards": 112, "throughput": 7.47, "latency_mean": 135.05900691964285, "latency_std": 0.852188728183432, "latency_50": 135.0183545, "latency_90": 136.08118190000002, "latency_95": 136.6358585, "latency_99": 137.6042464, "latency_999": 137.777202544 } }, { "batch_size": 8, "input_length": 32, "baseline": { "nb_forwards": 127, "throughput": 8.47, "latency_mean": 118.38734347244095, "latency_std": 3.928549187092105, "latency_50": 118.722872, "latency_90": 119.61111340000001, "latency_95": 120.56405670000001, "latency_99": 122.34993956, "latency_999": 122.379941486 }, "optimized": { "nb_forwards": 233, "throughput": 15.53, "latency_mean": 64.56411339484978, "latency_std": 0.8101063203434803, "latency_50": 64.369473, "latency_90": 65.578687, "latency_95": 66.343236, "latency_99": 67.23023495999999, "latency_999": 67.842266136 } }, { "batch_size": 1, "input_length": 32, "baseline": { "nb_forwards": 314, "throughput": 20.93, "latency_mean": 47.871883372611464, "latency_std": 0.8675741645315053, "latency_50": 47.86284, "latency_90": 48.8556855, "latency_95": 49.34610644999999, "latency_99": 50.61812615, "latency_999": 50.940127244 }, "optimized": { "nb_forwards": 1419, "throughput": 94.6, "latency_mean": 10.575771353770262, "latency_std": 0.6992989868391869, "latency_50": 10.433279, "latency_90": 11.4260774, "latency_95": 12.119523699999998, "latency_99": 12.74776788, "latency_999": 13.092563020000012 } } ], "others": { "baseline": { "precision": 0.9358012339503085, "recall": 0.9444631437226523, "f1": 0.9401122372057961, "accuracy": 0.9882013940267124 }, "optimized": { "precision": 0.06543578604398588, "recall": 0.24335240659710536, "f1": 0.10313837375178317, "accuracy": 0.35697597445582335 } } }, "max_eval_samples": null, "time_benchmark_args": { "duration": 15, "warmup_runs": 5 }, "model_type": "distilbert" }, { "model_name_or_path": "elastic/distilbert-base-uncased-finetuned-conll03-english", "task": "token-classification", "task_args": null, "dataset": { "path": "conll2003", "eval_split": "validation", "data_keys": { "primary": "tokens", "secondary": null }, "ref_keys": [ "ner_tags" ], "name": null, "calibration_split": "train" }, "quantization_approach": "static", "operators_to_quantize": [ "Add", "MatMul" ], "node_exclusion": [ "layernorm", "gelu", "residual", "gather", "softmax" ], "aware_training": false, "per_channel": false, "calibration": { "method": "minmax", "num_calibration_samples": 100, "calibration_histogram_percentile": null, "calibration_moving_average": null, "calibration_moving_average_constant": null }, "framework": "onnxruntime", "framework_args": { "opset": 11, "optimization_level": 1 }, "hardware": "Architecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nByte Order: Little Endian\nAddress sizes: 46 bits physical, 48 bits virtual\nCPU(s): 8\nOn-line CPU(s) list: 0-7\nThread(s) per core: 2\nCore(s) per socket: 4\nSocket(s): 1\nNUMA node(s): 1\nVendor ID: GenuineIntel\nCPU family: 6\nModel: 85\nModel name: Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz\nStepping: 7\nCPU MHz: 3105.038\nBogoMIPS: 5000.00\nHypervisor vendor: KVM\nVirtualization type: full\nL1d cache: 128 KiB\nL1i cache: 128 KiB\nL2 cache: 4 MiB\nL3 cache: 35.8 MiB\nNUMA node0 CPU(s): 0-7\nVulnerability Itlb multihit: KVM: Vulnerable\nVulnerability L1tf: Mitigation; PTE Inversion\nVulnerability Mds: Vulnerable: Clear CPU buffers attempted, no microcode; SMT Host state unknown\nVulnerability Meltdown: Mitigation; PTI\nVulnerability Spec store bypass: Vulnerable\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Retpolines, STIBP disabled, RSB filling\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single pti fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid mpx avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves ida arat pku ospke avx512_vnni\n", "versions": { "transformers": "4.20.1", "optimum": "1.2.3.dev0", "optimum_hash": "5c9af4e5f93c7e9bd523563230732b49603dc4d7" }, "evaluation": { "time": [ { "batch_size": 1, "input_length": 32, "baseline": { "nb_forwards": 315, "throughput": 21.0, "latency_mean": 47.722406904761904, "latency_std": 0.78575656702049, "latency_50": 47.657137, "latency_90": 48.539485, "latency_95": 49.1951445, "latency_99": 50.459615660000004, "latency_999": 51.17226236 }, "optimized": { "nb_forwards": 1829, "throughput": 121.93, "latency_mean": 8.204672595407327, "latency_std": 0.6281598971222003, "latency_50": 8.030814, "latency_90": 9.035519800000001, "latency_95": 9.1478804, "latency_99": 9.59396184, "latency_999": 10.840923620000012 } }, { "batch_size": 8, "input_length": 32, "baseline": { "nb_forwards": 140, "throughput": 9.33, "latency_mean": 107.46691665714286, "latency_std": 14.217711617362141, "latency_50": 118.404271, "latency_90": 119.217932, "latency_95": 120.1323987, "latency_99": 121.88812750999999, "latency_999": 122.23331556199999 }, "optimized": { "nb_forwards": 336, "throughput": 22.4, "latency_mean": 44.742087273809524, "latency_std": 1.297316432066614, "latency_50": 44.5606265, "latency_90": 46.6604945, "latency_95": 47.15595925, "latency_99": 47.76239855, "latency_999": 48.352806460000004 } }, { "batch_size": 4, "input_length": 128, "baseline": { "nb_forwards": 69, "throughput": 4.6, "latency_mean": 220.25039004347826, "latency_std": 1.5005813984244252, "latency_50": 219.820796, "latency_90": 222.1719386, "latency_95": 224.2704662, "latency_99": 225.16129407999998, "latency_999": 225.809551408 }, "optimized": { "nb_forwards": 160, "throughput": 10.67, "latency_mean": 94.04712534375, "latency_std": 4.044440830749728, "latency_50": 92.7247505, "latency_90": 100.1824987, "latency_95": 103.149005, "latency_99": 106.80153862, "latency_999": 107.010720374 } }, { "batch_size": 1, "input_length": 64, "baseline": { "nb_forwards": 251, "throughput": 16.73, "latency_mean": 59.98497594422311, "latency_std": 1.3002235321937636, "latency_50": 59.807255, "latency_90": 61.620392, "latency_95": 62.9347585, "latency_99": 63.5797715, "latency_999": 64.073255 }, "optimized": { "nb_forwards": 933, "throughput": 62.2, "latency_mean": 16.080496909967845, "latency_std": 0.6562666342873719, "latency_50": 16.202643, "latency_90": 16.5627894, "latency_95": 16.7647474, "latency_99": 17.10871436, "latency_999": 17.232195299999997 } }, { "batch_size": 4, "input_length": 32, "baseline": { "nb_forwards": 271, "throughput": 18.07, "latency_mean": 55.44747836162362, "latency_std": 1.1058764508579348, "latency_50": 55.358138, "latency_90": 56.887965, "latency_95": 57.3142435, "latency_99": 58.44232040000001, "latency_999": 58.78495296 }, "optimized": { "nb_forwards": 540, "throughput": 36.0, "latency_mean": 27.828701338888887, "latency_std": 0.2830425296733134, "latency_50": 27.783282, "latency_90": 28.192628600000003, "latency_95": 28.425469149999998, "latency_99": 28.75255937, "latency_999": 28.907412832000002 } }, { "batch_size": 1, "input_length": 128, "baseline": { "nb_forwards": 211, "throughput": 14.07, "latency_mean": 71.34490764454976, "latency_std": 0.8606794549682575, "latency_50": 71.218379, "latency_90": 72.307616, "latency_95": 72.7004855, "latency_99": 73.44670070000001, "latency_999": 76.77649755999997 }, "optimized": { "nb_forwards": 473, "throughput": 31.53, "latency_mean": 31.754805171247355, "latency_std": 0.4300285133323451, "latency_50": 31.703291, "latency_90": 32.3621104, "latency_95": 32.6649158, "latency_99": 33.04651476, "latency_999": 33.134080912 } }, { "batch_size": 4, "input_length": 64, "baseline": { "nb_forwards": 125, "throughput": 8.33, "latency_mean": 120.569009496, "latency_std": 0.8160222004383323, "latency_50": 120.371051, "latency_90": 121.2576836, "latency_95": 122.412088, "latency_99": 123.63907148, "latency_999": 124.379808824 }, "optimized": { "nb_forwards": 275, "throughput": 18.33, "latency_mean": 54.700663727272726, "latency_std": 0.6805625823558532, "latency_50": 54.534815, "latency_90": 55.637867, "latency_95": 56.2308853, "latency_99": 57.06877728, "latency_999": 57.400091958 } }, { "batch_size": 8, "input_length": 128, "baseline": { "nb_forwards": 45, "throughput": 3.0, "latency_mean": 337.56930973333334, "latency_std": 42.73015832642524, "latency_50": 311.940976, "latency_90": 408.1195232, "latency_95": 409.32610719999997, "latency_99": 409.70182116, "latency_999": 409.771442316 }, "optimized": { "nb_forwards": 73, "throughput": 4.87, "latency_mean": 205.58670958904108, "latency_std": 17.128151144285876, "latency_50": 216.043924, "latency_90": 219.5796966, "latency_95": 221.6779058, "latency_99": 224.22378336000003, "latency_999": 224.454495336 } }, { "batch_size": 8, "input_length": 64, "baseline": { "nb_forwards": 70, "throughput": 4.67, "latency_mean": 215.1244777, "latency_std": 1.555883759703903, "latency_50": 214.6171285, "latency_90": 216.7817589, "latency_95": 218.67742330000002, "latency_99": 220.89366772999998, "latency_999": 221.10970307300002 }, "optimized": { "nb_forwards": 173, "throughput": 11.53, "latency_mean": 86.89619405780347, "latency_std": 2.3718385044122723, "latency_50": 86.535865, "latency_90": 90.37287020000001, "latency_95": 91.8051702, "latency_99": 92.75978772, "latency_999": 92.817659772 } } ], "others": { "baseline": { "precision": 0.9358012339503085, "recall": 0.9444631437226523, "f1": 0.9401122372057961, "accuracy": 0.9882013940267124 }, "optimized": { "precision": 0.9038969616908851, "recall": 0.9212386401884888, "f1": 0.912485414235706, "accuracy": 0.9842295860753086 } } }, "max_eval_samples": null, "time_benchmark_args": { "duration": 15, "warmup_runs": 5 }, "model_type": "distilbert" }, { "model_name_or_path": "elastic/distilbert-base-uncased-finetuned-conll03-english", "task": "token-classification", "task_args": null, "dataset": { "path": "conll2003", "eval_split": "validation", "data_keys": { "primary": "tokens", "secondary": null }, "ref_keys": [ "ner_tags" ], "name": null, "calibration_split": "train" }, "quantization_approach": "dynamic", "operators_to_quantize": [ "Add", "MatMul" ], "node_exclusion": [], "aware_training": false, "per_channel": false, "calibration": { "method": "minmax", "num_calibration_samples": 100, "calibration_histogram_percentile": null, "calibration_moving_average": null, "calibration_moving_average_constant": null }, "framework": "onnxruntime", "framework_args": { "opset": 11, "optimization_level": 1 }, "hardware": "Architecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nByte Order: Little Endian\nAddress sizes: 46 bits physical, 48 bits virtual\nCPU(s): 8\nOn-line CPU(s) list: 0-7\nThread(s) per core: 2\nCore(s) per socket: 4\nSocket(s): 1\nNUMA node(s): 1\nVendor ID: GenuineIntel\nCPU family: 6\nModel: 85\nModel name: Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz\nStepping: 7\nCPU MHz: 3100.033\nBogoMIPS: 5000.00\nHypervisor vendor: KVM\nVirtualization type: full\nL1d cache: 128 KiB\nL1i cache: 128 KiB\nL2 cache: 4 MiB\nL3 cache: 35.8 MiB\nNUMA node0 CPU(s): 0-7\nVulnerability Itlb multihit: KVM: Vulnerable\nVulnerability L1tf: Mitigation; PTE Inversion\nVulnerability Mds: Vulnerable: Clear CPU buffers attempted, no microcode; SMT Host state unknown\nVulnerability Meltdown: Mitigation; PTI\nVulnerability Spec store bypass: Vulnerable\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Retpolines, STIBP disabled, RSB filling\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single pti fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid mpx avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves ida arat pku ospke avx512_vnni\n", "versions": { "transformers": "4.20.1", "optimum": "1.2.3.dev0", "optimum_hash": "5c9af4e5f93c7e9bd523563230732b49603dc4d7" }, "evaluation": { "time": [ { "batch_size": 4, "input_length": 64, "baseline": { "nb_forwards": 167, "throughput": 11.13, "latency_mean": 90.0069231257485, "latency_std": 1.7115040048180659, "latency_50": 89.808392, "latency_90": 92.1615498, "latency_95": 93.00187689999998, "latency_99": 94.87745056, "latency_999": 96.02146185400001 }, "optimized": { "nb_forwards": 347, "throughput": 23.13, "latency_mean": 43.265280452449566, "latency_std": 2.7751472818818734, "latency_50": 44.883167, "latency_90": 45.35501180000001, "latency_95": 46.0536215, "latency_99": 46.73552832, "latency_999": 46.9292037 } }, { "batch_size": 1, "input_length": 32, "baseline": { "nb_forwards": 311, "throughput": 20.73, "latency_mean": 48.2705229710611, "latency_std": 1.1247628435592778, "latency_50": 48.262398, "latency_90": 49.675041, "latency_95": 50.1826075, "latency_99": 51.5223755, "latency_999": 52.486126479999996 }, "optimized": { "nb_forwards": 1955, "throughput": 130.33, "latency_mean": 7.675419680306905, "latency_std": 0.3408992210776522, "latency_50": 7.708051, "latency_90": 7.9747872, "latency_95": 8.22643, "latency_99": 8.42614896, "latency_999": 9.247490550000006 } }, { "batch_size": 8, "input_length": 64, "baseline": { "nb_forwards": 70, "throughput": 4.67, "latency_mean": 214.59417684285717, "latency_std": 1.7664217544171172, "latency_50": 214.2422325, "latency_90": 216.45445769999998, "latency_95": 218.51388645, "latency_99": 220.63894481, "latency_999": 221.498688881 }, "optimized": { "nb_forwards": 171, "throughput": 11.4, "latency_mean": 87.8829150994152, "latency_std": 0.9834434065965216, "latency_50": 87.63179, "latency_90": 88.639136, "latency_95": 90.3700155, "latency_99": 91.69530259999999, "latency_999": 92.0263997 } }, { "batch_size": 8, "input_length": 32, "baseline": { "nb_forwards": 170, "throughput": 11.33, "latency_mean": 88.68199067647059, "latency_std": 1.5268934268549699, "latency_50": 88.7044375, "latency_90": 90.6080309, "latency_95": 91.1070495, "latency_99": 92.72121441, "latency_999": 94.157151444 }, "optimized": { "nb_forwards": 347, "throughput": 23.13, "latency_mean": 43.326895181556196, "latency_std": 0.4931306808409643, "latency_50": 43.189003, "latency_90": 44.0324068, "latency_95": 44.3867949, "latency_99": 45.111986800000004, "latency_999": 45.838947726 } }, { "batch_size": 1, "input_length": 128, "baseline": { "nb_forwards": 205, "throughput": 13.67, "latency_mean": 73.52497626829268, "latency_std": 1.0865490902318493, "latency_50": 73.373698, "latency_90": 74.60406520000001, "latency_95": 74.979193, "latency_99": 76.63692036, "latency_999": 80.98970668399998 }, "optimized": { "nb_forwards": 562, "throughput": 37.47, "latency_mean": 26.697691247330958, "latency_std": 0.33379520623836, "latency_50": 26.612126, "latency_90": 27.0071515, "latency_95": 27.35591295, "latency_99": 27.96691773, "latency_999": 28.784246988999993 } }, { "batch_size": 4, "input_length": 32, "baseline": { "nb_forwards": 210, "throughput": 14.0, "latency_mean": 71.64548409523809, "latency_std": 0.9364269258806002, "latency_50": 71.3511745, "latency_90": 73.0493582, "latency_95": 73.74027225, "latency_99": 74.29445254000001, "latency_999": 76.295386572 }, "optimized": { "nb_forwards": 700, "throughput": 46.67, "latency_mean": 21.43839562, "latency_std": 0.7747656036456344, "latency_50": 21.458011, "latency_90": 22.4099538, "latency_95": 22.8598522, "latency_99": 23.88133127, "latency_999": 24.30084730600001 } }, { "batch_size": 1, "input_length": 64, "baseline": { "nb_forwards": 341, "throughput": 22.73, "latency_mean": 44.00805451906158, "latency_std": 2.0146525121369048, "latency_50": 43.40077, "latency_90": 46.844243, "latency_95": 48.01232, "latency_99": 48.8285812, "latency_999": 49.91079640000002 }, "optimized": { "nb_forwards": 1191, "throughput": 79.4, "latency_mean": 12.596608948782535, "latency_std": 0.4604972017819279, "latency_50": 12.560364, "latency_90": 13.254538, "latency_95": 13.4481775, "latency_99": 13.824474499999997, "latency_999": 14.306386329999988 } }, { "batch_size": 4, "input_length": 128, "baseline": { "nb_forwards": 89, "throughput": 5.93, "latency_mean": 170.04430629213482, "latency_std": 1.7973801840824346, "latency_50": 169.804268, "latency_90": 172.3942636, "latency_95": 172.909571, "latency_99": 174.41490236, "latency_999": 174.83032853600002 }, "optimized": { "nb_forwards": 184, "throughput": 12.27, "latency_mean": 81.67628805434782, "latency_std": 2.2754557516244045, "latency_50": 81.0874735, "latency_90": 85.0994149, "latency_95": 85.74545605, "latency_99": 88.41751725, "latency_999": 89.711957766 } }, { "batch_size": 8, "input_length": 128, "baseline": { "nb_forwards": 37, "throughput": 2.47, "latency_mean": 414.4216835675676, "latency_std": 3.861346436570963, "latency_50": 415.141617, "latency_90": 419.931193, "latency_95": 421.07318360000005, "latency_99": 421.86215943999997, "latency_999": 422.088350644 }, "optimized": { "nb_forwards": 98, "throughput": 6.53, "latency_mean": 154.51860542857142, "latency_std": 3.911988556596126, "latency_50": 153.7217105, "latency_90": 159.4319015, "latency_95": 161.4452743, "latency_99": 163.25851040999999, "latency_999": 163.55056994100002 } } ], "others": { "baseline": { "precision": 0.9358012339503085, "recall": 0.9444631437226523, "f1": 0.9401122372057961, "accuracy": 0.9882013940267124 }, "optimized": { "precision": 0.9337560487235108, "recall": 0.9417704476607203, "f1": 0.9377461248428991, "accuracy": 0.9878314707371209 } } }, "max_eval_samples": null, "time_benchmark_args": { "duration": 15, "warmup_runs": 5 }, "model_type": "distilbert" }, { "model_name_or_path": "elastic/distilbert-base-uncased-finetuned-conll03-english", "task": "token-classification", "task_args": null, "dataset": { "path": "conll2003", "eval_split": "validation", "data_keys": { "primary": "tokens", "secondary": null }, "ref_keys": [ "ner_tags" ], "name": null, "calibration_split": "train" }, "quantization_approach": "dynamic", "operators_to_quantize": [ "Add" ], "node_exclusion": [ "layernorm", "gelu", "residual", "gather", "softmax" ], "aware_training": false, "per_channel": false, "calibration": { "method": "minmax", "num_calibration_samples": 100, "calibration_histogram_percentile": null, "calibration_moving_average": null, "calibration_moving_average_constant": null }, "framework": "onnxruntime", "framework_args": { "opset": 11, "optimization_level": 1 }, "hardware": "Architecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nByte Order: Little Endian\nAddress sizes: 46 bits physical, 48 bits virtual\nCPU(s): 8\nOn-line CPU(s) list: 0-7\nThread(s) per core: 2\nCore(s) per socket: 4\nSocket(s): 1\nNUMA node(s): 1\nVendor ID: GenuineIntel\nCPU family: 6\nModel: 85\nModel name: Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz\nStepping: 7\nCPU MHz: 3125.174\nBogoMIPS: 5000.00\nHypervisor vendor: KVM\nVirtualization type: full\nL1d cache: 128 KiB\nL1i cache: 128 KiB\nL2 cache: 4 MiB\nL3 cache: 35.8 MiB\nNUMA node0 CPU(s): 0-7\nVulnerability Itlb multihit: KVM: Vulnerable\nVulnerability L1tf: Mitigation; PTE Inversion\nVulnerability Mds: Vulnerable: Clear CPU buffers attempted, no microcode; SMT Host state unknown\nVulnerability Meltdown: Mitigation; PTI\nVulnerability Spec store bypass: Vulnerable\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Retpolines, STIBP disabled, RSB filling\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single pti fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid mpx avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves ida arat pku ospke avx512_vnni\n", "versions": { "transformers": "4.20.1", "optimum": "1.2.3.dev0", "optimum_hash": "5c9af4e5f93c7e9bd523563230732b49603dc4d7" }, "evaluation": { "time": [ { "batch_size": 4, "input_length": 32, "baseline": { "nb_forwards": 210, "throughput": 14.0, "latency_mean": 71.72416266666667, "latency_std": 1.0823165199145606, "latency_50": 71.4790375, "latency_90": 72.9353617, "latency_95": 73.99241959999999, "latency_99": 75.17633138, "latency_999": 75.422236178 }, "optimized": { "nb_forwards": 272, "throughput": 18.13, "latency_mean": 55.160912452205885, "latency_std": 0.6521145589906397, "latency_50": 55.189684, "latency_90": 55.7469555, "latency_95": 55.9487205, "latency_99": 57.71467353, "latency_999": 58.06304184299999 } }, { "batch_size": 4, "input_length": 64, "baseline": { "nb_forwards": 159, "throughput": 10.6, "latency_mean": 94.34125967295597, "latency_std": 10.03252637987446, "latency_50": 91.050963, "latency_90": 120.90638, "latency_95": 121.39196709999999, "latency_99": 122.02035808, "latency_999": 122.622120378 }, "optimized": { "nb_forwards": 141, "throughput": 9.4, "latency_mean": 107.02119182269503, "latency_std": 0.8366529782537496, "latency_50": 106.835229, "latency_90": 108.249955, "latency_95": 108.628056, "latency_99": 109.2045378, "latency_999": 110.26474030000001 } }, { "batch_size": 1, "input_length": 64, "baseline": { "nb_forwards": 248, "throughput": 16.53, "latency_mean": 60.49658227016129, "latency_std": 0.8819758121225761, "latency_50": 60.343132, "latency_90": 61.6551285, "latency_95": 62.34768465, "latency_99": 63.31741764, "latency_999": 63.487983883999995 }, "optimized": { "nb_forwards": 503, "throughput": 33.53, "latency_mean": 29.869608675944335, "latency_std": 0.3559872739101281, "latency_50": 29.794297, "latency_90": 30.1428484, "latency_95": 30.4600473, "latency_99": 30.75200356, "latency_999": 33.756738917999996 } }, { "batch_size": 1, "input_length": 32, "baseline": { "nb_forwards": 445, "throughput": 29.67, "latency_mean": 33.73988259325843, "latency_std": 1.5629102688800236, "latency_50": 33.480773, "latency_90": 35.338715799999996, "latency_95": 35.7559136, "latency_99": 36.873395439999996, "latency_999": 47.8180268919997 }, "optimized": { "nb_forwards": 1019, "throughput": 67.93, "latency_mean": 14.732223261040234, "latency_std": 0.9828893965554253, "latency_50": 14.401579, "latency_90": 16.897523, "latency_95": 17.0312649, "latency_99": 17.46899586, "latency_999": 17.893012501999998 } }, { "batch_size": 1, "input_length": 128, "baseline": { "nb_forwards": 210, "throughput": 14.0, "latency_mean": 71.59536255238095, "latency_std": 1.13338439776916, "latency_50": 71.154012, "latency_90": 73.357109, "latency_95": 73.92077855, "latency_99": 74.25820067000001, "latency_999": 75.003235419 }, "optimized": { "nb_forwards": 282, "throughput": 18.8, "latency_mean": 53.25616293617021, "latency_std": 4.987232229454594, "latency_50": 56.9742995, "latency_90": 57.4831075, "latency_95": 58.0667945, "latency_99": 59.20656624, "latency_999": 62.057965914999976 } }, { "batch_size": 8, "input_length": 128, "baseline": { "nb_forwards": 48, "throughput": 3.2, "latency_mean": 314.12435689583333, "latency_std": 4.446221158816441, "latency_50": 314.1643195, "latency_90": 319.1971656, "latency_95": 322.43416895, "latency_99": 326.67617025, "latency_999": 327.501665325 }, "optimized": { "nb_forwards": 47, "throughput": 3.13, "latency_mean": 323.93693336170213, "latency_std": 6.869443185578473, "latency_50": 323.040004, "latency_90": 334.16514060000003, "latency_95": 334.9778985, "latency_99": 336.97889436, "latency_999": 337.249830036 } }, { "batch_size": 8, "input_length": 64, "baseline": { "nb_forwards": 70, "throughput": 4.67, "latency_mean": 216.0601891, "latency_std": 2.0959098467954616, "latency_50": 215.485022, "latency_90": 219.06993409999998, "latency_95": 219.52531109999998, "latency_99": 222.29498567000002, "latency_999": 223.08344266699999 }, "optimized": { "nb_forwards": 92, "throughput": 6.13, "latency_mean": 163.75156455434782, "latency_std": 7.2184659324399, "latency_50": 161.7846625, "latency_90": 176.00411860000003, "latency_95": 179.22768994999998, "latency_99": 183.41052014000002, "latency_999": 188.60359111400004 } }, { "batch_size": 8, "input_length": 32, "baseline": { "nb_forwards": 165, "throughput": 11.0, "latency_mean": 91.31757051515152, "latency_std": 2.4793130848747467, "latency_50": 91.036743, "latency_90": 93.733273, "latency_95": 95.67535299999999, "latency_99": 100.79730819999997, "latency_999": 102.770634928 }, "optimized": { "nb_forwards": 163, "throughput": 10.87, "latency_mean": 92.08470889570552, "latency_std": 11.432354704165576, "latency_50": 84.815059, "latency_90": 105.0187196, "latency_95": 105.2827365, "latency_99": 106.12717995999999, "latency_999": 106.21855146600001 } }, { "batch_size": 4, "input_length": 128, "baseline": { "nb_forwards": 80, "throughput": 5.33, "latency_mean": 188.59378125, "latency_std": 22.91815791529492, "latency_50": 172.9137385, "latency_90": 219.50391, "latency_95": 220.21657405000002, "latency_99": 221.94066489, "latency_999": 223.095406389 }, "optimized": { "nb_forwards": 88, "throughput": 5.87, "latency_mean": 171.786525, "latency_std": 7.080702021982688, "latency_50": 170.577715, "latency_90": 184.1455852, "latency_95": 186.60128294999998, "latency_99": 190.34417446, "latency_999": 190.786445746 } } ], "others": { "baseline": { "precision": 0.9358012339503085, "recall": 0.9444631437226523, "f1": 0.9401122372057961, "accuracy": 0.9882013940267124 }, "optimized": { "precision": 0.9358012339503085, "recall": 0.9444631437226523, "f1": 0.9401122372057961, "accuracy": 0.9882013940267124 } } }, "max_eval_samples": null, "time_benchmark_args": { "duration": 15, "warmup_runs": 5 }, "model_type": "distilbert" }, { "model_name_or_path": "elastic/distilbert-base-uncased-finetuned-conll03-english", "task": "token-classification", "task_args": null, "dataset": { "path": "conll2003", "eval_split": "validation", "data_keys": { "primary": "tokens", "secondary": null }, "ref_keys": [ "ner_tags" ], "name": null, "calibration_split": "train" }, "quantization_approach": "dynamic", "operators_to_quantize": [ "Add" ], "node_exclusion": [], "aware_training": false, "per_channel": false, "calibration": { "method": "minmax", "num_calibration_samples": 100, "calibration_histogram_percentile": null, "calibration_moving_average": null, "calibration_moving_average_constant": null }, "framework": "onnxruntime", "framework_args": { "opset": 11, "optimization_level": 1 }, "hardware": "Architecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nByte Order: Little Endian\nAddress sizes: 46 bits physical, 48 bits virtual\nCPU(s): 8\nOn-line CPU(s) list: 0-7\nThread(s) per core: 2\nCore(s) per socket: 4\nSocket(s): 1\nNUMA node(s): 1\nVendor ID: GenuineIntel\nCPU family: 6\nModel: 85\nModel name: Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz\nStepping: 7\nCPU MHz: 3102.480\nBogoMIPS: 5000.00\nHypervisor vendor: KVM\nVirtualization type: full\nL1d cache: 128 KiB\nL1i cache: 128 KiB\nL2 cache: 4 MiB\nL3 cache: 35.8 MiB\nNUMA node0 CPU(s): 0-7\nVulnerability Itlb multihit: KVM: Vulnerable\nVulnerability L1tf: Mitigation; PTE Inversion\nVulnerability Mds: Vulnerable: Clear CPU buffers attempted, no microcode; SMT Host state unknown\nVulnerability Meltdown: Mitigation; PTI\nVulnerability Spec store bypass: Vulnerable\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Retpolines, STIBP disabled, RSB filling\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single pti fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid mpx avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves ida arat pku ospke avx512_vnni\n", "versions": { "transformers": "4.20.1", "optimum": "1.2.3.dev0", "optimum_hash": "5c9af4e5f93c7e9bd523563230732b49603dc4d7" }, "evaluation": { "time": [ { "batch_size": 8, "input_length": 128, "baseline": { "nb_forwards": 37, "throughput": 2.47, "latency_mean": 408.1543652972973, "latency_std": 3.7417217521654447, "latency_50": 408.204875, "latency_90": 412.71899360000003, "latency_95": 415.5526328, "latency_99": 417.56349056, "latency_999": 418.43689865600004 }, "optimized": { "nb_forwards": 47, "throughput": 3.13, "latency_mean": 325.0274505106383, "latency_std": 16.42866903352087, "latency_50": 323.280477, "latency_90": 336.7398878, "latency_95": 340.73717709999994, "latency_99": 392.46748674, "latency_999": 395.68078127399997 } }, { "batch_size": 4, "input_length": 32, "baseline": { "nb_forwards": 270, "throughput": 18.0, "latency_mean": 55.55779645555556, "latency_std": 1.1666476240332937, "latency_50": 55.618944, "latency_90": 57.023193799999994, "latency_95": 57.38550095, "latency_99": 58.569932480000006, "latency_999": 59.960250984 }, "optimized": { "nb_forwards": 342, "throughput": 22.8, "latency_mean": 43.866120190058474, "latency_std": 1.226638661359772, "latency_50": 43.698571, "latency_90": 45.555251, "latency_95": 46.4943021, "latency_99": 47.907215019999995, "latency_999": 48.302261889 } }, { "batch_size": 4, "input_length": 64, "baseline": { "nb_forwards": 126, "throughput": 8.4, "latency_mean": 119.10938425396826, "latency_std": 5.242609662991225, "latency_50": 119.8814875, "latency_90": 122.013498, "latency_95": 122.8530875, "latency_99": 124.503561, "latency_999": 125.157123625 }, "optimized": { "nb_forwards": 182, "throughput": 12.13, "latency_mean": 82.46068521428572, "latency_std": 1.8851982998377101, "latency_50": 82.4162485, "latency_90": 84.9214402, "latency_95": 85.51261295, "latency_99": 87.40860171, "latency_999": 88.40375641499999 } }, { "batch_size": 1, "input_length": 32, "baseline": { "nb_forwards": 448, "throughput": 29.87, "latency_mean": 33.48908037053571, "latency_std": 1.1547087313019704, "latency_50": 33.380541, "latency_90": 35.0732339, "latency_95": 35.621661700000004, "latency_99": 36.54991294999999, "latency_999": 37.980162713999995 }, "optimized": { "nb_forwards": 1059, "throughput": 70.6, "latency_mean": 14.167183152974506, "latency_std": 0.3567793828104339, "latency_50": 14.113569, "latency_90": 14.613918199999999, "latency_95": 14.760972, "latency_99": 15.203892, "latency_999": 15.846131796 } }, { "batch_size": 4, "input_length": 128, "baseline": { "nb_forwards": 69, "throughput": 4.6, "latency_mean": 219.8001221014493, "latency_std": 1.682059211691071, "latency_50": 219.207299, "latency_90": 222.769191, "latency_95": 223.179612, "latency_99": 224.12751892, "latency_999": 225.31481789199998 }, "optimized": { "nb_forwards": 92, "throughput": 6.13, "latency_mean": 163.62187851086955, "latency_std": 3.9886368861391612, "latency_50": 163.256997, "latency_90": 168.68158590000002, "latency_95": 170.24915575, "latency_99": 173.29093638, "latency_999": 175.298208738 } }, { "batch_size": 1, "input_length": 128, "baseline": { "nb_forwards": 214, "throughput": 14.27, "latency_mean": 70.39102674299066, "latency_std": 0.645440942501749, "latency_50": 70.2169265, "latency_90": 71.08614279999999, "latency_95": 71.45404645, "latency_99": 73.1100614, "latency_999": 74.05629770400002 }, "optimized": { "nb_forwards": 265, "throughput": 17.67, "latency_mean": 56.682904645283024, "latency_std": 0.29074631764368225, "latency_50": 56.604756, "latency_90": 56.9511692, "latency_95": 57.2699554, "latency_99": 58.03212468, "latency_999": 58.197217128 } }, { "batch_size": 8, "input_length": 64, "baseline": { "nb_forwards": 85, "throughput": 5.67, "latency_mean": 176.69271157647057, "latency_std": 19.225586228496905, "latency_50": 166.86136, "latency_90": 213.846849, "latency_95": 214.451406, "latency_99": 216.24402419999998, "latency_999": 217.66288122 }, "optimized": { "nb_forwards": 72, "throughput": 4.8, "latency_mean": 209.2788515277778, "latency_std": 1.0661208330184972, "latency_50": 209.1019465, "latency_90": 210.5121405, "latency_95": 211.63171434999998, "latency_99": 212.82214144, "latency_999": 212.860057144 } }, { "batch_size": 8, "input_length": 32, "baseline": { "nb_forwards": 170, "throughput": 11.33, "latency_mean": 88.32521131764706, "latency_std": 1.6082421836618368, "latency_50": 88.34603, "latency_90": 90.24364059999999, "latency_95": 90.82843125, "latency_99": 92.49519597, "latency_999": 94.140928359 }, "optimized": { "nb_forwards": 160, "throughput": 10.67, "latency_mean": 94.18205897499999, "latency_std": 11.564821917485974, "latency_50": 103.343016, "latency_90": 104.0051318, "latency_95": 104.4116828, "latency_99": 105.75135414, "latency_999": 106.152972641 } }, { "batch_size": 1, "input_length": 64, "baseline": { "nb_forwards": 331, "throughput": 22.07, "latency_mean": 45.34665541087613, "latency_std": 4.816947937501302, "latency_50": 43.877047, "latency_90": 48.211528, "latency_95": 59.708112, "latency_99": 60.144242399999996, "latency_999": 60.57435263 }, "optimized": { "nb_forwards": 623, "throughput": 41.53, "latency_mean": 24.101585462279292, "latency_std": 0.490914928207836, "latency_50": 24.033518, "latency_90": 24.7787182, "latency_95": 25.0034624, "latency_99": 25.4011857, "latency_999": 25.923231378000025 } } ], "others": { "baseline": { "precision": 0.9358012339503085, "recall": 0.9444631437226523, "f1": 0.9401122372057961, "accuracy": 0.9882013940267124 }, "optimized": { "precision": 0.9358012339503085, "recall": 0.9444631437226523, "f1": 0.9401122372057961, "accuracy": 0.9882013940267124 } } }, "max_eval_samples": null, "time_benchmark_args": { "duration": 15, "warmup_runs": 5 }, "model_type": "distilbert" }, { "model_name_or_path": "elastic/distilbert-base-uncased-finetuned-conll03-english", "task": "token-classification", "task_args": null, "dataset": { "path": "conll2003", "eval_split": "validation", "data_keys": { "primary": "tokens", "secondary": null }, "ref_keys": [ "ner_tags" ], "name": null, "calibration_split": "train" }, "quantization_approach": "static", "operators_to_quantize": [ "Add" ], "node_exclusion": [ "layernorm", "gelu", "residual", "gather", "softmax" ], "aware_training": false, "per_channel": false, "calibration": { "method": "minmax", "num_calibration_samples": 100, "calibration_histogram_percentile": null, "calibration_moving_average": null, "calibration_moving_average_constant": null }, "framework": "onnxruntime", "framework_args": { "opset": 11, "optimization_level": 1 }, "hardware": "Architecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nByte Order: Little Endian\nAddress sizes: 46 bits physical, 48 bits virtual\nCPU(s): 8\nOn-line CPU(s) list: 0-7\nThread(s) per core: 2\nCore(s) per socket: 4\nSocket(s): 1\nNUMA node(s): 1\nVendor ID: GenuineIntel\nCPU family: 6\nModel: 85\nModel name: Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz\nStepping: 7\nCPU MHz: 3099.991\nBogoMIPS: 5000.00\nHypervisor vendor: KVM\nVirtualization type: full\nL1d cache: 128 KiB\nL1i cache: 128 KiB\nL2 cache: 4 MiB\nL3 cache: 35.8 MiB\nNUMA node0 CPU(s): 0-7\nVulnerability Itlb multihit: KVM: Vulnerable\nVulnerability L1tf: Mitigation; PTE Inversion\nVulnerability Mds: Vulnerable: Clear CPU buffers attempted, no microcode; SMT Host state unknown\nVulnerability Meltdown: Mitigation; PTI\nVulnerability Spec store bypass: Vulnerable\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Retpolines, STIBP disabled, RSB filling\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single pti fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid mpx avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves ida arat pku ospke avx512_vnni\n", "versions": { "transformers": "4.20.1", "optimum": "1.2.3.dev0", "optimum_hash": "5c9af4e5f93c7e9bd523563230732b49603dc4d7" }, "evaluation": { "time": [ { "batch_size": 1, "input_length": 64, "baseline": { "nb_forwards": 348, "throughput": 23.2, "latency_mean": 43.15195731609195, "latency_std": 1.3049791783770857, "latency_50": 43.026682, "latency_90": 44.93291970000001, "latency_95": 45.4691658, "latency_99": 46.60123, "latency_999": 47.391866047 }, "optimized": { "nb_forwards": 456, "throughput": 30.4, "latency_mean": 32.961429899122805, "latency_std": 0.22594982626954999, "latency_50": 32.925405, "latency_90": 33.210001, "latency_95": 33.29684425, "latency_99": 33.561622449999994, "latency_999": 34.45556356 } }, { "batch_size": 1, "input_length": 32, "baseline": { "nb_forwards": 328, "throughput": 21.87, "latency_mean": 45.769709847560975, "latency_std": 5.105158395405133, "latency_50": 47.564638, "latency_90": 49.1212201, "latency_95": 49.496884, "latency_99": 50.38536208, "latency_999": 50.559775634000005 }, "optimized": { "nb_forwards": 790, "throughput": 52.67, "latency_mean": 18.99811082658228, "latency_std": 0.18571276934069156, "latency_50": 18.955887, "latency_90": 19.2610981, "latency_95": 19.371919350000002, "latency_99": 19.535236910000002, "latency_999": 19.715454875 } }, { "batch_size": 1, "input_length": 128, "baseline": { "nb_forwards": 214, "throughput": 14.27, "latency_mean": 70.2763914906542, "latency_std": 0.2860076644639914, "latency_50": 70.2829375, "latency_90": 70.6419829, "latency_95": 70.73166590000001, "latency_99": 70.99700557, "latency_999": 71.13112393099999 }, "optimized": { "nb_forwards": 240, "throughput": 16.0, "latency_mean": 62.699298579166665, "latency_std": 0.3115197355843105, "latency_50": 62.624987, "latency_90": 63.0751223, "latency_95": 63.22834435, "latency_99": 63.65015018, "latency_999": 64.543749036 } }, { "batch_size": 8, "input_length": 64, "baseline": { "nb_forwards": 71, "throughput": 4.73, "latency_mean": 213.8736818169014, "latency_std": 1.581524798477253, "latency_50": 213.656346, "latency_90": 214.2873, "latency_95": 214.4610125, "latency_99": 218.50125879999996, "latency_999": 225.79765168000006 }, "optimized": { "nb_forwards": 67, "throughput": 4.47, "latency_mean": 224.49603488059702, "latency_std": 14.179003233192402, "latency_50": 228.344384, "latency_90": 228.9939548, "latency_95": 229.2531382, "latency_99": 229.51218631999998, "latency_999": 229.879069232 } }, { "batch_size": 4, "input_length": 64, "baseline": { "nb_forwards": 126, "throughput": 8.4, "latency_mean": 119.57019325396826, "latency_std": 0.9318178327933169, "latency_50": 119.624132, "latency_90": 120.7031565, "latency_95": 120.95046825, "latency_99": 121.62923875, "latency_999": 122.22802 }, "optimized": { "nb_forwards": 163, "throughput": 10.87, "latency_mean": 92.49704997546013, "latency_std": 3.5224258875712082, "latency_50": 91.538022, "latency_90": 98.7628616, "latency_95": 99.7127255, "latency_99": 101.79093066, "latency_999": 103.67060976999998 } }, { "batch_size": 4, "input_length": 128, "baseline": { "nb_forwards": 85, "throughput": 5.67, "latency_mean": 177.4090096117647, "latency_std": 18.424368744880013, "latency_50": 169.497669, "latency_90": 219.17437120000002, "latency_95": 221.619667, "latency_99": 223.05841704, "latency_999": 223.330345704 }, "optimized": { "nb_forwards": 71, "throughput": 4.73, "latency_mean": 211.88950738028169, "latency_std": 29.120931744682288, "latency_50": 237.172705, "latency_90": 238.463971, "latency_95": 238.7496675, "latency_99": 239.3086489, "latency_999": 239.40512899 } }, { "batch_size": 4, "input_length": 32, "baseline": { "nb_forwards": 272, "throughput": 18.13, "latency_mean": 55.23031008455882, "latency_std": 1.1571386368235503, "latency_50": 55.2722705, "latency_90": 56.607078200000004, "latency_95": 57.48416465, "latency_99": 58.30250327, "latency_999": 58.528941114 }, "optimized": { "nb_forwards": 253, "throughput": 16.87, "latency_mean": 59.30528993675889, "latency_std": 0.2044975324140483, "latency_50": 59.279888, "latency_90": 59.5549104, "latency_95": 59.60868, "latency_99": 59.898025759999996, "latency_999": 60.518632308 } }, { "batch_size": 8, "input_length": 128, "baseline": { "nb_forwards": 37, "throughput": 2.47, "latency_mean": 409.6796750540541, "latency_std": 3.4623663344659903, "latency_50": 409.339737, "latency_90": 413.77022339999996, "latency_95": 416.1084598, "latency_99": 419.1264798, "latency_999": 419.41464377999995 }, "optimized": { "nb_forwards": 31, "throughput": 2.07, "latency_mean": 493.00366906451615, "latency_std": 3.929536871101732, "latency_50": 493.144742, "latency_90": 498.634207, "latency_95": 498.8949985, "latency_99": 499.6773647, "latency_999": 499.91265107 } }, { "batch_size": 8, "input_length": 32, "baseline": { "nb_forwards": 173, "throughput": 11.53, "latency_mean": 87.05498305780347, "latency_std": 1.5424139654073576, "latency_50": 86.877938, "latency_90": 88.887705, "latency_95": 89.4437458, "latency_99": 91.18471404, "latency_999": 92.01616860000001 }, "optimized": { "nb_forwards": 135, "throughput": 9.0, "latency_mean": 111.36274993333333, "latency_std": 9.414259569194291, "latency_50": 115.101582, "latency_90": 116.2764454, "latency_95": 116.6387619, "latency_99": 116.93107282, "latency_999": 117.27618652400001 } } ], "others": { "baseline": { "precision": 0.9358012339503085, "recall": 0.9444631437226523, "f1": 0.9401122372057961, "accuracy": 0.9882013940267124 }, "optimized": { "precision": 0.9087171052631579, "recall": 0.929821608885897, "f1": 0.9191482282482116, "accuracy": 0.9856898095868541 } } }, "max_eval_samples": null, "time_benchmark_args": { "duration": 15, "warmup_runs": 5 }, "model_type": "distilbert" }, { "model_name_or_path": "elastic/distilbert-base-uncased-finetuned-conll03-english", "task": "token-classification", "task_args": null, "dataset": { "path": "conll2003", "eval_split": "validation", "data_keys": { "primary": "tokens", "secondary": null }, "ref_keys": [ "ner_tags" ], "name": null, "calibration_split": "train" }, "quantization_approach": "static", "operators_to_quantize": [ "Add" ], "node_exclusion": [], "aware_training": false, "per_channel": false, "calibration": { "method": "minmax", "num_calibration_samples": 100, "calibration_histogram_percentile": null, "calibration_moving_average": null, "calibration_moving_average_constant": null }, "framework": "onnxruntime", "framework_args": { "opset": 11, "optimization_level": 1 }, "hardware": "Architecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nByte Order: Little Endian\nAddress sizes: 46 bits physical, 48 bits virtual\nCPU(s): 8\nOn-line CPU(s) list: 0-7\nThread(s) per core: 2\nCore(s) per socket: 4\nSocket(s): 1\nNUMA node(s): 1\nVendor ID: GenuineIntel\nCPU family: 6\nModel: 85\nModel name: Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz\nStepping: 7\nCPU MHz: 3119.116\nBogoMIPS: 5000.00\nHypervisor vendor: KVM\nVirtualization type: full\nL1d cache: 128 KiB\nL1i cache: 128 KiB\nL2 cache: 4 MiB\nL3 cache: 35.8 MiB\nNUMA node0 CPU(s): 0-7\nVulnerability Itlb multihit: KVM: Vulnerable\nVulnerability L1tf: Mitigation; PTE Inversion\nVulnerability Mds: Vulnerable: Clear CPU buffers attempted, no microcode; SMT Host state unknown\nVulnerability Meltdown: Mitigation; PTI\nVulnerability Spec store bypass: Vulnerable\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Retpolines, STIBP disabled, RSB filling\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single pti fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid mpx avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves ida arat pku ospke avx512_vnni\n", "versions": { "transformers": "4.20.1", "optimum": "1.2.3.dev0", "optimum_hash": "5c9af4e5f93c7e9bd523563230732b49603dc4d7" }, "evaluation": { "time": [ { "batch_size": 4, "input_length": 128, "baseline": { "nb_forwards": 90, "throughput": 6.0, "latency_mean": 168.29981604444444, "latency_std": 3.0980387809618484, "latency_50": 167.5409385, "latency_90": 170.98883170000002, "latency_95": 174.5708666, "latency_99": 179.67467568, "latency_999": 183.294064668 }, "optimized": { "nb_forwards": 75, "throughput": 5.0, "latency_mean": 201.88371818666667, "latency_std": 5.1702570236285155, "latency_50": 201.088276, "latency_90": 208.22007219999998, "latency_95": 211.4593359, "latency_99": 214.37736256000002, "latency_999": 218.26520815600003 } }, { "batch_size": 4, "input_length": 64, "baseline": { "nb_forwards": 128, "throughput": 8.53, "latency_mean": 117.345409171875, "latency_std": 1.0232306892142995, "latency_50": 117.570859, "latency_90": 118.4811602, "latency_95": 118.66072285, "latency_99": 119.97331652, "latency_999": 120.173632319 }, "optimized": { "nb_forwards": 147, "throughput": 9.8, "latency_mean": 102.08866921088435, "latency_std": 4.207024690408841, "latency_50": 100.900148, "latency_90": 106.6952282, "latency_95": 111.67752269999998, "latency_99": 115.24070447999998, "latency_999": 122.8603576240001 } }, { "batch_size": 4, "input_length": 32, "baseline": { "nb_forwards": 256, "throughput": 17.07, "latency_mean": 58.79949217578125, "latency_std": 7.2389593462092225, "latency_50": 55.00797, "latency_90": 71.60057, "latency_95": 72.5113045, "latency_99": 72.87857595, "latency_999": 73.62029899000001 }, "optimized": { "nb_forwards": 228, "throughput": 15.2, "latency_mean": 66.03353574122808, "latency_std": 0.2155799936674679, "latency_50": 66.018175, "latency_90": 66.2633382, "latency_95": 66.3559738, "latency_99": 66.66150048, "latency_999": 67.423175391 } }, { "batch_size": 1, "input_length": 64, "baseline": { "nb_forwards": 342, "throughput": 22.8, "latency_mean": 44.01321974853801, "latency_std": 4.990156500118981, "latency_50": 42.3942795, "latency_90": 56.7239391, "latency_95": 57.8124011, "latency_99": 58.51948686, "latency_999": 59.132265571 }, "optimized": { "nb_forwards": 474, "throughput": 31.6, "latency_mean": 31.67708501898734, "latency_std": 1.0256274074298153, "latency_50": 31.644259, "latency_90": 33.042136, "latency_95": 33.4474054, "latency_99": 34.71017928, "latency_999": 35.40770895199999 } }, { "batch_size": 8, "input_length": 32, "baseline": { "nb_forwards": 129, "throughput": 8.6, "latency_mean": 116.96203879069768, "latency_std": 0.9805415537137262, "latency_50": 116.711745, "latency_90": 117.476479, "latency_95": 119.6996568, "latency_99": 120.4817116, "latency_999": 120.693500216 }, "optimized": { "nb_forwards": 152, "throughput": 10.13, "latency_mean": 98.81530088157895, "latency_std": 2.408744102355572, "latency_50": 98.4953865, "latency_90": 101.9166918, "latency_95": 103.45597070000001, "latency_99": 105.89102432000001, "latency_999": 107.434858943 } }, { "batch_size": 8, "input_length": 64, "baseline": { "nb_forwards": 72, "throughput": 4.8, "latency_mean": 211.162219375, "latency_std": 0.50332579026598, "latency_50": 211.161418, "latency_90": 211.835143, "latency_95": 211.9189473, "latency_99": 212.26486029, "latency_999": 212.469340929 }, "optimized": { "nb_forwards": 78, "throughput": 5.2, "latency_mean": 193.01161867948716, "latency_std": 4.477161349632226, "latency_50": 193.023931, "latency_90": 198.0193413, "latency_95": 200.14843679999998, "latency_99": 206.14731525000002, "latency_999": 211.593862125 } }, { "batch_size": 1, "input_length": 128, "baseline": { "nb_forwards": 235, "throughput": 15.67, "latency_mean": 63.85546833191489, "latency_std": 6.973859157506543, "latency_50": 69.383357, "latency_90": 70.026538, "latency_95": 70.3286013, "latency_99": 70.6866161, "latency_999": 71.185565742 }, "optimized": { "nb_forwards": 244, "throughput": 16.27, "latency_mean": 61.635230135245905, "latency_std": 5.810202171663617, "latency_50": 58.524244, "latency_90": 70.1898687, "latency_95": 70.36230454999999, "latency_99": 70.64778987000001, "latency_999": 72.10732932400002 } }, { "batch_size": 8, "input_length": 128, "baseline": { "nb_forwards": 38, "throughput": 2.53, "latency_mean": 397.28297392105264, "latency_std": 0.6990720798085198, "latency_50": 397.169161, "latency_90": 398.30781060000004, "latency_95": 398.40529275, "latency_99": 398.80296562, "latency_999": 398.940152962 }, "optimized": { "nb_forwards": 38, "throughput": 2.53, "latency_mean": 397.74469242105266, "latency_std": 9.112867850853375, "latency_50": 396.410306, "latency_90": 411.3193268, "latency_95": 412.24714525, "latency_99": 415.15970197, "latency_999": 416.37122889700004 } }, { "batch_size": 1, "input_length": 32, "baseline": { "nb_forwards": 336, "throughput": 22.4, "latency_mean": 44.668419485119045, "latency_std": 4.092836734909812, "latency_50": 46.0771455, "latency_90": 46.8151985, "latency_95": 47.12449375, "latency_99": 47.4169311, "latency_999": 47.675663060000005 }, "optimized": { "nb_forwards": 800, "throughput": 53.33, "latency_mean": 18.77007987125, "latency_std": 1.3253872815877223, "latency_50": 18.32753, "latency_90": 21.5042558, "latency_95": 21.83777205, "latency_99": 22.11860088, "latency_999": 22.523989289000014 } } ], "others": { "baseline": { "precision": 0.9358012339503085, "recall": 0.9444631437226523, "f1": 0.9401122372057961, "accuracy": 0.9882013940267124 }, "optimized": { "precision": 0.05001838911364472, "recall": 0.16021541568495457, "f1": 0.07623623623623622, "accuracy": 0.31141700089560376 } } }, "max_eval_samples": null, "time_benchmark_args": { "duration": 15, "warmup_runs": 5 }, "model_type": "distilbert" }, { "model_name_or_path": "elastic/distilbert-base-uncased-finetuned-conll03-english", "task": "token-classification", "task_args": null, "dataset": { "path": "conll2003", "eval_split": "validation", "data_keys": { "primary": "tokens", "secondary": null }, "ref_keys": [ "ner_tags" ], "name": null, "calibration_split": "train" }, "quantization_approach": "dynamic", "operators_to_quantize": [ "Add", "MatMul" ], "node_exclusion": [ "layernorm", "gelu", "residual", "gather", "softmax" ], "aware_training": false, "per_channel": false, "calibration": { "method": "minmax", "num_calibration_samples": 100, "calibration_histogram_percentile": null, "calibration_moving_average": null, "calibration_moving_average_constant": null }, "framework": "onnxruntime", "framework_args": { "opset": 11, "optimization_level": 1 }, "hardware": "Architecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nByte Order: Little Endian\nAddress sizes: 46 bits physical, 48 bits virtual\nCPU(s): 8\nOn-line CPU(s) list: 0-7\nThread(s) per core: 2\nCore(s) per socket: 4\nSocket(s): 1\nNUMA node(s): 1\nVendor ID: GenuineIntel\nCPU family: 6\nModel: 85\nModel name: Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz\nStepping: 7\nCPU MHz: 3170.168\nBogoMIPS: 5000.00\nHypervisor vendor: KVM\nVirtualization type: full\nL1d cache: 128 KiB\nL1i cache: 128 KiB\nL2 cache: 4 MiB\nL3 cache: 35.8 MiB\nNUMA node0 CPU(s): 0-7\nVulnerability Itlb multihit: KVM: Vulnerable\nVulnerability L1tf: Mitigation; PTE Inversion\nVulnerability Mds: Vulnerable: Clear CPU buffers attempted, no microcode; SMT Host state unknown\nVulnerability Meltdown: Mitigation; PTI\nVulnerability Spec store bypass: Vulnerable\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Retpolines, STIBP disabled, RSB filling\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single pti fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid mpx avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves ida arat pku ospke avx512_vnni\n", "versions": { "transformers": "4.20.1", "optimum": "1.2.3.dev0", "optimum_hash": "5c9af4e5f93c7e9bd523563230732b49603dc4d7" }, "evaluation": { "time": [ { "batch_size": 1, "input_length": 32, "baseline": { "nb_forwards": 456, "throughput": 30.4, "latency_mean": 32.90015813157895, "latency_std": 0.9986947055465656, "latency_50": 32.8186695, "latency_90": 34.223269, "latency_95": 34.7373955, "latency_99": 35.79150405, "latency_999": 37.31698694000001 }, "optimized": { "nb_forwards": 2133, "throughput": 142.2, "latency_mean": 7.03493227238631, "latency_std": 0.35354984774223724, "latency_50": 6.950204, "latency_90": 7.5335166, "latency_95": 7.5981322, "latency_99": 7.7467418, "latency_999": 7.949874583999998 } }, { "batch_size": 4, "input_length": 64, "baseline": { "nb_forwards": 128, "throughput": 8.53, "latency_mean": 117.711207453125, "latency_std": 0.3039802910847197, "latency_50": 117.713083, "latency_90": 118.1415121, "latency_95": 118.31928495, "latency_99": 118.40991929, "latency_999": 118.45796044400001 }, "optimized": { "nb_forwards": 342, "throughput": 22.8, "latency_mean": 43.9346932251462, "latency_std": 1.9823168868846608, "latency_50": 44.48181, "latency_90": 44.9406133, "latency_95": 45.33209005, "latency_99": 46.44872295999999, "latency_999": 47.516010738999995 } }, { "batch_size": 8, "input_length": 32, "baseline": { "nb_forwards": 142, "throughput": 9.47, "latency_mean": 106.45585029577465, "latency_std": 13.710361788824905, "latency_50": 115.6649905, "latency_90": 116.2565268, "latency_95": 116.41340004999999, "latency_99": 116.79343428, "latency_999": 119.25626965200003 }, "optimized": { "nb_forwards": 355, "throughput": 23.67, "latency_mean": 42.35345902253521, "latency_std": 0.16699971956793372, "latency_50": 42.330536, "latency_90": 42.578397200000005, "latency_95": 42.637379, "latency_99": 42.80914288, "latency_999": 43.008794852 } }, { "batch_size": 1, "input_length": 128, "baseline": { "nb_forwards": 272, "throughput": 18.13, "latency_mean": 55.20024977941176, "latency_std": 0.9356174611392704, "latency_50": 55.154456, "latency_90": 56.3394606, "latency_95": 56.87050635, "latency_99": 57.61698746000001, "latency_999": 58.883503068999985 }, "optimized": { "nb_forwards": 584, "throughput": 38.93, "latency_mean": 25.724440731164385, "latency_std": 0.1774167175481294, "latency_50": 25.694553, "latency_90": 25.9448093, "latency_95": 26.0568756, "latency_99": 26.254885369999997, "latency_999": 26.525804783000005 } }, { "batch_size": 4, "input_length": 32, "baseline": { "nb_forwards": 214, "throughput": 14.27, "latency_mean": 70.41022843925234, "latency_std": 0.9857357168050909, "latency_50": 70.4451575, "latency_90": 71.6293733, "latency_95": 71.97065065000001, "latency_99": 72.92320712, "latency_999": 73.059142452 }, "optimized": { "nb_forwards": 662, "throughput": 44.13, "latency_mean": 22.66916344410876, "latency_std": 0.1531962446356845, "latency_50": 22.644899, "latency_90": 22.882265699999998, "latency_95": 22.93566205, "latency_99": 23.106536690000002, "latency_999": 23.233784632 } }, { "batch_size": 4, "input_length": 128, "baseline": { "nb_forwards": 68, "throughput": 4.53, "latency_mean": 220.69089520588233, "latency_std": 2.3715499250111147, "latency_50": 220.928956, "latency_90": 223.564325, "latency_95": 224.24846325, "latency_99": 225.59921588999998, "latency_999": 225.913941489 }, "optimized": { "nb_forwards": 160, "throughput": 10.67, "latency_mean": 94.33282233125, "latency_std": 1.3440036091128054, "latency_50": 94.0403795, "latency_90": 96.10274629999999, "latency_95": 96.99199729999998, "latency_99": 98.02360016, "latency_999": 99.111505656 } }, { "batch_size": 8, "input_length": 64, "baseline": { "nb_forwards": 91, "throughput": 6.07, "latency_mean": 165.66658007692308, "latency_std": 9.200627894933877, "latency_50": 163.47137, "latency_90": 169.773734, "latency_95": 172.0753955, "latency_99": 213.2899759, "latency_999": 214.50856338999998 }, "optimized": { "nb_forwards": 172, "throughput": 11.47, "latency_mean": 87.71301163953488, "latency_std": 1.0979033087486965, "latency_50": 87.4641435, "latency_90": 89.3678849, "latency_95": 89.7342365, "latency_99": 90.56987919, "latency_999": 90.839936162 } }, { "batch_size": 8, "input_length": 128, "baseline": { "nb_forwards": 39, "throughput": 2.6, "latency_mean": 391.16164482051283, "latency_std": 24.77584642803748, "latency_50": 397.925266, "latency_90": 401.79618239999996, "latency_95": 402.9476371, "latency_99": 403.36617936, "latency_999": 403.412153736 }, "optimized": { "nb_forwards": 82, "throughput": 5.47, "latency_mean": 183.34596223170732, "latency_std": 1.9414583808520627, "latency_50": 182.9643005, "latency_90": 185.9883817, "latency_95": 187.12977195, "latency_99": 188.01645273, "latency_999": 188.707885173 } }, { "batch_size": 1, "input_length": 64, "baseline": { "nb_forwards": 254, "throughput": 16.93, "latency_mean": 59.14541264566929, "latency_std": 1.0597184999316425, "latency_50": 59.1676845, "latency_90": 60.564947700000005, "latency_95": 60.97932805, "latency_99": 61.52093252, "latency_999": 61.899637246 }, "optimized": { "nb_forwards": 1103, "throughput": 73.53, "latency_mean": 13.600328429737079, "latency_std": 0.19450611359009803, "latency_50": 13.567231, "latency_90": 13.8727804, "latency_95": 14.010098300000001, "latency_99": 14.15573608, "latency_999": 14.268220878 } } ], "others": { "baseline": { "precision": 0.9358012339503085, "recall": 0.9444631437226523, "f1": 0.9401122372057961, "accuracy": 0.9882013940267124 }, "optimized": { "precision": 0.9337560487235108, "recall": 0.9417704476607203, "f1": 0.9377461248428991, "accuracy": 0.9878314707371209 } } }, "max_eval_samples": null, "time_benchmark_args": { "duration": 15, "warmup_runs": 5 }, "model_type": "distilbert" } ]