|
[ |
|
{ |
|
"model_name_or_path": "elastic/distilbert-base-uncased-finetuned-conll03-english", |
|
"task": "token-classification", |
|
"task_args": null, |
|
"dataset": { |
|
"path": "conll2003", |
|
"eval_split": "validation", |
|
"data_keys": { |
|
"primary": "tokens", |
|
"secondary": null |
|
}, |
|
"ref_keys": [ |
|
"ner_tags" |
|
], |
|
"name": null, |
|
"calibration_split": "train" |
|
}, |
|
"quantization_approach": "static", |
|
"operators_to_quantize": [ |
|
"Add", |
|
"MatMul" |
|
], |
|
"node_exclusion": [], |
|
"aware_training": false, |
|
"per_channel": false, |
|
"calibration": { |
|
"method": "minmax", |
|
"num_calibration_samples": 100, |
|
"calibration_histogram_percentile": null, |
|
"calibration_moving_average": null, |
|
"calibration_moving_average_constant": null |
|
}, |
|
"framework": "onnxruntime", |
|
"framework_args": { |
|
"opset": 11, |
|
"optimization_level": 1 |
|
}, |
|
"hardware": "Architecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nByte Order: Little Endian\nAddress sizes: 46 bits physical, 48 bits virtual\nCPU(s): 8\nOn-line CPU(s) list: 0-7\nThread(s) per core: 2\nCore(s) per socket: 4\nSocket(s): 1\nNUMA node(s): 1\nVendor ID: GenuineIntel\nCPU family: 6\nModel: 85\nModel name: Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz\nStepping: 7\nCPU MHz: 3100.670\nBogoMIPS: 5000.00\nHypervisor vendor: KVM\nVirtualization type: full\nL1d cache: 128 KiB\nL1i cache: 128 KiB\nL2 cache: 4 MiB\nL3 cache: 35.8 MiB\nNUMA node0 CPU(s): 0-7\nVulnerability Itlb multihit: KVM: Vulnerable\nVulnerability L1tf: Mitigation; PTE Inversion\nVulnerability Mds: Vulnerable: Clear CPU buffers attempted, no microcode; SMT Host state unknown\nVulnerability Meltdown: Mitigation; PTI\nVulnerability Spec store bypass: Vulnerable\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Retpolines, STIBP disabled, RSB filling\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single pti fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid mpx avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves ida arat pku ospke avx512_vnni\n", |
|
"versions": { |
|
"transformers": "4.20.1", |
|
"optimum": "1.2.3.dev0", |
|
"optimum_hash": "5c9af4e5f93c7e9bd523563230732b49603dc4d7" |
|
}, |
|
"evaluation": { |
|
"time": [ |
|
{ |
|
"batch_size": 8, |
|
"input_length": 128, |
|
"baseline": { |
|
"nb_forwards": 40, |
|
"throughput": 2.67, |
|
"latency_mean": 375.10382185000003, |
|
"latency_std": 41.77851887343237, |
|
"latency_50": 402.085788, |
|
"latency_90": 405.6853881, |
|
"latency_95": 406.5309876, |
|
"latency_99": 407.68657795999997, |
|
"latency_999": 407.928640196 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 67, |
|
"throughput": 4.47, |
|
"latency_mean": 225.09343717910448, |
|
"latency_std": 5.4422008927739745, |
|
"latency_50": 224.468437, |
|
"latency_90": 233.1922012, |
|
"latency_95": 234.06977830000002, |
|
"latency_99": 238.47171616, |
|
"latency_999": 239.621179816 |
|
} |
|
}, |
|
{ |
|
"batch_size": 4, |
|
"input_length": 32, |
|
"baseline": { |
|
"nb_forwards": 226, |
|
"throughput": 15.07, |
|
"latency_mean": 66.56901730973452, |
|
"latency_std": 7.513087298346711, |
|
"latency_50": 71.2563305, |
|
"latency_90": 72.6457545, |
|
"latency_95": 72.8911035, |
|
"latency_99": 73.828916, |
|
"latency_999": 74.4237675 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 436, |
|
"throughput": 29.07, |
|
"latency_mean": 34.45165110091743, |
|
"latency_std": 1.0538553485588218, |
|
"latency_50": 34.582433, |
|
"latency_90": 34.9882515, |
|
"latency_95": 35.17064425, |
|
"latency_99": 35.965547449999995, |
|
"latency_999": 38.29062908499999 |
|
} |
|
}, |
|
{ |
|
"batch_size": 4, |
|
"input_length": 64, |
|
"baseline": { |
|
"nb_forwards": 126, |
|
"throughput": 8.4, |
|
"latency_mean": 119.99883396031747, |
|
"latency_std": 0.5389393608027111, |
|
"latency_50": 119.924409, |
|
"latency_90": 120.578213, |
|
"latency_95": 120.87888125, |
|
"latency_99": 121.94754125, |
|
"latency_999": 122.377029 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 260, |
|
"throughput": 17.33, |
|
"latency_mean": 57.854139219230774, |
|
"latency_std": 2.41471729691721, |
|
"latency_50": 57.400247, |
|
"latency_90": 61.3181592, |
|
"latency_95": 62.38365544999999, |
|
"latency_99": 64.46206894, |
|
"latency_999": 65.22337084999998 |
|
} |
|
}, |
|
{ |
|
"batch_size": 1, |
|
"input_length": 64, |
|
"baseline": { |
|
"nb_forwards": 348, |
|
"throughput": 23.2, |
|
"latency_mean": 43.22926725, |
|
"latency_std": 1.1946103356932485, |
|
"latency_50": 43.0492745, |
|
"latency_90": 44.705008299999996, |
|
"latency_95": 45.636852299999994, |
|
"latency_99": 46.63035402, |
|
"latency_999": 46.997215917999995 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 789, |
|
"throughput": 52.6, |
|
"latency_mean": 19.02440978073511, |
|
"latency_std": 1.1428706688800712, |
|
"latency_50": 18.854472, |
|
"latency_90": 20.772688400000003, |
|
"latency_95": 21.339546999999996, |
|
"latency_99": 21.90764228, |
|
"latency_999": 22.18167628399999 |
|
} |
|
}, |
|
{ |
|
"batch_size": 1, |
|
"input_length": 128, |
|
"baseline": { |
|
"nb_forwards": 204, |
|
"throughput": 13.6, |
|
"latency_mean": 73.54620642647059, |
|
"latency_std": 1.573277335192714, |
|
"latency_50": 73.3753375, |
|
"latency_90": 75.5904331, |
|
"latency_95": 76.15987129999999, |
|
"latency_99": 78.56596293, |
|
"latency_999": 80.276804319 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 396, |
|
"throughput": 26.4, |
|
"latency_mean": 37.94637206818182, |
|
"latency_std": 0.22863560761755683, |
|
"latency_50": 37.917286, |
|
"latency_90": 38.2418925, |
|
"latency_95": 38.31172375, |
|
"latency_99": 38.762135050000005, |
|
"latency_999": 38.947024510000006 |
|
} |
|
}, |
|
{ |
|
"batch_size": 8, |
|
"input_length": 64, |
|
"baseline": { |
|
"nb_forwards": 70, |
|
"throughput": 4.67, |
|
"latency_mean": 215.9852832857143, |
|
"latency_std": 2.6695382982970624, |
|
"latency_50": 215.4558835, |
|
"latency_90": 219.254347, |
|
"latency_95": 219.83214235, |
|
"latency_99": 222.17290862000002, |
|
"latency_999": 222.546938162 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 116, |
|
"throughput": 7.73, |
|
"latency_mean": 130.388671, |
|
"latency_std": 0.9538270722224035, |
|
"latency_50": 130.420778, |
|
"latency_90": 131.4585225, |
|
"latency_95": 131.97905225, |
|
"latency_99": 133.03534015, |
|
"latency_999": 133.14044914 |
|
} |
|
}, |
|
{ |
|
"batch_size": 4, |
|
"input_length": 128, |
|
"baseline": { |
|
"nb_forwards": 68, |
|
"throughput": 4.53, |
|
"latency_mean": 222.9005361617647, |
|
"latency_std": 2.9786389600252616, |
|
"latency_50": 222.130681, |
|
"latency_90": 226.3922357, |
|
"latency_95": 227.17735725, |
|
"latency_99": 231.35961219, |
|
"latency_999": 232.82185281900001 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 112, |
|
"throughput": 7.47, |
|
"latency_mean": 135.05900691964285, |
|
"latency_std": 0.852188728183432, |
|
"latency_50": 135.0183545, |
|
"latency_90": 136.08118190000002, |
|
"latency_95": 136.6358585, |
|
"latency_99": 137.6042464, |
|
"latency_999": 137.777202544 |
|
} |
|
}, |
|
{ |
|
"batch_size": 8, |
|
"input_length": 32, |
|
"baseline": { |
|
"nb_forwards": 127, |
|
"throughput": 8.47, |
|
"latency_mean": 118.38734347244095, |
|
"latency_std": 3.928549187092105, |
|
"latency_50": 118.722872, |
|
"latency_90": 119.61111340000001, |
|
"latency_95": 120.56405670000001, |
|
"latency_99": 122.34993956, |
|
"latency_999": 122.379941486 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 233, |
|
"throughput": 15.53, |
|
"latency_mean": 64.56411339484978, |
|
"latency_std": 0.8101063203434803, |
|
"latency_50": 64.369473, |
|
"latency_90": 65.578687, |
|
"latency_95": 66.343236, |
|
"latency_99": 67.23023495999999, |
|
"latency_999": 67.842266136 |
|
} |
|
}, |
|
{ |
|
"batch_size": 1, |
|
"input_length": 32, |
|
"baseline": { |
|
"nb_forwards": 314, |
|
"throughput": 20.93, |
|
"latency_mean": 47.871883372611464, |
|
"latency_std": 0.8675741645315053, |
|
"latency_50": 47.86284, |
|
"latency_90": 48.8556855, |
|
"latency_95": 49.34610644999999, |
|
"latency_99": 50.61812615, |
|
"latency_999": 50.940127244 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 1419, |
|
"throughput": 94.6, |
|
"latency_mean": 10.575771353770262, |
|
"latency_std": 0.6992989868391869, |
|
"latency_50": 10.433279, |
|
"latency_90": 11.4260774, |
|
"latency_95": 12.119523699999998, |
|
"latency_99": 12.74776788, |
|
"latency_999": 13.092563020000012 |
|
} |
|
} |
|
], |
|
"others": { |
|
"baseline": { |
|
"precision": 0.9358012339503085, |
|
"recall": 0.9444631437226523, |
|
"f1": 0.9401122372057961, |
|
"accuracy": 0.9882013940267124 |
|
}, |
|
"optimized": { |
|
"precision": 0.06543578604398588, |
|
"recall": 0.24335240659710536, |
|
"f1": 0.10313837375178317, |
|
"accuracy": 0.35697597445582335 |
|
} |
|
} |
|
}, |
|
"max_eval_samples": null, |
|
"time_benchmark_args": { |
|
"duration": 15, |
|
"warmup_runs": 5 |
|
}, |
|
"model_type": "distilbert" |
|
}, |
|
{ |
|
"model_name_or_path": "elastic/distilbert-base-uncased-finetuned-conll03-english", |
|
"task": "token-classification", |
|
"task_args": null, |
|
"dataset": { |
|
"path": "conll2003", |
|
"eval_split": "validation", |
|
"data_keys": { |
|
"primary": "tokens", |
|
"secondary": null |
|
}, |
|
"ref_keys": [ |
|
"ner_tags" |
|
], |
|
"name": null, |
|
"calibration_split": "train" |
|
}, |
|
"quantization_approach": "static", |
|
"operators_to_quantize": [ |
|
"Add", |
|
"MatMul" |
|
], |
|
"node_exclusion": [ |
|
"layernorm", |
|
"gelu", |
|
"residual", |
|
"gather", |
|
"softmax" |
|
], |
|
"aware_training": false, |
|
"per_channel": false, |
|
"calibration": { |
|
"method": "minmax", |
|
"num_calibration_samples": 100, |
|
"calibration_histogram_percentile": null, |
|
"calibration_moving_average": null, |
|
"calibration_moving_average_constant": null |
|
}, |
|
"framework": "onnxruntime", |
|
"framework_args": { |
|
"opset": 11, |
|
"optimization_level": 1 |
|
}, |
|
"hardware": "Architecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nByte Order: Little Endian\nAddress sizes: 46 bits physical, 48 bits virtual\nCPU(s): 8\nOn-line CPU(s) list: 0-7\nThread(s) per core: 2\nCore(s) per socket: 4\nSocket(s): 1\nNUMA node(s): 1\nVendor ID: GenuineIntel\nCPU family: 6\nModel: 85\nModel name: Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz\nStepping: 7\nCPU MHz: 3105.038\nBogoMIPS: 5000.00\nHypervisor vendor: KVM\nVirtualization type: full\nL1d cache: 128 KiB\nL1i cache: 128 KiB\nL2 cache: 4 MiB\nL3 cache: 35.8 MiB\nNUMA node0 CPU(s): 0-7\nVulnerability Itlb multihit: KVM: Vulnerable\nVulnerability L1tf: Mitigation; PTE Inversion\nVulnerability Mds: Vulnerable: Clear CPU buffers attempted, no microcode; SMT Host state unknown\nVulnerability Meltdown: Mitigation; PTI\nVulnerability Spec store bypass: Vulnerable\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Retpolines, STIBP disabled, RSB filling\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single pti fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid mpx avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves ida arat pku ospke avx512_vnni\n", |
|
"versions": { |
|
"transformers": "4.20.1", |
|
"optimum": "1.2.3.dev0", |
|
"optimum_hash": "5c9af4e5f93c7e9bd523563230732b49603dc4d7" |
|
}, |
|
"evaluation": { |
|
"time": [ |
|
{ |
|
"batch_size": 1, |
|
"input_length": 32, |
|
"baseline": { |
|
"nb_forwards": 315, |
|
"throughput": 21.0, |
|
"latency_mean": 47.722406904761904, |
|
"latency_std": 0.78575656702049, |
|
"latency_50": 47.657137, |
|
"latency_90": 48.539485, |
|
"latency_95": 49.1951445, |
|
"latency_99": 50.459615660000004, |
|
"latency_999": 51.17226236 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 1829, |
|
"throughput": 121.93, |
|
"latency_mean": 8.204672595407327, |
|
"latency_std": 0.6281598971222003, |
|
"latency_50": 8.030814, |
|
"latency_90": 9.035519800000001, |
|
"latency_95": 9.1478804, |
|
"latency_99": 9.59396184, |
|
"latency_999": 10.840923620000012 |
|
} |
|
}, |
|
{ |
|
"batch_size": 8, |
|
"input_length": 32, |
|
"baseline": { |
|
"nb_forwards": 140, |
|
"throughput": 9.33, |
|
"latency_mean": 107.46691665714286, |
|
"latency_std": 14.217711617362141, |
|
"latency_50": 118.404271, |
|
"latency_90": 119.217932, |
|
"latency_95": 120.1323987, |
|
"latency_99": 121.88812750999999, |
|
"latency_999": 122.23331556199999 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 336, |
|
"throughput": 22.4, |
|
"latency_mean": 44.742087273809524, |
|
"latency_std": 1.297316432066614, |
|
"latency_50": 44.5606265, |
|
"latency_90": 46.6604945, |
|
"latency_95": 47.15595925, |
|
"latency_99": 47.76239855, |
|
"latency_999": 48.352806460000004 |
|
} |
|
}, |
|
{ |
|
"batch_size": 4, |
|
"input_length": 128, |
|
"baseline": { |
|
"nb_forwards": 69, |
|
"throughput": 4.6, |
|
"latency_mean": 220.25039004347826, |
|
"latency_std": 1.5005813984244252, |
|
"latency_50": 219.820796, |
|
"latency_90": 222.1719386, |
|
"latency_95": 224.2704662, |
|
"latency_99": 225.16129407999998, |
|
"latency_999": 225.809551408 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 160, |
|
"throughput": 10.67, |
|
"latency_mean": 94.04712534375, |
|
"latency_std": 4.044440830749728, |
|
"latency_50": 92.7247505, |
|
"latency_90": 100.1824987, |
|
"latency_95": 103.149005, |
|
"latency_99": 106.80153862, |
|
"latency_999": 107.010720374 |
|
} |
|
}, |
|
{ |
|
"batch_size": 1, |
|
"input_length": 64, |
|
"baseline": { |
|
"nb_forwards": 251, |
|
"throughput": 16.73, |
|
"latency_mean": 59.98497594422311, |
|
"latency_std": 1.3002235321937636, |
|
"latency_50": 59.807255, |
|
"latency_90": 61.620392, |
|
"latency_95": 62.9347585, |
|
"latency_99": 63.5797715, |
|
"latency_999": 64.073255 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 933, |
|
"throughput": 62.2, |
|
"latency_mean": 16.080496909967845, |
|
"latency_std": 0.6562666342873719, |
|
"latency_50": 16.202643, |
|
"latency_90": 16.5627894, |
|
"latency_95": 16.7647474, |
|
"latency_99": 17.10871436, |
|
"latency_999": 17.232195299999997 |
|
} |
|
}, |
|
{ |
|
"batch_size": 4, |
|
"input_length": 32, |
|
"baseline": { |
|
"nb_forwards": 271, |
|
"throughput": 18.07, |
|
"latency_mean": 55.44747836162362, |
|
"latency_std": 1.1058764508579348, |
|
"latency_50": 55.358138, |
|
"latency_90": 56.887965, |
|
"latency_95": 57.3142435, |
|
"latency_99": 58.44232040000001, |
|
"latency_999": 58.78495296 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 540, |
|
"throughput": 36.0, |
|
"latency_mean": 27.828701338888887, |
|
"latency_std": 0.2830425296733134, |
|
"latency_50": 27.783282, |
|
"latency_90": 28.192628600000003, |
|
"latency_95": 28.425469149999998, |
|
"latency_99": 28.75255937, |
|
"latency_999": 28.907412832000002 |
|
} |
|
}, |
|
{ |
|
"batch_size": 1, |
|
"input_length": 128, |
|
"baseline": { |
|
"nb_forwards": 211, |
|
"throughput": 14.07, |
|
"latency_mean": 71.34490764454976, |
|
"latency_std": 0.8606794549682575, |
|
"latency_50": 71.218379, |
|
"latency_90": 72.307616, |
|
"latency_95": 72.7004855, |
|
"latency_99": 73.44670070000001, |
|
"latency_999": 76.77649755999997 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 473, |
|
"throughput": 31.53, |
|
"latency_mean": 31.754805171247355, |
|
"latency_std": 0.4300285133323451, |
|
"latency_50": 31.703291, |
|
"latency_90": 32.3621104, |
|
"latency_95": 32.6649158, |
|
"latency_99": 33.04651476, |
|
"latency_999": 33.134080912 |
|
} |
|
}, |
|
{ |
|
"batch_size": 4, |
|
"input_length": 64, |
|
"baseline": { |
|
"nb_forwards": 125, |
|
"throughput": 8.33, |
|
"latency_mean": 120.569009496, |
|
"latency_std": 0.8160222004383323, |
|
"latency_50": 120.371051, |
|
"latency_90": 121.2576836, |
|
"latency_95": 122.412088, |
|
"latency_99": 123.63907148, |
|
"latency_999": 124.379808824 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 275, |
|
"throughput": 18.33, |
|
"latency_mean": 54.700663727272726, |
|
"latency_std": 0.6805625823558532, |
|
"latency_50": 54.534815, |
|
"latency_90": 55.637867, |
|
"latency_95": 56.2308853, |
|
"latency_99": 57.06877728, |
|
"latency_999": 57.400091958 |
|
} |
|
}, |
|
{ |
|
"batch_size": 8, |
|
"input_length": 128, |
|
"baseline": { |
|
"nb_forwards": 45, |
|
"throughput": 3.0, |
|
"latency_mean": 337.56930973333334, |
|
"latency_std": 42.73015832642524, |
|
"latency_50": 311.940976, |
|
"latency_90": 408.1195232, |
|
"latency_95": 409.32610719999997, |
|
"latency_99": 409.70182116, |
|
"latency_999": 409.771442316 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 73, |
|
"throughput": 4.87, |
|
"latency_mean": 205.58670958904108, |
|
"latency_std": 17.128151144285876, |
|
"latency_50": 216.043924, |
|
"latency_90": 219.5796966, |
|
"latency_95": 221.6779058, |
|
"latency_99": 224.22378336000003, |
|
"latency_999": 224.454495336 |
|
} |
|
}, |
|
{ |
|
"batch_size": 8, |
|
"input_length": 64, |
|
"baseline": { |
|
"nb_forwards": 70, |
|
"throughput": 4.67, |
|
"latency_mean": 215.1244777, |
|
"latency_std": 1.555883759703903, |
|
"latency_50": 214.6171285, |
|
"latency_90": 216.7817589, |
|
"latency_95": 218.67742330000002, |
|
"latency_99": 220.89366772999998, |
|
"latency_999": 221.10970307300002 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 173, |
|
"throughput": 11.53, |
|
"latency_mean": 86.89619405780347, |
|
"latency_std": 2.3718385044122723, |
|
"latency_50": 86.535865, |
|
"latency_90": 90.37287020000001, |
|
"latency_95": 91.8051702, |
|
"latency_99": 92.75978772, |
|
"latency_999": 92.817659772 |
|
} |
|
} |
|
], |
|
"others": { |
|
"baseline": { |
|
"precision": 0.9358012339503085, |
|
"recall": 0.9444631437226523, |
|
"f1": 0.9401122372057961, |
|
"accuracy": 0.9882013940267124 |
|
}, |
|
"optimized": { |
|
"precision": 0.9038969616908851, |
|
"recall": 0.9212386401884888, |
|
"f1": 0.912485414235706, |
|
"accuracy": 0.9842295860753086 |
|
} |
|
} |
|
}, |
|
"max_eval_samples": null, |
|
"time_benchmark_args": { |
|
"duration": 15, |
|
"warmup_runs": 5 |
|
}, |
|
"model_type": "distilbert" |
|
}, |
|
{ |
|
"model_name_or_path": "elastic/distilbert-base-uncased-finetuned-conll03-english", |
|
"task": "token-classification", |
|
"task_args": null, |
|
"dataset": { |
|
"path": "conll2003", |
|
"eval_split": "validation", |
|
"data_keys": { |
|
"primary": "tokens", |
|
"secondary": null |
|
}, |
|
"ref_keys": [ |
|
"ner_tags" |
|
], |
|
"name": null, |
|
"calibration_split": "train" |
|
}, |
|
"quantization_approach": "dynamic", |
|
"operators_to_quantize": [ |
|
"Add", |
|
"MatMul" |
|
], |
|
"node_exclusion": [], |
|
"aware_training": false, |
|
"per_channel": false, |
|
"calibration": { |
|
"method": "minmax", |
|
"num_calibration_samples": 100, |
|
"calibration_histogram_percentile": null, |
|
"calibration_moving_average": null, |
|
"calibration_moving_average_constant": null |
|
}, |
|
"framework": "onnxruntime", |
|
"framework_args": { |
|
"opset": 11, |
|
"optimization_level": 1 |
|
}, |
|
"hardware": "Architecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nByte Order: Little Endian\nAddress sizes: 46 bits physical, 48 bits virtual\nCPU(s): 8\nOn-line CPU(s) list: 0-7\nThread(s) per core: 2\nCore(s) per socket: 4\nSocket(s): 1\nNUMA node(s): 1\nVendor ID: GenuineIntel\nCPU family: 6\nModel: 85\nModel name: Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz\nStepping: 7\nCPU MHz: 3100.033\nBogoMIPS: 5000.00\nHypervisor vendor: KVM\nVirtualization type: full\nL1d cache: 128 KiB\nL1i cache: 128 KiB\nL2 cache: 4 MiB\nL3 cache: 35.8 MiB\nNUMA node0 CPU(s): 0-7\nVulnerability Itlb multihit: KVM: Vulnerable\nVulnerability L1tf: Mitigation; PTE Inversion\nVulnerability Mds: Vulnerable: Clear CPU buffers attempted, no microcode; SMT Host state unknown\nVulnerability Meltdown: Mitigation; PTI\nVulnerability Spec store bypass: Vulnerable\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Retpolines, STIBP disabled, RSB filling\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single pti fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid mpx avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves ida arat pku ospke avx512_vnni\n", |
|
"versions": { |
|
"transformers": "4.20.1", |
|
"optimum": "1.2.3.dev0", |
|
"optimum_hash": "5c9af4e5f93c7e9bd523563230732b49603dc4d7" |
|
}, |
|
"evaluation": { |
|
"time": [ |
|
{ |
|
"batch_size": 4, |
|
"input_length": 64, |
|
"baseline": { |
|
"nb_forwards": 167, |
|
"throughput": 11.13, |
|
"latency_mean": 90.0069231257485, |
|
"latency_std": 1.7115040048180659, |
|
"latency_50": 89.808392, |
|
"latency_90": 92.1615498, |
|
"latency_95": 93.00187689999998, |
|
"latency_99": 94.87745056, |
|
"latency_999": 96.02146185400001 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 347, |
|
"throughput": 23.13, |
|
"latency_mean": 43.265280452449566, |
|
"latency_std": 2.7751472818818734, |
|
"latency_50": 44.883167, |
|
"latency_90": 45.35501180000001, |
|
"latency_95": 46.0536215, |
|
"latency_99": 46.73552832, |
|
"latency_999": 46.9292037 |
|
} |
|
}, |
|
{ |
|
"batch_size": 1, |
|
"input_length": 32, |
|
"baseline": { |
|
"nb_forwards": 311, |
|
"throughput": 20.73, |
|
"latency_mean": 48.2705229710611, |
|
"latency_std": 1.1247628435592778, |
|
"latency_50": 48.262398, |
|
"latency_90": 49.675041, |
|
"latency_95": 50.1826075, |
|
"latency_99": 51.5223755, |
|
"latency_999": 52.486126479999996 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 1955, |
|
"throughput": 130.33, |
|
"latency_mean": 7.675419680306905, |
|
"latency_std": 0.3408992210776522, |
|
"latency_50": 7.708051, |
|
"latency_90": 7.9747872, |
|
"latency_95": 8.22643, |
|
"latency_99": 8.42614896, |
|
"latency_999": 9.247490550000006 |
|
} |
|
}, |
|
{ |
|
"batch_size": 8, |
|
"input_length": 64, |
|
"baseline": { |
|
"nb_forwards": 70, |
|
"throughput": 4.67, |
|
"latency_mean": 214.59417684285717, |
|
"latency_std": 1.7664217544171172, |
|
"latency_50": 214.2422325, |
|
"latency_90": 216.45445769999998, |
|
"latency_95": 218.51388645, |
|
"latency_99": 220.63894481, |
|
"latency_999": 221.498688881 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 171, |
|
"throughput": 11.4, |
|
"latency_mean": 87.8829150994152, |
|
"latency_std": 0.9834434065965216, |
|
"latency_50": 87.63179, |
|
"latency_90": 88.639136, |
|
"latency_95": 90.3700155, |
|
"latency_99": 91.69530259999999, |
|
"latency_999": 92.0263997 |
|
} |
|
}, |
|
{ |
|
"batch_size": 8, |
|
"input_length": 32, |
|
"baseline": { |
|
"nb_forwards": 170, |
|
"throughput": 11.33, |
|
"latency_mean": 88.68199067647059, |
|
"latency_std": 1.5268934268549699, |
|
"latency_50": 88.7044375, |
|
"latency_90": 90.6080309, |
|
"latency_95": 91.1070495, |
|
"latency_99": 92.72121441, |
|
"latency_999": 94.157151444 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 347, |
|
"throughput": 23.13, |
|
"latency_mean": 43.326895181556196, |
|
"latency_std": 0.4931306808409643, |
|
"latency_50": 43.189003, |
|
"latency_90": 44.0324068, |
|
"latency_95": 44.3867949, |
|
"latency_99": 45.111986800000004, |
|
"latency_999": 45.838947726 |
|
} |
|
}, |
|
{ |
|
"batch_size": 1, |
|
"input_length": 128, |
|
"baseline": { |
|
"nb_forwards": 205, |
|
"throughput": 13.67, |
|
"latency_mean": 73.52497626829268, |
|
"latency_std": 1.0865490902318493, |
|
"latency_50": 73.373698, |
|
"latency_90": 74.60406520000001, |
|
"latency_95": 74.979193, |
|
"latency_99": 76.63692036, |
|
"latency_999": 80.98970668399998 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 562, |
|
"throughput": 37.47, |
|
"latency_mean": 26.697691247330958, |
|
"latency_std": 0.33379520623836, |
|
"latency_50": 26.612126, |
|
"latency_90": 27.0071515, |
|
"latency_95": 27.35591295, |
|
"latency_99": 27.96691773, |
|
"latency_999": 28.784246988999993 |
|
} |
|
}, |
|
{ |
|
"batch_size": 4, |
|
"input_length": 32, |
|
"baseline": { |
|
"nb_forwards": 210, |
|
"throughput": 14.0, |
|
"latency_mean": 71.64548409523809, |
|
"latency_std": 0.9364269258806002, |
|
"latency_50": 71.3511745, |
|
"latency_90": 73.0493582, |
|
"latency_95": 73.74027225, |
|
"latency_99": 74.29445254000001, |
|
"latency_999": 76.295386572 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 700, |
|
"throughput": 46.67, |
|
"latency_mean": 21.43839562, |
|
"latency_std": 0.7747656036456344, |
|
"latency_50": 21.458011, |
|
"latency_90": 22.4099538, |
|
"latency_95": 22.8598522, |
|
"latency_99": 23.88133127, |
|
"latency_999": 24.30084730600001 |
|
} |
|
}, |
|
{ |
|
"batch_size": 1, |
|
"input_length": 64, |
|
"baseline": { |
|
"nb_forwards": 341, |
|
"throughput": 22.73, |
|
"latency_mean": 44.00805451906158, |
|
"latency_std": 2.0146525121369048, |
|
"latency_50": 43.40077, |
|
"latency_90": 46.844243, |
|
"latency_95": 48.01232, |
|
"latency_99": 48.8285812, |
|
"latency_999": 49.91079640000002 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 1191, |
|
"throughput": 79.4, |
|
"latency_mean": 12.596608948782535, |
|
"latency_std": 0.4604972017819279, |
|
"latency_50": 12.560364, |
|
"latency_90": 13.254538, |
|
"latency_95": 13.4481775, |
|
"latency_99": 13.824474499999997, |
|
"latency_999": 14.306386329999988 |
|
} |
|
}, |
|
{ |
|
"batch_size": 4, |
|
"input_length": 128, |
|
"baseline": { |
|
"nb_forwards": 89, |
|
"throughput": 5.93, |
|
"latency_mean": 170.04430629213482, |
|
"latency_std": 1.7973801840824346, |
|
"latency_50": 169.804268, |
|
"latency_90": 172.3942636, |
|
"latency_95": 172.909571, |
|
"latency_99": 174.41490236, |
|
"latency_999": 174.83032853600002 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 184, |
|
"throughput": 12.27, |
|
"latency_mean": 81.67628805434782, |
|
"latency_std": 2.2754557516244045, |
|
"latency_50": 81.0874735, |
|
"latency_90": 85.0994149, |
|
"latency_95": 85.74545605, |
|
"latency_99": 88.41751725, |
|
"latency_999": 89.711957766 |
|
} |
|
}, |
|
{ |
|
"batch_size": 8, |
|
"input_length": 128, |
|
"baseline": { |
|
"nb_forwards": 37, |
|
"throughput": 2.47, |
|
"latency_mean": 414.4216835675676, |
|
"latency_std": 3.861346436570963, |
|
"latency_50": 415.141617, |
|
"latency_90": 419.931193, |
|
"latency_95": 421.07318360000005, |
|
"latency_99": 421.86215943999997, |
|
"latency_999": 422.088350644 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 98, |
|
"throughput": 6.53, |
|
"latency_mean": 154.51860542857142, |
|
"latency_std": 3.911988556596126, |
|
"latency_50": 153.7217105, |
|
"latency_90": 159.4319015, |
|
"latency_95": 161.4452743, |
|
"latency_99": 163.25851040999999, |
|
"latency_999": 163.55056994100002 |
|
} |
|
} |
|
], |
|
"others": { |
|
"baseline": { |
|
"precision": 0.9358012339503085, |
|
"recall": 0.9444631437226523, |
|
"f1": 0.9401122372057961, |
|
"accuracy": 0.9882013940267124 |
|
}, |
|
"optimized": { |
|
"precision": 0.9337560487235108, |
|
"recall": 0.9417704476607203, |
|
"f1": 0.9377461248428991, |
|
"accuracy": 0.9878314707371209 |
|
} |
|
} |
|
}, |
|
"max_eval_samples": null, |
|
"time_benchmark_args": { |
|
"duration": 15, |
|
"warmup_runs": 5 |
|
}, |
|
"model_type": "distilbert" |
|
}, |
|
{ |
|
"model_name_or_path": "elastic/distilbert-base-uncased-finetuned-conll03-english", |
|
"task": "token-classification", |
|
"task_args": null, |
|
"dataset": { |
|
"path": "conll2003", |
|
"eval_split": "validation", |
|
"data_keys": { |
|
"primary": "tokens", |
|
"secondary": null |
|
}, |
|
"ref_keys": [ |
|
"ner_tags" |
|
], |
|
"name": null, |
|
"calibration_split": "train" |
|
}, |
|
"quantization_approach": "dynamic", |
|
"operators_to_quantize": [ |
|
"Add" |
|
], |
|
"node_exclusion": [ |
|
"layernorm", |
|
"gelu", |
|
"residual", |
|
"gather", |
|
"softmax" |
|
], |
|
"aware_training": false, |
|
"per_channel": false, |
|
"calibration": { |
|
"method": "minmax", |
|
"num_calibration_samples": 100, |
|
"calibration_histogram_percentile": null, |
|
"calibration_moving_average": null, |
|
"calibration_moving_average_constant": null |
|
}, |
|
"framework": "onnxruntime", |
|
"framework_args": { |
|
"opset": 11, |
|
"optimization_level": 1 |
|
}, |
|
"hardware": "Architecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nByte Order: Little Endian\nAddress sizes: 46 bits physical, 48 bits virtual\nCPU(s): 8\nOn-line CPU(s) list: 0-7\nThread(s) per core: 2\nCore(s) per socket: 4\nSocket(s): 1\nNUMA node(s): 1\nVendor ID: GenuineIntel\nCPU family: 6\nModel: 85\nModel name: Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz\nStepping: 7\nCPU MHz: 3125.174\nBogoMIPS: 5000.00\nHypervisor vendor: KVM\nVirtualization type: full\nL1d cache: 128 KiB\nL1i cache: 128 KiB\nL2 cache: 4 MiB\nL3 cache: 35.8 MiB\nNUMA node0 CPU(s): 0-7\nVulnerability Itlb multihit: KVM: Vulnerable\nVulnerability L1tf: Mitigation; PTE Inversion\nVulnerability Mds: Vulnerable: Clear CPU buffers attempted, no microcode; SMT Host state unknown\nVulnerability Meltdown: Mitigation; PTI\nVulnerability Spec store bypass: Vulnerable\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Retpolines, STIBP disabled, RSB filling\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single pti fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid mpx avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves ida arat pku ospke avx512_vnni\n", |
|
"versions": { |
|
"transformers": "4.20.1", |
|
"optimum": "1.2.3.dev0", |
|
"optimum_hash": "5c9af4e5f93c7e9bd523563230732b49603dc4d7" |
|
}, |
|
"evaluation": { |
|
"time": [ |
|
{ |
|
"batch_size": 4, |
|
"input_length": 32, |
|
"baseline": { |
|
"nb_forwards": 210, |
|
"throughput": 14.0, |
|
"latency_mean": 71.72416266666667, |
|
"latency_std": 1.0823165199145606, |
|
"latency_50": 71.4790375, |
|
"latency_90": 72.9353617, |
|
"latency_95": 73.99241959999999, |
|
"latency_99": 75.17633138, |
|
"latency_999": 75.422236178 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 272, |
|
"throughput": 18.13, |
|
"latency_mean": 55.160912452205885, |
|
"latency_std": 0.6521145589906397, |
|
"latency_50": 55.189684, |
|
"latency_90": 55.7469555, |
|
"latency_95": 55.9487205, |
|
"latency_99": 57.71467353, |
|
"latency_999": 58.06304184299999 |
|
} |
|
}, |
|
{ |
|
"batch_size": 4, |
|
"input_length": 64, |
|
"baseline": { |
|
"nb_forwards": 159, |
|
"throughput": 10.6, |
|
"latency_mean": 94.34125967295597, |
|
"latency_std": 10.03252637987446, |
|
"latency_50": 91.050963, |
|
"latency_90": 120.90638, |
|
"latency_95": 121.39196709999999, |
|
"latency_99": 122.02035808, |
|
"latency_999": 122.622120378 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 141, |
|
"throughput": 9.4, |
|
"latency_mean": 107.02119182269503, |
|
"latency_std": 0.8366529782537496, |
|
"latency_50": 106.835229, |
|
"latency_90": 108.249955, |
|
"latency_95": 108.628056, |
|
"latency_99": 109.2045378, |
|
"latency_999": 110.26474030000001 |
|
} |
|
}, |
|
{ |
|
"batch_size": 1, |
|
"input_length": 64, |
|
"baseline": { |
|
"nb_forwards": 248, |
|
"throughput": 16.53, |
|
"latency_mean": 60.49658227016129, |
|
"latency_std": 0.8819758121225761, |
|
"latency_50": 60.343132, |
|
"latency_90": 61.6551285, |
|
"latency_95": 62.34768465, |
|
"latency_99": 63.31741764, |
|
"latency_999": 63.487983883999995 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 503, |
|
"throughput": 33.53, |
|
"latency_mean": 29.869608675944335, |
|
"latency_std": 0.3559872739101281, |
|
"latency_50": 29.794297, |
|
"latency_90": 30.1428484, |
|
"latency_95": 30.4600473, |
|
"latency_99": 30.75200356, |
|
"latency_999": 33.756738917999996 |
|
} |
|
}, |
|
{ |
|
"batch_size": 1, |
|
"input_length": 32, |
|
"baseline": { |
|
"nb_forwards": 445, |
|
"throughput": 29.67, |
|
"latency_mean": 33.73988259325843, |
|
"latency_std": 1.5629102688800236, |
|
"latency_50": 33.480773, |
|
"latency_90": 35.338715799999996, |
|
"latency_95": 35.7559136, |
|
"latency_99": 36.873395439999996, |
|
"latency_999": 47.8180268919997 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 1019, |
|
"throughput": 67.93, |
|
"latency_mean": 14.732223261040234, |
|
"latency_std": 0.9828893965554253, |
|
"latency_50": 14.401579, |
|
"latency_90": 16.897523, |
|
"latency_95": 17.0312649, |
|
"latency_99": 17.46899586, |
|
"latency_999": 17.893012501999998 |
|
} |
|
}, |
|
{ |
|
"batch_size": 1, |
|
"input_length": 128, |
|
"baseline": { |
|
"nb_forwards": 210, |
|
"throughput": 14.0, |
|
"latency_mean": 71.59536255238095, |
|
"latency_std": 1.13338439776916, |
|
"latency_50": 71.154012, |
|
"latency_90": 73.357109, |
|
"latency_95": 73.92077855, |
|
"latency_99": 74.25820067000001, |
|
"latency_999": 75.003235419 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 282, |
|
"throughput": 18.8, |
|
"latency_mean": 53.25616293617021, |
|
"latency_std": 4.987232229454594, |
|
"latency_50": 56.9742995, |
|
"latency_90": 57.4831075, |
|
"latency_95": 58.0667945, |
|
"latency_99": 59.20656624, |
|
"latency_999": 62.057965914999976 |
|
} |
|
}, |
|
{ |
|
"batch_size": 8, |
|
"input_length": 128, |
|
"baseline": { |
|
"nb_forwards": 48, |
|
"throughput": 3.2, |
|
"latency_mean": 314.12435689583333, |
|
"latency_std": 4.446221158816441, |
|
"latency_50": 314.1643195, |
|
"latency_90": 319.1971656, |
|
"latency_95": 322.43416895, |
|
"latency_99": 326.67617025, |
|
"latency_999": 327.501665325 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 47, |
|
"throughput": 3.13, |
|
"latency_mean": 323.93693336170213, |
|
"latency_std": 6.869443185578473, |
|
"latency_50": 323.040004, |
|
"latency_90": 334.16514060000003, |
|
"latency_95": 334.9778985, |
|
"latency_99": 336.97889436, |
|
"latency_999": 337.249830036 |
|
} |
|
}, |
|
{ |
|
"batch_size": 8, |
|
"input_length": 64, |
|
"baseline": { |
|
"nb_forwards": 70, |
|
"throughput": 4.67, |
|
"latency_mean": 216.0601891, |
|
"latency_std": 2.0959098467954616, |
|
"latency_50": 215.485022, |
|
"latency_90": 219.06993409999998, |
|
"latency_95": 219.52531109999998, |
|
"latency_99": 222.29498567000002, |
|
"latency_999": 223.08344266699999 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 92, |
|
"throughput": 6.13, |
|
"latency_mean": 163.75156455434782, |
|
"latency_std": 7.2184659324399, |
|
"latency_50": 161.7846625, |
|
"latency_90": 176.00411860000003, |
|
"latency_95": 179.22768994999998, |
|
"latency_99": 183.41052014000002, |
|
"latency_999": 188.60359111400004 |
|
} |
|
}, |
|
{ |
|
"batch_size": 8, |
|
"input_length": 32, |
|
"baseline": { |
|
"nb_forwards": 165, |
|
"throughput": 11.0, |
|
"latency_mean": 91.31757051515152, |
|
"latency_std": 2.4793130848747467, |
|
"latency_50": 91.036743, |
|
"latency_90": 93.733273, |
|
"latency_95": 95.67535299999999, |
|
"latency_99": 100.79730819999997, |
|
"latency_999": 102.770634928 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 163, |
|
"throughput": 10.87, |
|
"latency_mean": 92.08470889570552, |
|
"latency_std": 11.432354704165576, |
|
"latency_50": 84.815059, |
|
"latency_90": 105.0187196, |
|
"latency_95": 105.2827365, |
|
"latency_99": 106.12717995999999, |
|
"latency_999": 106.21855146600001 |
|
} |
|
}, |
|
{ |
|
"batch_size": 4, |
|
"input_length": 128, |
|
"baseline": { |
|
"nb_forwards": 80, |
|
"throughput": 5.33, |
|
"latency_mean": 188.59378125, |
|
"latency_std": 22.91815791529492, |
|
"latency_50": 172.9137385, |
|
"latency_90": 219.50391, |
|
"latency_95": 220.21657405000002, |
|
"latency_99": 221.94066489, |
|
"latency_999": 223.095406389 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 88, |
|
"throughput": 5.87, |
|
"latency_mean": 171.786525, |
|
"latency_std": 7.080702021982688, |
|
"latency_50": 170.577715, |
|
"latency_90": 184.1455852, |
|
"latency_95": 186.60128294999998, |
|
"latency_99": 190.34417446, |
|
"latency_999": 190.786445746 |
|
} |
|
} |
|
], |
|
"others": { |
|
"baseline": { |
|
"precision": 0.9358012339503085, |
|
"recall": 0.9444631437226523, |
|
"f1": 0.9401122372057961, |
|
"accuracy": 0.9882013940267124 |
|
}, |
|
"optimized": { |
|
"precision": 0.9358012339503085, |
|
"recall": 0.9444631437226523, |
|
"f1": 0.9401122372057961, |
|
"accuracy": 0.9882013940267124 |
|
} |
|
} |
|
}, |
|
"max_eval_samples": null, |
|
"time_benchmark_args": { |
|
"duration": 15, |
|
"warmup_runs": 5 |
|
}, |
|
"model_type": "distilbert" |
|
}, |
|
{ |
|
"model_name_or_path": "elastic/distilbert-base-uncased-finetuned-conll03-english", |
|
"task": "token-classification", |
|
"task_args": null, |
|
"dataset": { |
|
"path": "conll2003", |
|
"eval_split": "validation", |
|
"data_keys": { |
|
"primary": "tokens", |
|
"secondary": null |
|
}, |
|
"ref_keys": [ |
|
"ner_tags" |
|
], |
|
"name": null, |
|
"calibration_split": "train" |
|
}, |
|
"quantization_approach": "dynamic", |
|
"operators_to_quantize": [ |
|
"Add" |
|
], |
|
"node_exclusion": [], |
|
"aware_training": false, |
|
"per_channel": false, |
|
"calibration": { |
|
"method": "minmax", |
|
"num_calibration_samples": 100, |
|
"calibration_histogram_percentile": null, |
|
"calibration_moving_average": null, |
|
"calibration_moving_average_constant": null |
|
}, |
|
"framework": "onnxruntime", |
|
"framework_args": { |
|
"opset": 11, |
|
"optimization_level": 1 |
|
}, |
|
"hardware": "Architecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nByte Order: Little Endian\nAddress sizes: 46 bits physical, 48 bits virtual\nCPU(s): 8\nOn-line CPU(s) list: 0-7\nThread(s) per core: 2\nCore(s) per socket: 4\nSocket(s): 1\nNUMA node(s): 1\nVendor ID: GenuineIntel\nCPU family: 6\nModel: 85\nModel name: Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz\nStepping: 7\nCPU MHz: 3102.480\nBogoMIPS: 5000.00\nHypervisor vendor: KVM\nVirtualization type: full\nL1d cache: 128 KiB\nL1i cache: 128 KiB\nL2 cache: 4 MiB\nL3 cache: 35.8 MiB\nNUMA node0 CPU(s): 0-7\nVulnerability Itlb multihit: KVM: Vulnerable\nVulnerability L1tf: Mitigation; PTE Inversion\nVulnerability Mds: Vulnerable: Clear CPU buffers attempted, no microcode; SMT Host state unknown\nVulnerability Meltdown: Mitigation; PTI\nVulnerability Spec store bypass: Vulnerable\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Retpolines, STIBP disabled, RSB filling\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single pti fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid mpx avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves ida arat pku ospke avx512_vnni\n", |
|
"versions": { |
|
"transformers": "4.20.1", |
|
"optimum": "1.2.3.dev0", |
|
"optimum_hash": "5c9af4e5f93c7e9bd523563230732b49603dc4d7" |
|
}, |
|
"evaluation": { |
|
"time": [ |
|
{ |
|
"batch_size": 8, |
|
"input_length": 128, |
|
"baseline": { |
|
"nb_forwards": 37, |
|
"throughput": 2.47, |
|
"latency_mean": 408.1543652972973, |
|
"latency_std": 3.7417217521654447, |
|
"latency_50": 408.204875, |
|
"latency_90": 412.71899360000003, |
|
"latency_95": 415.5526328, |
|
"latency_99": 417.56349056, |
|
"latency_999": 418.43689865600004 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 47, |
|
"throughput": 3.13, |
|
"latency_mean": 325.0274505106383, |
|
"latency_std": 16.42866903352087, |
|
"latency_50": 323.280477, |
|
"latency_90": 336.7398878, |
|
"latency_95": 340.73717709999994, |
|
"latency_99": 392.46748674, |
|
"latency_999": 395.68078127399997 |
|
} |
|
}, |
|
{ |
|
"batch_size": 4, |
|
"input_length": 32, |
|
"baseline": { |
|
"nb_forwards": 270, |
|
"throughput": 18.0, |
|
"latency_mean": 55.55779645555556, |
|
"latency_std": 1.1666476240332937, |
|
"latency_50": 55.618944, |
|
"latency_90": 57.023193799999994, |
|
"latency_95": 57.38550095, |
|
"latency_99": 58.569932480000006, |
|
"latency_999": 59.960250984 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 342, |
|
"throughput": 22.8, |
|
"latency_mean": 43.866120190058474, |
|
"latency_std": 1.226638661359772, |
|
"latency_50": 43.698571, |
|
"latency_90": 45.555251, |
|
"latency_95": 46.4943021, |
|
"latency_99": 47.907215019999995, |
|
"latency_999": 48.302261889 |
|
} |
|
}, |
|
{ |
|
"batch_size": 4, |
|
"input_length": 64, |
|
"baseline": { |
|
"nb_forwards": 126, |
|
"throughput": 8.4, |
|
"latency_mean": 119.10938425396826, |
|
"latency_std": 5.242609662991225, |
|
"latency_50": 119.8814875, |
|
"latency_90": 122.013498, |
|
"latency_95": 122.8530875, |
|
"latency_99": 124.503561, |
|
"latency_999": 125.157123625 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 182, |
|
"throughput": 12.13, |
|
"latency_mean": 82.46068521428572, |
|
"latency_std": 1.8851982998377101, |
|
"latency_50": 82.4162485, |
|
"latency_90": 84.9214402, |
|
"latency_95": 85.51261295, |
|
"latency_99": 87.40860171, |
|
"latency_999": 88.40375641499999 |
|
} |
|
}, |
|
{ |
|
"batch_size": 1, |
|
"input_length": 32, |
|
"baseline": { |
|
"nb_forwards": 448, |
|
"throughput": 29.87, |
|
"latency_mean": 33.48908037053571, |
|
"latency_std": 1.1547087313019704, |
|
"latency_50": 33.380541, |
|
"latency_90": 35.0732339, |
|
"latency_95": 35.621661700000004, |
|
"latency_99": 36.54991294999999, |
|
"latency_999": 37.980162713999995 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 1059, |
|
"throughput": 70.6, |
|
"latency_mean": 14.167183152974506, |
|
"latency_std": 0.3567793828104339, |
|
"latency_50": 14.113569, |
|
"latency_90": 14.613918199999999, |
|
"latency_95": 14.760972, |
|
"latency_99": 15.203892, |
|
"latency_999": 15.846131796 |
|
} |
|
}, |
|
{ |
|
"batch_size": 4, |
|
"input_length": 128, |
|
"baseline": { |
|
"nb_forwards": 69, |
|
"throughput": 4.6, |
|
"latency_mean": 219.8001221014493, |
|
"latency_std": 1.682059211691071, |
|
"latency_50": 219.207299, |
|
"latency_90": 222.769191, |
|
"latency_95": 223.179612, |
|
"latency_99": 224.12751892, |
|
"latency_999": 225.31481789199998 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 92, |
|
"throughput": 6.13, |
|
"latency_mean": 163.62187851086955, |
|
"latency_std": 3.9886368861391612, |
|
"latency_50": 163.256997, |
|
"latency_90": 168.68158590000002, |
|
"latency_95": 170.24915575, |
|
"latency_99": 173.29093638, |
|
"latency_999": 175.298208738 |
|
} |
|
}, |
|
{ |
|
"batch_size": 1, |
|
"input_length": 128, |
|
"baseline": { |
|
"nb_forwards": 214, |
|
"throughput": 14.27, |
|
"latency_mean": 70.39102674299066, |
|
"latency_std": 0.645440942501749, |
|
"latency_50": 70.2169265, |
|
"latency_90": 71.08614279999999, |
|
"latency_95": 71.45404645, |
|
"latency_99": 73.1100614, |
|
"latency_999": 74.05629770400002 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 265, |
|
"throughput": 17.67, |
|
"latency_mean": 56.682904645283024, |
|
"latency_std": 0.29074631764368225, |
|
"latency_50": 56.604756, |
|
"latency_90": 56.9511692, |
|
"latency_95": 57.2699554, |
|
"latency_99": 58.03212468, |
|
"latency_999": 58.197217128 |
|
} |
|
}, |
|
{ |
|
"batch_size": 8, |
|
"input_length": 64, |
|
"baseline": { |
|
"nb_forwards": 85, |
|
"throughput": 5.67, |
|
"latency_mean": 176.69271157647057, |
|
"latency_std": 19.225586228496905, |
|
"latency_50": 166.86136, |
|
"latency_90": 213.846849, |
|
"latency_95": 214.451406, |
|
"latency_99": 216.24402419999998, |
|
"latency_999": 217.66288122 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 72, |
|
"throughput": 4.8, |
|
"latency_mean": 209.2788515277778, |
|
"latency_std": 1.0661208330184972, |
|
"latency_50": 209.1019465, |
|
"latency_90": 210.5121405, |
|
"latency_95": 211.63171434999998, |
|
"latency_99": 212.82214144, |
|
"latency_999": 212.860057144 |
|
} |
|
}, |
|
{ |
|
"batch_size": 8, |
|
"input_length": 32, |
|
"baseline": { |
|
"nb_forwards": 170, |
|
"throughput": 11.33, |
|
"latency_mean": 88.32521131764706, |
|
"latency_std": 1.6082421836618368, |
|
"latency_50": 88.34603, |
|
"latency_90": 90.24364059999999, |
|
"latency_95": 90.82843125, |
|
"latency_99": 92.49519597, |
|
"latency_999": 94.140928359 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 160, |
|
"throughput": 10.67, |
|
"latency_mean": 94.18205897499999, |
|
"latency_std": 11.564821917485974, |
|
"latency_50": 103.343016, |
|
"latency_90": 104.0051318, |
|
"latency_95": 104.4116828, |
|
"latency_99": 105.75135414, |
|
"latency_999": 106.152972641 |
|
} |
|
}, |
|
{ |
|
"batch_size": 1, |
|
"input_length": 64, |
|
"baseline": { |
|
"nb_forwards": 331, |
|
"throughput": 22.07, |
|
"latency_mean": 45.34665541087613, |
|
"latency_std": 4.816947937501302, |
|
"latency_50": 43.877047, |
|
"latency_90": 48.211528, |
|
"latency_95": 59.708112, |
|
"latency_99": 60.144242399999996, |
|
"latency_999": 60.57435263 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 623, |
|
"throughput": 41.53, |
|
"latency_mean": 24.101585462279292, |
|
"latency_std": 0.490914928207836, |
|
"latency_50": 24.033518, |
|
"latency_90": 24.7787182, |
|
"latency_95": 25.0034624, |
|
"latency_99": 25.4011857, |
|
"latency_999": 25.923231378000025 |
|
} |
|
} |
|
], |
|
"others": { |
|
"baseline": { |
|
"precision": 0.9358012339503085, |
|
"recall": 0.9444631437226523, |
|
"f1": 0.9401122372057961, |
|
"accuracy": 0.9882013940267124 |
|
}, |
|
"optimized": { |
|
"precision": 0.9358012339503085, |
|
"recall": 0.9444631437226523, |
|
"f1": 0.9401122372057961, |
|
"accuracy": 0.9882013940267124 |
|
} |
|
} |
|
}, |
|
"max_eval_samples": null, |
|
"time_benchmark_args": { |
|
"duration": 15, |
|
"warmup_runs": 5 |
|
}, |
|
"model_type": "distilbert" |
|
}, |
|
{ |
|
"model_name_or_path": "elastic/distilbert-base-uncased-finetuned-conll03-english", |
|
"task": "token-classification", |
|
"task_args": null, |
|
"dataset": { |
|
"path": "conll2003", |
|
"eval_split": "validation", |
|
"data_keys": { |
|
"primary": "tokens", |
|
"secondary": null |
|
}, |
|
"ref_keys": [ |
|
"ner_tags" |
|
], |
|
"name": null, |
|
"calibration_split": "train" |
|
}, |
|
"quantization_approach": "static", |
|
"operators_to_quantize": [ |
|
"Add" |
|
], |
|
"node_exclusion": [ |
|
"layernorm", |
|
"gelu", |
|
"residual", |
|
"gather", |
|
"softmax" |
|
], |
|
"aware_training": false, |
|
"per_channel": false, |
|
"calibration": { |
|
"method": "minmax", |
|
"num_calibration_samples": 100, |
|
"calibration_histogram_percentile": null, |
|
"calibration_moving_average": null, |
|
"calibration_moving_average_constant": null |
|
}, |
|
"framework": "onnxruntime", |
|
"framework_args": { |
|
"opset": 11, |
|
"optimization_level": 1 |
|
}, |
|
"hardware": "Architecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nByte Order: Little Endian\nAddress sizes: 46 bits physical, 48 bits virtual\nCPU(s): 8\nOn-line CPU(s) list: 0-7\nThread(s) per core: 2\nCore(s) per socket: 4\nSocket(s): 1\nNUMA node(s): 1\nVendor ID: GenuineIntel\nCPU family: 6\nModel: 85\nModel name: Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz\nStepping: 7\nCPU MHz: 3099.991\nBogoMIPS: 5000.00\nHypervisor vendor: KVM\nVirtualization type: full\nL1d cache: 128 KiB\nL1i cache: 128 KiB\nL2 cache: 4 MiB\nL3 cache: 35.8 MiB\nNUMA node0 CPU(s): 0-7\nVulnerability Itlb multihit: KVM: Vulnerable\nVulnerability L1tf: Mitigation; PTE Inversion\nVulnerability Mds: Vulnerable: Clear CPU buffers attempted, no microcode; SMT Host state unknown\nVulnerability Meltdown: Mitigation; PTI\nVulnerability Spec store bypass: Vulnerable\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Retpolines, STIBP disabled, RSB filling\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single pti fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid mpx avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves ida arat pku ospke avx512_vnni\n", |
|
"versions": { |
|
"transformers": "4.20.1", |
|
"optimum": "1.2.3.dev0", |
|
"optimum_hash": "5c9af4e5f93c7e9bd523563230732b49603dc4d7" |
|
}, |
|
"evaluation": { |
|
"time": [ |
|
{ |
|
"batch_size": 1, |
|
"input_length": 64, |
|
"baseline": { |
|
"nb_forwards": 348, |
|
"throughput": 23.2, |
|
"latency_mean": 43.15195731609195, |
|
"latency_std": 1.3049791783770857, |
|
"latency_50": 43.026682, |
|
"latency_90": 44.93291970000001, |
|
"latency_95": 45.4691658, |
|
"latency_99": 46.60123, |
|
"latency_999": 47.391866047 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 456, |
|
"throughput": 30.4, |
|
"latency_mean": 32.961429899122805, |
|
"latency_std": 0.22594982626954999, |
|
"latency_50": 32.925405, |
|
"latency_90": 33.210001, |
|
"latency_95": 33.29684425, |
|
"latency_99": 33.561622449999994, |
|
"latency_999": 34.45556356 |
|
} |
|
}, |
|
{ |
|
"batch_size": 1, |
|
"input_length": 32, |
|
"baseline": { |
|
"nb_forwards": 328, |
|
"throughput": 21.87, |
|
"latency_mean": 45.769709847560975, |
|
"latency_std": 5.105158395405133, |
|
"latency_50": 47.564638, |
|
"latency_90": 49.1212201, |
|
"latency_95": 49.496884, |
|
"latency_99": 50.38536208, |
|
"latency_999": 50.559775634000005 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 790, |
|
"throughput": 52.67, |
|
"latency_mean": 18.99811082658228, |
|
"latency_std": 0.18571276934069156, |
|
"latency_50": 18.955887, |
|
"latency_90": 19.2610981, |
|
"latency_95": 19.371919350000002, |
|
"latency_99": 19.535236910000002, |
|
"latency_999": 19.715454875 |
|
} |
|
}, |
|
{ |
|
"batch_size": 1, |
|
"input_length": 128, |
|
"baseline": { |
|
"nb_forwards": 214, |
|
"throughput": 14.27, |
|
"latency_mean": 70.2763914906542, |
|
"latency_std": 0.2860076644639914, |
|
"latency_50": 70.2829375, |
|
"latency_90": 70.6419829, |
|
"latency_95": 70.73166590000001, |
|
"latency_99": 70.99700557, |
|
"latency_999": 71.13112393099999 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 240, |
|
"throughput": 16.0, |
|
"latency_mean": 62.699298579166665, |
|
"latency_std": 0.3115197355843105, |
|
"latency_50": 62.624987, |
|
"latency_90": 63.0751223, |
|
"latency_95": 63.22834435, |
|
"latency_99": 63.65015018, |
|
"latency_999": 64.543749036 |
|
} |
|
}, |
|
{ |
|
"batch_size": 8, |
|
"input_length": 64, |
|
"baseline": { |
|
"nb_forwards": 71, |
|
"throughput": 4.73, |
|
"latency_mean": 213.8736818169014, |
|
"latency_std": 1.581524798477253, |
|
"latency_50": 213.656346, |
|
"latency_90": 214.2873, |
|
"latency_95": 214.4610125, |
|
"latency_99": 218.50125879999996, |
|
"latency_999": 225.79765168000006 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 67, |
|
"throughput": 4.47, |
|
"latency_mean": 224.49603488059702, |
|
"latency_std": 14.179003233192402, |
|
"latency_50": 228.344384, |
|
"latency_90": 228.9939548, |
|
"latency_95": 229.2531382, |
|
"latency_99": 229.51218631999998, |
|
"latency_999": 229.879069232 |
|
} |
|
}, |
|
{ |
|
"batch_size": 4, |
|
"input_length": 64, |
|
"baseline": { |
|
"nb_forwards": 126, |
|
"throughput": 8.4, |
|
"latency_mean": 119.57019325396826, |
|
"latency_std": 0.9318178327933169, |
|
"latency_50": 119.624132, |
|
"latency_90": 120.7031565, |
|
"latency_95": 120.95046825, |
|
"latency_99": 121.62923875, |
|
"latency_999": 122.22802 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 163, |
|
"throughput": 10.87, |
|
"latency_mean": 92.49704997546013, |
|
"latency_std": 3.5224258875712082, |
|
"latency_50": 91.538022, |
|
"latency_90": 98.7628616, |
|
"latency_95": 99.7127255, |
|
"latency_99": 101.79093066, |
|
"latency_999": 103.67060976999998 |
|
} |
|
}, |
|
{ |
|
"batch_size": 4, |
|
"input_length": 128, |
|
"baseline": { |
|
"nb_forwards": 85, |
|
"throughput": 5.67, |
|
"latency_mean": 177.4090096117647, |
|
"latency_std": 18.424368744880013, |
|
"latency_50": 169.497669, |
|
"latency_90": 219.17437120000002, |
|
"latency_95": 221.619667, |
|
"latency_99": 223.05841704, |
|
"latency_999": 223.330345704 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 71, |
|
"throughput": 4.73, |
|
"latency_mean": 211.88950738028169, |
|
"latency_std": 29.120931744682288, |
|
"latency_50": 237.172705, |
|
"latency_90": 238.463971, |
|
"latency_95": 238.7496675, |
|
"latency_99": 239.3086489, |
|
"latency_999": 239.40512899 |
|
} |
|
}, |
|
{ |
|
"batch_size": 4, |
|
"input_length": 32, |
|
"baseline": { |
|
"nb_forwards": 272, |
|
"throughput": 18.13, |
|
"latency_mean": 55.23031008455882, |
|
"latency_std": 1.1571386368235503, |
|
"latency_50": 55.2722705, |
|
"latency_90": 56.607078200000004, |
|
"latency_95": 57.48416465, |
|
"latency_99": 58.30250327, |
|
"latency_999": 58.528941114 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 253, |
|
"throughput": 16.87, |
|
"latency_mean": 59.30528993675889, |
|
"latency_std": 0.2044975324140483, |
|
"latency_50": 59.279888, |
|
"latency_90": 59.5549104, |
|
"latency_95": 59.60868, |
|
"latency_99": 59.898025759999996, |
|
"latency_999": 60.518632308 |
|
} |
|
}, |
|
{ |
|
"batch_size": 8, |
|
"input_length": 128, |
|
"baseline": { |
|
"nb_forwards": 37, |
|
"throughput": 2.47, |
|
"latency_mean": 409.6796750540541, |
|
"latency_std": 3.4623663344659903, |
|
"latency_50": 409.339737, |
|
"latency_90": 413.77022339999996, |
|
"latency_95": 416.1084598, |
|
"latency_99": 419.1264798, |
|
"latency_999": 419.41464377999995 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 31, |
|
"throughput": 2.07, |
|
"latency_mean": 493.00366906451615, |
|
"latency_std": 3.929536871101732, |
|
"latency_50": 493.144742, |
|
"latency_90": 498.634207, |
|
"latency_95": 498.8949985, |
|
"latency_99": 499.6773647, |
|
"latency_999": 499.91265107 |
|
} |
|
}, |
|
{ |
|
"batch_size": 8, |
|
"input_length": 32, |
|
"baseline": { |
|
"nb_forwards": 173, |
|
"throughput": 11.53, |
|
"latency_mean": 87.05498305780347, |
|
"latency_std": 1.5424139654073576, |
|
"latency_50": 86.877938, |
|
"latency_90": 88.887705, |
|
"latency_95": 89.4437458, |
|
"latency_99": 91.18471404, |
|
"latency_999": 92.01616860000001 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 135, |
|
"throughput": 9.0, |
|
"latency_mean": 111.36274993333333, |
|
"latency_std": 9.414259569194291, |
|
"latency_50": 115.101582, |
|
"latency_90": 116.2764454, |
|
"latency_95": 116.6387619, |
|
"latency_99": 116.93107282, |
|
"latency_999": 117.27618652400001 |
|
} |
|
} |
|
], |
|
"others": { |
|
"baseline": { |
|
"precision": 0.9358012339503085, |
|
"recall": 0.9444631437226523, |
|
"f1": 0.9401122372057961, |
|
"accuracy": 0.9882013940267124 |
|
}, |
|
"optimized": { |
|
"precision": 0.9087171052631579, |
|
"recall": 0.929821608885897, |
|
"f1": 0.9191482282482116, |
|
"accuracy": 0.9856898095868541 |
|
} |
|
} |
|
}, |
|
"max_eval_samples": null, |
|
"time_benchmark_args": { |
|
"duration": 15, |
|
"warmup_runs": 5 |
|
}, |
|
"model_type": "distilbert" |
|
}, |
|
{ |
|
"model_name_or_path": "elastic/distilbert-base-uncased-finetuned-conll03-english", |
|
"task": "token-classification", |
|
"task_args": null, |
|
"dataset": { |
|
"path": "conll2003", |
|
"eval_split": "validation", |
|
"data_keys": { |
|
"primary": "tokens", |
|
"secondary": null |
|
}, |
|
"ref_keys": [ |
|
"ner_tags" |
|
], |
|
"name": null, |
|
"calibration_split": "train" |
|
}, |
|
"quantization_approach": "static", |
|
"operators_to_quantize": [ |
|
"Add" |
|
], |
|
"node_exclusion": [], |
|
"aware_training": false, |
|
"per_channel": false, |
|
"calibration": { |
|
"method": "minmax", |
|
"num_calibration_samples": 100, |
|
"calibration_histogram_percentile": null, |
|
"calibration_moving_average": null, |
|
"calibration_moving_average_constant": null |
|
}, |
|
"framework": "onnxruntime", |
|
"framework_args": { |
|
"opset": 11, |
|
"optimization_level": 1 |
|
}, |
|
"hardware": "Architecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nByte Order: Little Endian\nAddress sizes: 46 bits physical, 48 bits virtual\nCPU(s): 8\nOn-line CPU(s) list: 0-7\nThread(s) per core: 2\nCore(s) per socket: 4\nSocket(s): 1\nNUMA node(s): 1\nVendor ID: GenuineIntel\nCPU family: 6\nModel: 85\nModel name: Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz\nStepping: 7\nCPU MHz: 3119.116\nBogoMIPS: 5000.00\nHypervisor vendor: KVM\nVirtualization type: full\nL1d cache: 128 KiB\nL1i cache: 128 KiB\nL2 cache: 4 MiB\nL3 cache: 35.8 MiB\nNUMA node0 CPU(s): 0-7\nVulnerability Itlb multihit: KVM: Vulnerable\nVulnerability L1tf: Mitigation; PTE Inversion\nVulnerability Mds: Vulnerable: Clear CPU buffers attempted, no microcode; SMT Host state unknown\nVulnerability Meltdown: Mitigation; PTI\nVulnerability Spec store bypass: Vulnerable\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Retpolines, STIBP disabled, RSB filling\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single pti fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid mpx avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves ida arat pku ospke avx512_vnni\n", |
|
"versions": { |
|
"transformers": "4.20.1", |
|
"optimum": "1.2.3.dev0", |
|
"optimum_hash": "5c9af4e5f93c7e9bd523563230732b49603dc4d7" |
|
}, |
|
"evaluation": { |
|
"time": [ |
|
{ |
|
"batch_size": 4, |
|
"input_length": 128, |
|
"baseline": { |
|
"nb_forwards": 90, |
|
"throughput": 6.0, |
|
"latency_mean": 168.29981604444444, |
|
"latency_std": 3.0980387809618484, |
|
"latency_50": 167.5409385, |
|
"latency_90": 170.98883170000002, |
|
"latency_95": 174.5708666, |
|
"latency_99": 179.67467568, |
|
"latency_999": 183.294064668 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 75, |
|
"throughput": 5.0, |
|
"latency_mean": 201.88371818666667, |
|
"latency_std": 5.1702570236285155, |
|
"latency_50": 201.088276, |
|
"latency_90": 208.22007219999998, |
|
"latency_95": 211.4593359, |
|
"latency_99": 214.37736256000002, |
|
"latency_999": 218.26520815600003 |
|
} |
|
}, |
|
{ |
|
"batch_size": 4, |
|
"input_length": 64, |
|
"baseline": { |
|
"nb_forwards": 128, |
|
"throughput": 8.53, |
|
"latency_mean": 117.345409171875, |
|
"latency_std": 1.0232306892142995, |
|
"latency_50": 117.570859, |
|
"latency_90": 118.4811602, |
|
"latency_95": 118.66072285, |
|
"latency_99": 119.97331652, |
|
"latency_999": 120.173632319 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 147, |
|
"throughput": 9.8, |
|
"latency_mean": 102.08866921088435, |
|
"latency_std": 4.207024690408841, |
|
"latency_50": 100.900148, |
|
"latency_90": 106.6952282, |
|
"latency_95": 111.67752269999998, |
|
"latency_99": 115.24070447999998, |
|
"latency_999": 122.8603576240001 |
|
} |
|
}, |
|
{ |
|
"batch_size": 4, |
|
"input_length": 32, |
|
"baseline": { |
|
"nb_forwards": 256, |
|
"throughput": 17.07, |
|
"latency_mean": 58.79949217578125, |
|
"latency_std": 7.2389593462092225, |
|
"latency_50": 55.00797, |
|
"latency_90": 71.60057, |
|
"latency_95": 72.5113045, |
|
"latency_99": 72.87857595, |
|
"latency_999": 73.62029899000001 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 228, |
|
"throughput": 15.2, |
|
"latency_mean": 66.03353574122808, |
|
"latency_std": 0.2155799936674679, |
|
"latency_50": 66.018175, |
|
"latency_90": 66.2633382, |
|
"latency_95": 66.3559738, |
|
"latency_99": 66.66150048, |
|
"latency_999": 67.423175391 |
|
} |
|
}, |
|
{ |
|
"batch_size": 1, |
|
"input_length": 64, |
|
"baseline": { |
|
"nb_forwards": 342, |
|
"throughput": 22.8, |
|
"latency_mean": 44.01321974853801, |
|
"latency_std": 4.990156500118981, |
|
"latency_50": 42.3942795, |
|
"latency_90": 56.7239391, |
|
"latency_95": 57.8124011, |
|
"latency_99": 58.51948686, |
|
"latency_999": 59.132265571 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 474, |
|
"throughput": 31.6, |
|
"latency_mean": 31.67708501898734, |
|
"latency_std": 1.0256274074298153, |
|
"latency_50": 31.644259, |
|
"latency_90": 33.042136, |
|
"latency_95": 33.4474054, |
|
"latency_99": 34.71017928, |
|
"latency_999": 35.40770895199999 |
|
} |
|
}, |
|
{ |
|
"batch_size": 8, |
|
"input_length": 32, |
|
"baseline": { |
|
"nb_forwards": 129, |
|
"throughput": 8.6, |
|
"latency_mean": 116.96203879069768, |
|
"latency_std": 0.9805415537137262, |
|
"latency_50": 116.711745, |
|
"latency_90": 117.476479, |
|
"latency_95": 119.6996568, |
|
"latency_99": 120.4817116, |
|
"latency_999": 120.693500216 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 152, |
|
"throughput": 10.13, |
|
"latency_mean": 98.81530088157895, |
|
"latency_std": 2.408744102355572, |
|
"latency_50": 98.4953865, |
|
"latency_90": 101.9166918, |
|
"latency_95": 103.45597070000001, |
|
"latency_99": 105.89102432000001, |
|
"latency_999": 107.434858943 |
|
} |
|
}, |
|
{ |
|
"batch_size": 8, |
|
"input_length": 64, |
|
"baseline": { |
|
"nb_forwards": 72, |
|
"throughput": 4.8, |
|
"latency_mean": 211.162219375, |
|
"latency_std": 0.50332579026598, |
|
"latency_50": 211.161418, |
|
"latency_90": 211.835143, |
|
"latency_95": 211.9189473, |
|
"latency_99": 212.26486029, |
|
"latency_999": 212.469340929 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 78, |
|
"throughput": 5.2, |
|
"latency_mean": 193.01161867948716, |
|
"latency_std": 4.477161349632226, |
|
"latency_50": 193.023931, |
|
"latency_90": 198.0193413, |
|
"latency_95": 200.14843679999998, |
|
"latency_99": 206.14731525000002, |
|
"latency_999": 211.593862125 |
|
} |
|
}, |
|
{ |
|
"batch_size": 1, |
|
"input_length": 128, |
|
"baseline": { |
|
"nb_forwards": 235, |
|
"throughput": 15.67, |
|
"latency_mean": 63.85546833191489, |
|
"latency_std": 6.973859157506543, |
|
"latency_50": 69.383357, |
|
"latency_90": 70.026538, |
|
"latency_95": 70.3286013, |
|
"latency_99": 70.6866161, |
|
"latency_999": 71.185565742 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 244, |
|
"throughput": 16.27, |
|
"latency_mean": 61.635230135245905, |
|
"latency_std": 5.810202171663617, |
|
"latency_50": 58.524244, |
|
"latency_90": 70.1898687, |
|
"latency_95": 70.36230454999999, |
|
"latency_99": 70.64778987000001, |
|
"latency_999": 72.10732932400002 |
|
} |
|
}, |
|
{ |
|
"batch_size": 8, |
|
"input_length": 128, |
|
"baseline": { |
|
"nb_forwards": 38, |
|
"throughput": 2.53, |
|
"latency_mean": 397.28297392105264, |
|
"latency_std": 0.6990720798085198, |
|
"latency_50": 397.169161, |
|
"latency_90": 398.30781060000004, |
|
"latency_95": 398.40529275, |
|
"latency_99": 398.80296562, |
|
"latency_999": 398.940152962 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 38, |
|
"throughput": 2.53, |
|
"latency_mean": 397.74469242105266, |
|
"latency_std": 9.112867850853375, |
|
"latency_50": 396.410306, |
|
"latency_90": 411.3193268, |
|
"latency_95": 412.24714525, |
|
"latency_99": 415.15970197, |
|
"latency_999": 416.37122889700004 |
|
} |
|
}, |
|
{ |
|
"batch_size": 1, |
|
"input_length": 32, |
|
"baseline": { |
|
"nb_forwards": 336, |
|
"throughput": 22.4, |
|
"latency_mean": 44.668419485119045, |
|
"latency_std": 4.092836734909812, |
|
"latency_50": 46.0771455, |
|
"latency_90": 46.8151985, |
|
"latency_95": 47.12449375, |
|
"latency_99": 47.4169311, |
|
"latency_999": 47.675663060000005 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 800, |
|
"throughput": 53.33, |
|
"latency_mean": 18.77007987125, |
|
"latency_std": 1.3253872815877223, |
|
"latency_50": 18.32753, |
|
"latency_90": 21.5042558, |
|
"latency_95": 21.83777205, |
|
"latency_99": 22.11860088, |
|
"latency_999": 22.523989289000014 |
|
} |
|
} |
|
], |
|
"others": { |
|
"baseline": { |
|
"precision": 0.9358012339503085, |
|
"recall": 0.9444631437226523, |
|
"f1": 0.9401122372057961, |
|
"accuracy": 0.9882013940267124 |
|
}, |
|
"optimized": { |
|
"precision": 0.05001838911364472, |
|
"recall": 0.16021541568495457, |
|
"f1": 0.07623623623623622, |
|
"accuracy": 0.31141700089560376 |
|
} |
|
} |
|
}, |
|
"max_eval_samples": null, |
|
"time_benchmark_args": { |
|
"duration": 15, |
|
"warmup_runs": 5 |
|
}, |
|
"model_type": "distilbert" |
|
}, |
|
{ |
|
"model_name_or_path": "elastic/distilbert-base-uncased-finetuned-conll03-english", |
|
"task": "token-classification", |
|
"task_args": null, |
|
"dataset": { |
|
"path": "conll2003", |
|
"eval_split": "validation", |
|
"data_keys": { |
|
"primary": "tokens", |
|
"secondary": null |
|
}, |
|
"ref_keys": [ |
|
"ner_tags" |
|
], |
|
"name": null, |
|
"calibration_split": "train" |
|
}, |
|
"quantization_approach": "dynamic", |
|
"operators_to_quantize": [ |
|
"Add", |
|
"MatMul" |
|
], |
|
"node_exclusion": [ |
|
"layernorm", |
|
"gelu", |
|
"residual", |
|
"gather", |
|
"softmax" |
|
], |
|
"aware_training": false, |
|
"per_channel": false, |
|
"calibration": { |
|
"method": "minmax", |
|
"num_calibration_samples": 100, |
|
"calibration_histogram_percentile": null, |
|
"calibration_moving_average": null, |
|
"calibration_moving_average_constant": null |
|
}, |
|
"framework": "onnxruntime", |
|
"framework_args": { |
|
"opset": 11, |
|
"optimization_level": 1 |
|
}, |
|
"hardware": "Architecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nByte Order: Little Endian\nAddress sizes: 46 bits physical, 48 bits virtual\nCPU(s): 8\nOn-line CPU(s) list: 0-7\nThread(s) per core: 2\nCore(s) per socket: 4\nSocket(s): 1\nNUMA node(s): 1\nVendor ID: GenuineIntel\nCPU family: 6\nModel: 85\nModel name: Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz\nStepping: 7\nCPU MHz: 3170.168\nBogoMIPS: 5000.00\nHypervisor vendor: KVM\nVirtualization type: full\nL1d cache: 128 KiB\nL1i cache: 128 KiB\nL2 cache: 4 MiB\nL3 cache: 35.8 MiB\nNUMA node0 CPU(s): 0-7\nVulnerability Itlb multihit: KVM: Vulnerable\nVulnerability L1tf: Mitigation; PTE Inversion\nVulnerability Mds: Vulnerable: Clear CPU buffers attempted, no microcode; SMT Host state unknown\nVulnerability Meltdown: Mitigation; PTI\nVulnerability Spec store bypass: Vulnerable\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Retpolines, STIBP disabled, RSB filling\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single pti fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid mpx avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves ida arat pku ospke avx512_vnni\n", |
|
"versions": { |
|
"transformers": "4.20.1", |
|
"optimum": "1.2.3.dev0", |
|
"optimum_hash": "5c9af4e5f93c7e9bd523563230732b49603dc4d7" |
|
}, |
|
"evaluation": { |
|
"time": [ |
|
{ |
|
"batch_size": 1, |
|
"input_length": 32, |
|
"baseline": { |
|
"nb_forwards": 456, |
|
"throughput": 30.4, |
|
"latency_mean": 32.90015813157895, |
|
"latency_std": 0.9986947055465656, |
|
"latency_50": 32.8186695, |
|
"latency_90": 34.223269, |
|
"latency_95": 34.7373955, |
|
"latency_99": 35.79150405, |
|
"latency_999": 37.31698694000001 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 2133, |
|
"throughput": 142.2, |
|
"latency_mean": 7.03493227238631, |
|
"latency_std": 0.35354984774223724, |
|
"latency_50": 6.950204, |
|
"latency_90": 7.5335166, |
|
"latency_95": 7.5981322, |
|
"latency_99": 7.7467418, |
|
"latency_999": 7.949874583999998 |
|
} |
|
}, |
|
{ |
|
"batch_size": 4, |
|
"input_length": 64, |
|
"baseline": { |
|
"nb_forwards": 128, |
|
"throughput": 8.53, |
|
"latency_mean": 117.711207453125, |
|
"latency_std": 0.3039802910847197, |
|
"latency_50": 117.713083, |
|
"latency_90": 118.1415121, |
|
"latency_95": 118.31928495, |
|
"latency_99": 118.40991929, |
|
"latency_999": 118.45796044400001 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 342, |
|
"throughput": 22.8, |
|
"latency_mean": 43.9346932251462, |
|
"latency_std": 1.9823168868846608, |
|
"latency_50": 44.48181, |
|
"latency_90": 44.9406133, |
|
"latency_95": 45.33209005, |
|
"latency_99": 46.44872295999999, |
|
"latency_999": 47.516010738999995 |
|
} |
|
}, |
|
{ |
|
"batch_size": 8, |
|
"input_length": 32, |
|
"baseline": { |
|
"nb_forwards": 142, |
|
"throughput": 9.47, |
|
"latency_mean": 106.45585029577465, |
|
"latency_std": 13.710361788824905, |
|
"latency_50": 115.6649905, |
|
"latency_90": 116.2565268, |
|
"latency_95": 116.41340004999999, |
|
"latency_99": 116.79343428, |
|
"latency_999": 119.25626965200003 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 355, |
|
"throughput": 23.67, |
|
"latency_mean": 42.35345902253521, |
|
"latency_std": 0.16699971956793372, |
|
"latency_50": 42.330536, |
|
"latency_90": 42.578397200000005, |
|
"latency_95": 42.637379, |
|
"latency_99": 42.80914288, |
|
"latency_999": 43.008794852 |
|
} |
|
}, |
|
{ |
|
"batch_size": 1, |
|
"input_length": 128, |
|
"baseline": { |
|
"nb_forwards": 272, |
|
"throughput": 18.13, |
|
"latency_mean": 55.20024977941176, |
|
"latency_std": 0.9356174611392704, |
|
"latency_50": 55.154456, |
|
"latency_90": 56.3394606, |
|
"latency_95": 56.87050635, |
|
"latency_99": 57.61698746000001, |
|
"latency_999": 58.883503068999985 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 584, |
|
"throughput": 38.93, |
|
"latency_mean": 25.724440731164385, |
|
"latency_std": 0.1774167175481294, |
|
"latency_50": 25.694553, |
|
"latency_90": 25.9448093, |
|
"latency_95": 26.0568756, |
|
"latency_99": 26.254885369999997, |
|
"latency_999": 26.525804783000005 |
|
} |
|
}, |
|
{ |
|
"batch_size": 4, |
|
"input_length": 32, |
|
"baseline": { |
|
"nb_forwards": 214, |
|
"throughput": 14.27, |
|
"latency_mean": 70.41022843925234, |
|
"latency_std": 0.9857357168050909, |
|
"latency_50": 70.4451575, |
|
"latency_90": 71.6293733, |
|
"latency_95": 71.97065065000001, |
|
"latency_99": 72.92320712, |
|
"latency_999": 73.059142452 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 662, |
|
"throughput": 44.13, |
|
"latency_mean": 22.66916344410876, |
|
"latency_std": 0.1531962446356845, |
|
"latency_50": 22.644899, |
|
"latency_90": 22.882265699999998, |
|
"latency_95": 22.93566205, |
|
"latency_99": 23.106536690000002, |
|
"latency_999": 23.233784632 |
|
} |
|
}, |
|
{ |
|
"batch_size": 4, |
|
"input_length": 128, |
|
"baseline": { |
|
"nb_forwards": 68, |
|
"throughput": 4.53, |
|
"latency_mean": 220.69089520588233, |
|
"latency_std": 2.3715499250111147, |
|
"latency_50": 220.928956, |
|
"latency_90": 223.564325, |
|
"latency_95": 224.24846325, |
|
"latency_99": 225.59921588999998, |
|
"latency_999": 225.913941489 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 160, |
|
"throughput": 10.67, |
|
"latency_mean": 94.33282233125, |
|
"latency_std": 1.3440036091128054, |
|
"latency_50": 94.0403795, |
|
"latency_90": 96.10274629999999, |
|
"latency_95": 96.99199729999998, |
|
"latency_99": 98.02360016, |
|
"latency_999": 99.111505656 |
|
} |
|
}, |
|
{ |
|
"batch_size": 8, |
|
"input_length": 64, |
|
"baseline": { |
|
"nb_forwards": 91, |
|
"throughput": 6.07, |
|
"latency_mean": 165.66658007692308, |
|
"latency_std": 9.200627894933877, |
|
"latency_50": 163.47137, |
|
"latency_90": 169.773734, |
|
"latency_95": 172.0753955, |
|
"latency_99": 213.2899759, |
|
"latency_999": 214.50856338999998 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 172, |
|
"throughput": 11.47, |
|
"latency_mean": 87.71301163953488, |
|
"latency_std": 1.0979033087486965, |
|
"latency_50": 87.4641435, |
|
"latency_90": 89.3678849, |
|
"latency_95": 89.7342365, |
|
"latency_99": 90.56987919, |
|
"latency_999": 90.839936162 |
|
} |
|
}, |
|
{ |
|
"batch_size": 8, |
|
"input_length": 128, |
|
"baseline": { |
|
"nb_forwards": 39, |
|
"throughput": 2.6, |
|
"latency_mean": 391.16164482051283, |
|
"latency_std": 24.77584642803748, |
|
"latency_50": 397.925266, |
|
"latency_90": 401.79618239999996, |
|
"latency_95": 402.9476371, |
|
"latency_99": 403.36617936, |
|
"latency_999": 403.412153736 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 82, |
|
"throughput": 5.47, |
|
"latency_mean": 183.34596223170732, |
|
"latency_std": 1.9414583808520627, |
|
"latency_50": 182.9643005, |
|
"latency_90": 185.9883817, |
|
"latency_95": 187.12977195, |
|
"latency_99": 188.01645273, |
|
"latency_999": 188.707885173 |
|
} |
|
}, |
|
{ |
|
"batch_size": 1, |
|
"input_length": 64, |
|
"baseline": { |
|
"nb_forwards": 254, |
|
"throughput": 16.93, |
|
"latency_mean": 59.14541264566929, |
|
"latency_std": 1.0597184999316425, |
|
"latency_50": 59.1676845, |
|
"latency_90": 60.564947700000005, |
|
"latency_95": 60.97932805, |
|
"latency_99": 61.52093252, |
|
"latency_999": 61.899637246 |
|
}, |
|
"optimized": { |
|
"nb_forwards": 1103, |
|
"throughput": 73.53, |
|
"latency_mean": 13.600328429737079, |
|
"latency_std": 0.19450611359009803, |
|
"latency_50": 13.567231, |
|
"latency_90": 13.8727804, |
|
"latency_95": 14.010098300000001, |
|
"latency_99": 14.15573608, |
|
"latency_999": 14.268220878 |
|
} |
|
} |
|
], |
|
"others": { |
|
"baseline": { |
|
"precision": 0.9358012339503085, |
|
"recall": 0.9444631437226523, |
|
"f1": 0.9401122372057961, |
|
"accuracy": 0.9882013940267124 |
|
}, |
|
"optimized": { |
|
"precision": 0.9337560487235108, |
|
"recall": 0.9417704476607203, |
|
"f1": 0.9377461248428991, |
|
"accuracy": 0.9878314707371209 |
|
} |
|
} |
|
}, |
|
"max_eval_samples": null, |
|
"time_benchmark_args": { |
|
"duration": 15, |
|
"warmup_runs": 5 |
|
}, |
|
"model_type": "distilbert" |
|
} |
|
] |