{ "base_current_gpu_type": "NVIDIA A100-PCIE-40GB", "base_current_gpu_total_memory": 40339.3125, "base_token_generation_latency_sync": 33.131116485595705, "base_token_generation_latency_async": 33.188691176474094, "base_token_generation_throughput_sync": 0.030183105976364137, "base_token_generation_throughput_async": 0.030130745279550314, "base_token_generation_CO2_emissions": null, "base_token_generation_energy_consumption": null, "base_inference_latency_sync": 32.26163177490234, "base_inference_latency_async": 30.899405479431152, "base_inference_throughput_sync": 0.030996572243377388, "base_inference_throughput_async": 0.03236308221741261, "base_inference_CO2_emissions": null, "base_inference_energy_consumption": null, "smashed_current_gpu_type": "NVIDIA A100-PCIE-40GB", "smashed_current_gpu_total_memory": 40339.3125, "smashed_token_generation_latency_sync": 24.905581283569337, "smashed_token_generation_latency_async": 25.39039347320795, "smashed_token_generation_throughput_sync": 0.04015164266251108, "smashed_token_generation_throughput_async": 0.03938497452019419, "smashed_token_generation_CO2_emissions": null, "smashed_token_generation_energy_consumption": null, "smashed_inference_latency_sync": 26.685030364990233, "smashed_inference_latency_async": 24.64604377746582, "smashed_inference_throughput_sync": 0.037474193820366146, "smashed_inference_throughput_async": 0.0405744633511652, "smashed_inference_CO2_emissions": null, "smashed_inference_energy_consumption": null }