File size: 1,611 Bytes
a5004f1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
{
    "base_current_gpu_type": "NVIDIA A100-PCIE-40GB",
    "base_current_gpu_total_memory": 40339.3125,
    "base_token_generation_latency_sync": 46.801435470581055,
    "base_token_generation_latency_async": 46.831257082521915,
    "base_token_generation_throughput_sync": 0.02136686599342857,
    "base_token_generation_throughput_async": 0.021353259816149887,
    "base_token_generation_CO2_emissions": null,
    "base_token_generation_energy_consumption": null,
    "base_inference_latency_sync": 45.796761703491214,
    "base_inference_latency_async": 44.44742202758789,
    "base_inference_throughput_sync": 0.02183560502540439,
    "base_inference_throughput_async": 0.022498492699516162,
    "base_inference_CO2_emissions": null,
    "base_inference_energy_consumption": null,
    "smashed_current_gpu_type": "NVIDIA A100-PCIE-40GB",
    "smashed_current_gpu_total_memory": 40339.3125,
    "smashed_token_generation_latency_sync": 35.67549228668213,
    "smashed_token_generation_latency_async": 36.07236072421074,
    "smashed_token_generation_throughput_sync": 0.02803044711938862,
    "smashed_token_generation_throughput_async": 0.027722055887759754,
    "smashed_token_generation_CO2_emissions": null,
    "smashed_token_generation_energy_consumption": null,
    "smashed_inference_latency_sync": 59.643289184570314,
    "smashed_inference_latency_async": 34.55190658569336,
    "smashed_inference_throughput_sync": 0.016766345613593348,
    "smashed_inference_throughput_async": 0.028941962942619852,
    "smashed_inference_CO2_emissions": null,
    "smashed_inference_energy_consumption": null
}