compilade
/

quant-tests

compilade committed on Dec 28, 2024

Commit

9d41255

1 Parent(s): 7b12d2f

Change where mulmat_perf is run

Files changed (1) hide show

bench-TriLMs.py CHANGED Viewed

@@ -169,24 +169,26 @@ if __name__ == "__main__":
     MODEL_DIR = args.model_dir
     results = []
     repetitions: int = args.repetitions
     if args.cpu:
         gather_models()
         build_llama_cpp(["-DGGML_NATIVE=ON", "-DGGML_CPU=ON"])
         quantize()
         results.extend(llama_bench(repetitions=repetitions))
     if args.gpu:
         gather_models()
         build_llama_cpp(["-DGGML_NATIVE=ON", "-DGGML_CUDA=ON", "-DGGML_CUDA_F16=ON"])
         quantize()
         results.extend(llama_bench(repetitions=repetitions, types=GPU_TYPES))
     cpuinfo = subprocess.run(["lscpu"], capture_output=True).stdout.decode(
         encoding="utf-8"
     )
-    mulmat_perf = test_backend_perf()
     final_result = {
         "cpuinfo": cpuinfo,

     MODEL_DIR = args.model_dir
     results = []
+    mulmat_perf = []
     repetitions: int = args.repetitions
     if args.cpu:
         gather_models()
         build_llama_cpp(["-DGGML_NATIVE=ON", "-DGGML_CPU=ON"])
         quantize()
+        mulmat_perf.append(test_backend_perf())
         results.extend(llama_bench(repetitions=repetitions))
     if args.gpu:
         gather_models()
         build_llama_cpp(["-DGGML_NATIVE=ON", "-DGGML_CUDA=ON", "-DGGML_CUDA_F16=ON"])
         quantize()
+        mulmat_perf.append(test_backend_perf())
         results.extend(llama_bench(repetitions=repetitions, types=GPU_TYPES))
     cpuinfo = subprocess.run(["lscpu"], capture_output=True).stdout.decode(
         encoding="utf-8"
     )
     final_result = {
         "cpuinfo": cpuinfo,