Change where mulmat_perf is run
Browse files - bench-TriLMs.py +3 -1
bench-TriLMs.py
CHANGED
@@ -169,24 +169,26 @@ if __name__ == "__main__":
|
|
169 |
MODEL_DIR = args.model_dir
|
170 |
|
171 |
results = []
|
|
|
172 |
repetitions: int = args.repetitions
|
173 |
|
174 |
if args.cpu:
|
175 |
gather_models()
|
176 |
build_llama_cpp(["-DGGML_NATIVE=ON", "-DGGML_CPU=ON"])
|
177 |
quantize()
|
|
|
178 |
results.extend(llama_bench(repetitions=repetitions))
|
179 |
|
180 |
if args.gpu:
|
181 |
gather_models()
|
182 |
build_llama_cpp(["-DGGML_NATIVE=ON", "-DGGML_CUDA=ON", "-DGGML_CUDA_F16=ON"])
|
183 |
quantize()
|
|
|
184 |
results.extend(llama_bench(repetitions=repetitions, types=GPU_TYPES))
|
185 |
|
186 |
cpuinfo = subprocess.run(["lscpu"], capture_output=True).stdout.decode(
|
187 |
encoding="utf-8"
|
188 |
)
|
189 |
-
mulmat_perf = test_backend_perf()
|
190 |
|
191 |
final_result = {
|
192 |
"cpuinfo": cpuinfo,
|
|
|
169 |
MODEL_DIR = args.model_dir
|
170 |
|
171 |
results = []
|
172 |
+
mulmat_perf = []
|
173 |
repetitions: int = args.repetitions
|
174 |
|
175 |
if args.cpu:
|
176 |
gather_models()
|
177 |
build_llama_cpp(["-DGGML_NATIVE=ON", "-DGGML_CPU=ON"])
|
178 |
quantize()
|
179 |
+
mulmat_perf.append(test_backend_perf())
|
180 |
results.extend(llama_bench(repetitions=repetitions))
|
181 |
|
182 |
if args.gpu:
|
183 |
gather_models()
|
184 |
build_llama_cpp(["-DGGML_NATIVE=ON", "-DGGML_CUDA=ON", "-DGGML_CUDA_F16=ON"])
|
185 |
quantize()
|
186 |
+
mulmat_perf.append(test_backend_perf())
|
187 |
results.extend(llama_bench(repetitions=repetitions, types=GPU_TYPES))
|
188 |
|
189 |
cpuinfo = subprocess.run(["lscpu"], capture_output=True).stdout.decode(
|
190 |
encoding="utf-8"
|
191 |
)
|
|
|
192 |
|
193 |
final_result = {
|
194 |
"cpuinfo": cpuinfo,
|