compilade
/

quant-tests

compilade committed on Dec 28, 2024

Commit

694d40b

1 Parent(s): d1fe1b7

Add some error handling to the Python-based bench script

Files changed (1) hide show

bench-TriLMs.py CHANGED Viewed

@@ -58,7 +58,7 @@ def quantize(types: Sequence[str] = ALL_TYPES, sizes: Sequence[str] = MODEL_SIZE
         source = MODEL_DIR / f"TriLM_{size}B_Unpacked-TQ1_0-F16.gguf"
         for ty in types:
             target = MODEL_DIR / f"TriLM_{size}B_Unpacked-{ty}.gguf"
-            if not target.exists():
                 command = shlex.join(
                     (
                         str(LLAMA_CPP_PATH / "build" / "bin" / "llama-quantize"),
@@ -69,7 +69,10 @@ def quantize(types: Sequence[str] = ALL_TYPES, sizes: Sequence[str] = MODEL_SIZE
                     )
                 )
                 logger.info("Running: %s", command)
-                os.system(command)
 def llama_bench(
@@ -107,6 +110,9 @@ def llama_bench(
                 logger.info("Running: %s", " ".join(command))
                 result = subprocess.run(command, capture_output=True)
                 logger.debug(result.stderr.decode())
                 new_output = json.loads(result.stdout)
                 logger.info(json.dumps(new_output, indent=4))

         source = MODEL_DIR / f"TriLM_{size}B_Unpacked-TQ1_0-F16.gguf"
         for ty in types:
             target = MODEL_DIR / f"TriLM_{size}B_Unpacked-{ty}.gguf"
+            if not target.exists() or target.is_file() and target.stat().st_size == 0:
                 command = shlex.join(
                     (
                         str(LLAMA_CPP_PATH / "build" / "bin" / "llama-quantize"),
                     )
                 )
                 logger.info("Running: %s", command)
+                ret = os.system(command)
+                if ret != 0:
+                    logger.error("Failed to quantize to %s", target)
+                # Should it still continue?
 def llama_bench(
                 logger.info("Running: %s", " ".join(command))
                 result = subprocess.run(command, capture_output=True)
                 logger.debug(result.stderr.decode())
+                if result.returncode != 0:
+                    logger.error("Failed to run %s", " ".join(command))
+                    break;
                 new_output = json.loads(result.stdout)
                 logger.info(json.dumps(new_output, indent=4))