import os import subprocess import sys import argparse import time sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))) KB = 1024 MB = 1024 * 1024 GB = 1024 * 1024 * 1024 RED = "\033[91m" YELLOW = "\033[93m" GREEN = "\033[92m" RESET = "\033[0m" def check_and_install_zipnn(): try: import zipnn except ImportError: print("zipnn not found. Installing...") subprocess.check_call( [ sys.executable, "-m", "pip", "install", "zipnn", "--upgrade", ] ) import zipnn def parse_streaming_chunk_size( streaming_chunk_size, ): if str(streaming_chunk_size).isdigit(): final = int(streaming_chunk_size) else: size_value = int(streaming_chunk_size[:-2]) size_unit = streaming_chunk_size[-2].lower() if size_unit == "k": final = KB * size_value elif size_unit == "m": final = MB * size_value elif size_unit == "g": final = GB * size_value else: raise ValueError(f"Invalid size unit: {size_unit}. Use 'k', 'm', or 'g'.") return final def compress_file( input_file, dtype="", streaming_chunk_size=1048576, delete=False, force=False, hf_cache=False, ): import zipnn streaming_chunk_size = parse_streaming_chunk_size(streaming_chunk_size) full_path = input_file if not os.path.exists(full_path): print(f"{RED}File not found{RESET}") return compressed_path = full_path + ".znn" if not force and os.path.exists(compressed_path): user_input = ( input(f"{compressed_path} already exists; overwrite (y/n)? ").strip().lower() ) if user_input not in ("yes", "y"): print(f"Skipping {full_path}...") return print(f"Compressing {full_path}...") # output_file = input_file + ".znn" if dtype: zpn = zipnn.ZipNN( bytearray_dtype="float32", is_streaming=True, streaming_chunk_kb=streaming_chunk_size, ) else: zpn = zipnn.ZipNN( is_streaming=True, streaming_chunk_kb=streaming_chunk_size, ) file_size_before = 0 file_size_after = 0 start_time = time.time() with open(input_file, "rb") as infile, open(output_file, "wb") as outfile: chunk = infile.read() file_size_before += len(chunk) compressed_chunk = zpn.compress(chunk) if compressed_chunk: file_size_after += len(compressed_chunk) outfile.write(compressed_chunk) end_time = time.time() - start_time print(f"Compressed {input_file} to {output_file}") print( f"{GREEN}Original size: {file_size_before/GB:.02f}GB size after compression: {file_size_after/GB:.02f}GB, Remaining size is {file_size_after/file_size_before*100:.02f}% of original, time: {end_time:.02f}{RESET}" ) if delete and not hf_cache: print(f"Deleting {full_path}...") os.remove(full_path) if hf_cache: # If the file is in the Hugging Face cache, fix the symlinks print(f"{YELLOW}Reorganizing Hugging Face cache...{RESET}") try: snapshot_path = os.path.dirname(input_file) blob_name = os.path.join(snapshot_path, os.readlink(input_file)) os.rename(output_file, blob_name) os.symlink(blob_name, output_file) if os.path.exists(input_file): os.remove(input_file) except Exception as e: raise Exception(f"Error reorganizing Hugging Face cache: {e}") if __name__ == "__main__": if len(sys.argv) < 2: print("Usage: python compress_files.py ") print("Example: python compress_files.py 'safetensors'") sys.exit(1) parser = argparse.ArgumentParser(description="Enter a file path to compress.") parser.add_argument( "input_file", type=str, help="Specify the path to the file to compress.", ) parser.add_argument( "--float32", action="store_true", help="A flag that triggers float32 compression", ) parser.add_argument( "--streaming_chunk_size", type=str, help="An optional streaming chunk size. The format is int (for size in Bytes) or int+KB/MB/GB. Default is 1MB", ) parser.add_argument( "--delete", action="store_true", help="A flag that triggers deletion of a single file instead of compression", ) parser.add_argument( "--force", action="store_true", help="A flag that forces overwriting when compressing.", ) parser.add_argument( "--hf_cache", action="store_true", help="A flag that indicates if the file is in the Hugging Face cache.", ) args = parser.parse_args() optional_kwargs = {} if args.float32: optional_kwargs["dtype"] = 32 if args.streaming_chunk_size is not None: optional_kwargs["streaming_chunk_size"] = args.streaming_chunk_size if args.delete: optional_kwargs["delete"] = args.delete if args.force: optional_kwargs["force"] = args.force if args.hf_cache: optional_kwargs["hf_cache"] = args.hf_cache check_and_install_zipnn() compress_file(args.input_file, **optional_kwargs)