Spaces:
Configuration error
Configuration error
/** | |
* @author jdiaz5513 | |
*/ | |
import { PACK_SPAN_THRESHOLD } from "../constants"; | |
import { MSG_PACK_NOT_WORD_ALIGNED } from "../errors"; | |
/** | |
* When packing a message there are two tags that are interpreted in a special way: `0x00` and `0xff`. | |
* | |
* @enum {number} | |
*/ | |
const enum PackedTag { | |
/** | |
* The tag is followed by a single byte which indicates a count of consecutive zero-valued words, minus 1. E.g. if the | |
* tag 0x00 is followed by 0x05, the sequence unpacks to 6 words of zero. | |
* | |
* Or, put another way: the tag is first decoded as if it were not special. Since none of the bits are set, it is | |
* followed by no bytes and expands to a word full of zeros. After that, the next byte is interpreted as a count of | |
* additional words that are also all-zero. | |
*/ | |
ZERO = 0x00, | |
/** | |
* The tag is followed by the bytes of the word (as if it weren’t special), but after those bytes is another byte with | |
* value N. Following that byte is N unpacked words that should be copied directly. | |
* | |
* These unpacked words may contain zeroes; in this implementation a minimum of PACK_SPAN_THRESHOLD zero bytes are | |
* written before ending the span. | |
* | |
* The purpose of this rule is to minimize the impact of packing on data that doesn’t contain any zeros – in | |
* particular, long text blobs. Because of this rule, the worst-case space overhead of packing is 2 bytes per 2 KiB of | |
* input (256 words = 2KiB). | |
*/ | |
SPAN = 0xff, | |
} | |
/** | |
* Compute the Hamming weight (number of bits set to 1) of a number. Used to figure out how many bytes follow a tag byte | |
* while computing the size of a packed message. | |
* | |
* WARNING: Using this with floating point numbers will void your warranty. | |
* | |
* @param {number} x A real integer. | |
* @returns {number} The hamming weight (integer). | |
*/ | |
export function getHammingWeight(x: number): number { | |
// Thanks, HACKMEM! | |
let w = x - ((x >> 1) & 0x55555555); | |
w = (w & 0x33333333) + ((w >> 2) & 0x33333333); | |
return (((w + (w >> 4)) & 0x0f0f0f0f) * 0x01010101) >> 24; | |
} | |
export type byte = number; | |
/** | |
* Compute the tag byte from the 8 bytes of a 64-bit word. | |
* | |
* @param {byte} a The first byte. | |
* @param {byte} b The second byte. | |
* @param {byte} c The third byte. | |
* @param {byte} d The fourth byte. | |
* @param {byte} e The fifth byte. | |
* @param {byte} f The sixth byte. | |
* @param {byte} g The seventh byte. | |
* @param {byte} h The eighth byte (phew!). | |
* @returns {number} The tag byte. | |
*/ | |
export function getTagByte(a: byte, b: byte, c: byte, d: byte, e: byte, f: byte, g: byte, h: byte): number { | |
// Yes, it's pretty. Don't touch it. | |
return ( | |
(a === 0 ? 0 : 0b00000001) | | |
(b === 0 ? 0 : 0b00000010) | | |
(c === 0 ? 0 : 0b00000100) | | |
(d === 0 ? 0 : 0b00001000) | | |
(e === 0 ? 0 : 0b00010000) | | |
(f === 0 ? 0 : 0b00100000) | | |
(g === 0 ? 0 : 0b01000000) | | |
(h === 0 ? 0 : 0b10000000) | |
); | |
} | |
/** | |
* Efficiently calculate the length of a packed Cap'n Proto message. | |
* | |
* @export | |
* @param {ArrayBuffer} packed The packed message. | |
* @returns {number} The length of the unpacked message in bytes. | |
*/ | |
export function getUnpackedByteLength(packed: ArrayBuffer): number { | |
const p = new Uint8Array(packed); | |
let wordLength = 0; | |
let lastTag = 0x77; | |
for (let i = 0; i < p.byteLength; ) { | |
const tag = p[i]; | |
if (lastTag === PackedTag.ZERO) { | |
wordLength += tag; | |
i++; | |
lastTag = 0x77; | |
} else if (lastTag === PackedTag.SPAN) { | |
wordLength += tag; | |
i += tag * 8 + 1; | |
lastTag = 0x77; | |
} else { | |
wordLength++; | |
i += getHammingWeight(tag) + 1; | |
lastTag = tag; | |
} | |
} | |
return wordLength * 8; | |
} | |
/** | |
* Compute the number of zero bytes that occur in a given 64-bit word, provided as eight separate bytes. | |
* | |
* @param {byte} a The first byte. | |
* @param {byte} b The second byte. | |
* @param {byte} c The third byte. | |
* @param {byte} d The fourth byte. | |
* @param {byte} e The fifth byte. | |
* @param {byte} f The sixth byte. | |
* @param {byte} g The seventh byte. | |
* @param {byte} h The eighth byte (phew!). | |
* @returns {number} The number of these bytes that are zero. | |
*/ | |
export function getZeroByteCount(a: byte, b: byte, c: byte, d: byte, e: byte, f: byte, g: byte, h: byte): number { | |
return ( | |
(a === 0 ? 1 : 0) + | |
(b === 0 ? 1 : 0) + | |
(c === 0 ? 1 : 0) + | |
(d === 0 ? 1 : 0) + | |
(e === 0 ? 1 : 0) + | |
(f === 0 ? 1 : 0) + | |
(g === 0 ? 1 : 0) + | |
(h === 0 ? 1 : 0) | |
); | |
} | |
/** | |
* Pack a section of a Cap'n Proto message into a compressed format. This will efficiently compress zero bytes (which | |
* are common in idiomatic Cap'n Proto messages) into a compact form. | |
* | |
* For stream-framed messages this is called once for the frame header and once again for each segment in the message. | |
* | |
* The returned array buffer is trimmed to the exact size of the packed message with a single copy operation at the end. | |
* This should be decent on CPU time but does require quite a lot of memory (a normal array is filled up with each | |
* packed byte until the packing is complete). | |
* | |
* @export | |
* @param {ArrayBuffer} unpacked The message to pack. | |
* @param {number} [byteOffset] Starting byte offset to read bytes from, defaults to 0. | |
* @param {number} [byteLength] Total number of bytes to read, defaults to the remainder of the buffer contents. | |
* @returns {ArrayBuffer} A packed version of the message. | |
*/ | |
export function pack(unpacked: ArrayBuffer, byteOffset = 0, byteLength?: number): ArrayBuffer { | |
if (unpacked.byteLength % 8 !== 0) throw new Error(MSG_PACK_NOT_WORD_ALIGNED); | |
const src = new Uint8Array(unpacked, byteOffset, byteLength); | |
// TODO: Maybe we should do this with buffers? This costs more than 8x the final compressed size in temporary RAM. | |
const dst: number[] = []; | |
/* Just have to be sure it's neither ZERO nor SPAN. */ | |
let lastTag = 0x77; | |
/** This is where we need to remember to write the SPAN tag (0xff). */ | |
let spanTagOffset = NaN; | |
/** How many words have been copied during the current span. */ | |
let spanWordLength = 0; | |
/** | |
* When this hits zero, we've had PACK_SPAN_THRESHOLD zero bytes pass by and it's time to bail from the span. | |
*/ | |
let spanThreshold = PACK_SPAN_THRESHOLD; | |
for (let srcByteOffset = 0; srcByteOffset < src.byteLength; srcByteOffset += 8) { | |
/** Read in the entire word. Yes, this feels silly but it's fast! */ | |
const a = src[srcByteOffset]; | |
const b = src[srcByteOffset + 1]; | |
const c = src[srcByteOffset + 2]; | |
const d = src[srcByteOffset + 3]; | |
const e = src[srcByteOffset + 4]; | |
const f = src[srcByteOffset + 5]; | |
const g = src[srcByteOffset + 6]; | |
const h = src[srcByteOffset + 7]; | |
const tag = getTagByte(a, b, c, d, e, f, g, h); | |
/** If this is true we'll skip the normal word write logic after the switch statement. */ | |
let skipWriteWord = true; | |
switch (lastTag) { | |
case PackedTag.ZERO: | |
// We're writing a span of words with all zeroes in them. See if we need to bail out of the fast path. | |
if (tag !== PackedTag.ZERO || spanWordLength >= 0xff) { | |
// There's a bit in there or we got too many zeroes. Damn, we need to bail. | |
dst.push(spanWordLength); | |
spanWordLength = 0; | |
skipWriteWord = false; | |
} else { | |
// Kay, let's quickly inc this and go. | |
spanWordLength++; | |
} | |
break; | |
case PackedTag.SPAN: { | |
// We're writing a span of nonzero words. | |
const zeroCount = getZeroByteCount(a, b, c, d, e, f, g, h); | |
// See if we need to bail now. | |
spanThreshold -= zeroCount; | |
if (spanThreshold <= 0 || spanWordLength >= 0xff) { | |
// Alright, time to get packing again. Write the number of words we skipped to the beginning of the span. | |
dst[spanTagOffset] = spanWordLength; | |
spanWordLength = 0; | |
spanThreshold = PACK_SPAN_THRESHOLD; | |
// We have to write this word normally. | |
skipWriteWord = false; | |
} else { | |
// Just write this word verbatim. | |
dst.push(a, b, c, d, e, f, g, h); | |
spanWordLength++; | |
} | |
break; | |
} | |
default: | |
// Didn't get a special tag last time, let's write this as normal. | |
skipWriteWord = false; | |
break; | |
} | |
// A goto is fast, idk why people keep hatin'. | |
if (skipWriteWord) continue; | |
dst.push(tag); | |
lastTag = tag; | |
if (a !== 0) dst.push(a); | |
if (b !== 0) dst.push(b); | |
if (c !== 0) dst.push(c); | |
if (d !== 0) dst.push(d); | |
if (e !== 0) dst.push(e); | |
if (f !== 0) dst.push(f); | |
if (g !== 0) dst.push(g); | |
if (h !== 0) dst.push(h); | |
// Record the span tag offset if needed, making sure to actually leave room for it. | |
if (tag === PackedTag.SPAN) { | |
spanTagOffset = dst.length; | |
dst.push(0); | |
} | |
} | |
// We're done. If we were writing a span let's finish it. | |
if (lastTag === PackedTag.ZERO) { | |
dst.push(spanWordLength); | |
} else if (lastTag === PackedTag.SPAN) { | |
dst[spanTagOffset] = spanWordLength; | |
} | |
return new Uint8Array(dst).buffer; | |
} | |
/** | |
* Unpack a compressed Cap'n Proto message into a new ArrayBuffer. | |
* | |
* Unlike the `pack` function, this is able to efficiently determine the exact size needed for the output buffer and | |
* runs considerably more efficiently. | |
* | |
* @export | |
* @param {ArrayBuffer} packed An array buffer containing the packed message. | |
* @returns {ArrayBuffer} The unpacked message. | |
*/ | |
export function unpack(packed: ArrayBuffer): ArrayBuffer { | |
// We have no choice but to read the packed buffer one byte at a time. | |
const src = new Uint8Array(packed); | |
const dst = new Uint8Array(new ArrayBuffer(getUnpackedByteLength(packed))); | |
/** The last tag byte that we've seen - it starts at a "neutral" value. */ | |
let lastTag = 0x77; | |
for (let srcByteOffset = 0, dstByteOffset = 0; srcByteOffset < src.byteLength; ) { | |
const tag = src[srcByteOffset]; | |
if (lastTag === PackedTag.ZERO) { | |
// We have a span of zeroes. New array buffers are guaranteed to be initialized to zero so we just seek ahead. | |
dstByteOffset += tag * 8; | |
srcByteOffset++; | |
lastTag = 0x77; | |
} else if (lastTag === PackedTag.SPAN) { | |
// We have a span of unpacked bytes. Copy them verbatim from the source buffer. | |
const spanByteLength = tag * 8; | |
dst.set(src.subarray(srcByteOffset + 1, srcByteOffset + 1 + spanByteLength), dstByteOffset); | |
dstByteOffset += spanByteLength; | |
srcByteOffset += 1 + spanByteLength; | |
lastTag = 0x77; | |
} else { | |
// Okay, a normal tag. Let's read past the tag and copy bytes that have a bit set in the tag. | |
srcByteOffset++; | |
for (let i = 1; i <= 0b10000000; i <<= 1) { | |
// We only need to actually touch `dst` if there's a nonzero byte (it's already initialized to zeroes). | |
if ((tag & i) !== 0) dst[dstByteOffset] = src[srcByteOffset++]; | |
dstByteOffset++; | |
} | |
lastTag = tag; | |
} | |
} | |
return dst.buffer; | |
} | |