Spaces:

W404NET
/

Telegram-Chat-Bot

Configuration error

Telegram-Chat-Bot / node_modules /capnp-ts /src /serialization /packing.ts

Lee Thanh

Upload 3012 files

5641073 12 months ago

11.1 kB

	/**
	* @author jdiaz5513
	*/

	import { PACK_SPAN_THRESHOLD } from "../constants";
	import { MSG_PACK_NOT_WORD_ALIGNED } from "../errors";

	/**
	* When packing a message there are two tags that are interpreted in a special way: `0x00` and `0xff`.
	*
	* @enum {number}
	*/

	const enum PackedTag {
	/**
	* The tag is followed by a single byte which indicates a count of consecutive zero-valued words, minus 1. E.g. if the
	* tag 0x00 is followed by 0x05, the sequence unpacks to 6 words of zero.
	*
	* Or, put another way: the tag is first decoded as if it were not special. Since none of the bits are set, it is
	* followed by no bytes and expands to a word full of zeros. After that, the next byte is interpreted as a count of
	* additional words that are also all-zero.
	*/

	ZERO = 0x00,

	/**
	* The tag is followed by the bytes of the word (as if it weren’t special), but after those bytes is another byte with
	* value N. Following that byte is N unpacked words that should be copied directly.
	*
	* These unpacked words may contain zeroes; in this implementation a minimum of PACK_SPAN_THRESHOLD zero bytes are
	* written before ending the span.
	*
	* The purpose of this rule is to minimize the impact of packing on data that doesn’t contain any zeros – in
	* particular, long text blobs. Because of this rule, the worst-case space overhead of packing is 2 bytes per 2 KiB of
	* input (256 words = 2KiB).
	*/

	SPAN = 0xff,
	}

	/**
	* Compute the Hamming weight (number of bits set to 1) of a number. Used to figure out how many bytes follow a tag byte
	* while computing the size of a packed message.
	*
	* WARNING: Using this with floating point numbers will void your warranty.
	*
	* @param {number} x A real integer.
	* @returns {number} The hamming weight (integer).
	*/

	export function getHammingWeight(x: number): number {
	// Thanks, HACKMEM!

	let w = x - ((x >> 1) & 0x55555555);
	w = (w & 0x33333333) + ((w >> 2) & 0x33333333);
	return (((w + (w >> 4)) & 0x0f0f0f0f) * 0x01010101) >> 24;
	}

	export type byte = number;

	/**
	* Compute the tag byte from the 8 bytes of a 64-bit word.
	*
	* @param {byte} a The first byte.
	* @param {byte} b The second byte.
	* @param {byte} c The third byte.
	* @param {byte} d The fourth byte.
	* @param {byte} e The fifth byte.
	* @param {byte} f The sixth byte.
	* @param {byte} g The seventh byte.
	* @param {byte} h The eighth byte (phew!).
	* @returns {number} The tag byte.
	*/

	export function getTagByte(a: byte, b: byte, c: byte, d: byte, e: byte, f: byte, g: byte, h: byte): number {
	// Yes, it's pretty. Don't touch it.

	return (
	(a === 0 ? 0 : 0b00000001) \|
	(b === 0 ? 0 : 0b00000010) \|
	(c === 0 ? 0 : 0b00000100) \|
	(d === 0 ? 0 : 0b00001000) \|
	(e === 0 ? 0 : 0b00010000) \|
	(f === 0 ? 0 : 0b00100000) \|
	(g === 0 ? 0 : 0b01000000) \|
	(h === 0 ? 0 : 0b10000000)
	);
	}

	/**
	* Efficiently calculate the length of a packed Cap'n Proto message.
	*
	* @export
	* @param {ArrayBuffer} packed The packed message.
	* @returns {number} The length of the unpacked message in bytes.
	*/

	export function getUnpackedByteLength(packed: ArrayBuffer): number {
	const p = new Uint8Array(packed);
	let wordLength = 0;
	let lastTag = 0x77;

	for (let i = 0; i < p.byteLength; ) {
	const tag = p[i];

	if (lastTag === PackedTag.ZERO) {
	wordLength += tag;

	i++;

	lastTag = 0x77;
	} else if (lastTag === PackedTag.SPAN) {
	wordLength += tag;

	i += tag * 8 + 1;

	lastTag = 0x77;
	} else {
	wordLength++;

	i += getHammingWeight(tag) + 1;

	lastTag = tag;
	}
	}

	return wordLength * 8;
	}

	/**
	* Compute the number of zero bytes that occur in a given 64-bit word, provided as eight separate bytes.
	*
	* @param {byte} a The first byte.
	* @param {byte} b The second byte.
	* @param {byte} c The third byte.
	* @param {byte} d The fourth byte.
	* @param {byte} e The fifth byte.
	* @param {byte} f The sixth byte.
	* @param {byte} g The seventh byte.
	* @param {byte} h The eighth byte (phew!).
	* @returns {number} The number of these bytes that are zero.
	*/

	export function getZeroByteCount(a: byte, b: byte, c: byte, d: byte, e: byte, f: byte, g: byte, h: byte): number {
	return (
	(a === 0 ? 1 : 0) +
	(b === 0 ? 1 : 0) +
	(c === 0 ? 1 : 0) +
	(d === 0 ? 1 : 0) +
	(e === 0 ? 1 : 0) +
	(f === 0 ? 1 : 0) +
	(g === 0 ? 1 : 0) +
	(h === 0 ? 1 : 0)
	);
	}

	/**
	* Pack a section of a Cap'n Proto message into a compressed format. This will efficiently compress zero bytes (which
	* are common in idiomatic Cap'n Proto messages) into a compact form.
	*
	* For stream-framed messages this is called once for the frame header and once again for each segment in the message.
	*
	* The returned array buffer is trimmed to the exact size of the packed message with a single copy operation at the end.
	* This should be decent on CPU time but does require quite a lot of memory (a normal array is filled up with each
	* packed byte until the packing is complete).
	*
	* @export
	* @param {ArrayBuffer} unpacked The message to pack.
	* @param {number} [byteOffset] Starting byte offset to read bytes from, defaults to 0.
	* @param {number} [byteLength] Total number of bytes to read, defaults to the remainder of the buffer contents.
	* @returns {ArrayBuffer} A packed version of the message.
	*/

	export function pack(unpacked: ArrayBuffer, byteOffset = 0, byteLength?: number): ArrayBuffer {
	if (unpacked.byteLength % 8 !== 0) throw new Error(MSG_PACK_NOT_WORD_ALIGNED);

	const src = new Uint8Array(unpacked, byteOffset, byteLength);

	// TODO: Maybe we should do this with buffers? This costs more than 8x the final compressed size in temporary RAM.

	const dst: number[] = [];

	/* Just have to be sure it's neither ZERO nor SPAN. */

	let lastTag = 0x77;

	/** This is where we need to remember to write the SPAN tag (0xff). */

	let spanTagOffset = NaN;

	/** How many words have been copied during the current span. */

	let spanWordLength = 0;

	/**
	* When this hits zero, we've had PACK_SPAN_THRESHOLD zero bytes pass by and it's time to bail from the span.
	*/

	let spanThreshold = PACK_SPAN_THRESHOLD;

	for (let srcByteOffset = 0; srcByteOffset < src.byteLength; srcByteOffset += 8) {
	/** Read in the entire word. Yes, this feels silly but it's fast! */

	const a = src[srcByteOffset];
	const b = src[srcByteOffset + 1];
	const c = src[srcByteOffset + 2];
	const d = src[srcByteOffset + 3];
	const e = src[srcByteOffset + 4];
	const f = src[srcByteOffset + 5];
	const g = src[srcByteOffset + 6];
	const h = src[srcByteOffset + 7];

	const tag = getTagByte(a, b, c, d, e, f, g, h);

	/** If this is true we'll skip the normal word write logic after the switch statement. */

	let skipWriteWord = true;

	switch (lastTag) {
	case PackedTag.ZERO:
	// We're writing a span of words with all zeroes in them. See if we need to bail out of the fast path.

	if (tag !== PackedTag.ZERO \|\| spanWordLength >= 0xff) {
	// There's a bit in there or we got too many zeroes. Damn, we need to bail.

	dst.push(spanWordLength);
	spanWordLength = 0;

	skipWriteWord = false;
	} else {
	// Kay, let's quickly inc this and go.

	spanWordLength++;
	}

	break;

	case PackedTag.SPAN: {
	// We're writing a span of nonzero words.

	const zeroCount = getZeroByteCount(a, b, c, d, e, f, g, h);

	// See if we need to bail now.

	spanThreshold -= zeroCount;

	if (spanThreshold <= 0 \|\| spanWordLength >= 0xff) {
	// Alright, time to get packing again. Write the number of words we skipped to the beginning of the span.

	dst[spanTagOffset] = spanWordLength;
	spanWordLength = 0;

	spanThreshold = PACK_SPAN_THRESHOLD;

	// We have to write this word normally.

	skipWriteWord = false;
	} else {
	// Just write this word verbatim.

	dst.push(a, b, c, d, e, f, g, h);

	spanWordLength++;
	}

	break;
	}
	default:
	// Didn't get a special tag last time, let's write this as normal.

	skipWriteWord = false;

	break;
	}

	// A goto is fast, idk why people keep hatin'.
	if (skipWriteWord) continue;

	dst.push(tag);
	lastTag = tag;

	if (a !== 0) dst.push(a);
	if (b !== 0) dst.push(b);
	if (c !== 0) dst.push(c);
	if (d !== 0) dst.push(d);
	if (e !== 0) dst.push(e);
	if (f !== 0) dst.push(f);
	if (g !== 0) dst.push(g);
	if (h !== 0) dst.push(h);

	// Record the span tag offset if needed, making sure to actually leave room for it.

	if (tag === PackedTag.SPAN) {
	spanTagOffset = dst.length;

	dst.push(0);
	}
	}

	// We're done. If we were writing a span let's finish it.

	if (lastTag === PackedTag.ZERO) {
	dst.push(spanWordLength);
	} else if (lastTag === PackedTag.SPAN) {
	dst[spanTagOffset] = spanWordLength;
	}

	return new Uint8Array(dst).buffer;
	}

	/**
	* Unpack a compressed Cap'n Proto message into a new ArrayBuffer.
	*
	* Unlike the `pack` function, this is able to efficiently determine the exact size needed for the output buffer and
	* runs considerably more efficiently.
	*
	* @export
	* @param {ArrayBuffer} packed An array buffer containing the packed message.
	* @returns {ArrayBuffer} The unpacked message.
	*/

	export function unpack(packed: ArrayBuffer): ArrayBuffer {
	// We have no choice but to read the packed buffer one byte at a time.

	const src = new Uint8Array(packed);
	const dst = new Uint8Array(new ArrayBuffer(getUnpackedByteLength(packed)));

	/** The last tag byte that we've seen - it starts at a "neutral" value. */

	let lastTag = 0x77;

	for (let srcByteOffset = 0, dstByteOffset = 0; srcByteOffset < src.byteLength; ) {
	const tag = src[srcByteOffset];

	if (lastTag === PackedTag.ZERO) {
	// We have a span of zeroes. New array buffers are guaranteed to be initialized to zero so we just seek ahead.

	dstByteOffset += tag * 8;

	srcByteOffset++;

	lastTag = 0x77;
	} else if (lastTag === PackedTag.SPAN) {
	// We have a span of unpacked bytes. Copy them verbatim from the source buffer.

	const spanByteLength = tag * 8;

	dst.set(src.subarray(srcByteOffset + 1, srcByteOffset + 1 + spanByteLength), dstByteOffset);

	dstByteOffset += spanByteLength;
	srcByteOffset += 1 + spanByteLength;

	lastTag = 0x77;
	} else {
	// Okay, a normal tag. Let's read past the tag and copy bytes that have a bit set in the tag.

	srcByteOffset++;

	for (let i = 1; i <= 0b10000000; i <<= 1) {
	// We only need to actually touch `dst` if there's a nonzero byte (it's already initialized to zeroes).

	if ((tag & i) !== 0) dst[dstByteOffset] = src[srcByteOffset++];

	dstByteOffset++;
	}

	lastTag = tag;
	}
	}

	return dst.buffer;
	}