Spaces:
Running
Running
File size: 5,502 Bytes
1719261 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 |
"use strict";
var Buffer = require("safer-buffer").Buffer;
// Note: UTF16-LE (or UCS2) codec is Node.js native. See encodings/internal.js
// == UTF16-BE codec. ==========================================================
exports.utf16be = Utf16BECodec;
function Utf16BECodec() {
}
Utf16BECodec.prototype.encoder = Utf16BEEncoder;
Utf16BECodec.prototype.decoder = Utf16BEDecoder;
Utf16BECodec.prototype.bomAware = true;
// -- Encoding
function Utf16BEEncoder() {
}
Utf16BEEncoder.prototype.write = function(str) {
var buf = Buffer.from(str, 'ucs2');
for (var i = 0; i < buf.length; i += 2) {
var tmp = buf[i]; buf[i] = buf[i+1]; buf[i+1] = tmp;
}
return buf;
}
Utf16BEEncoder.prototype.end = function() {
}
// -- Decoding
function Utf16BEDecoder() {
this.overflowByte = -1;
}
Utf16BEDecoder.prototype.write = function(buf) {
if (buf.length == 0)
return '';
var buf2 = Buffer.alloc(buf.length + 1),
i = 0, j = 0;
if (this.overflowByte !== -1) {
buf2[0] = buf[0];
buf2[1] = this.overflowByte;
i = 1; j = 2;
}
for (; i < buf.length-1; i += 2, j+= 2) {
buf2[j] = buf[i+1];
buf2[j+1] = buf[i];
}
this.overflowByte = (i == buf.length-1) ? buf[buf.length-1] : -1;
return buf2.slice(0, j).toString('ucs2');
}
Utf16BEDecoder.prototype.end = function() {
this.overflowByte = -1;
}
// == UTF-16 codec =============================================================
// Decoder chooses automatically from UTF-16LE and UTF-16BE using BOM and space-based heuristic.
// Defaults to UTF-16LE, as it's prevalent and default in Node.
// http://en.wikipedia.org/wiki/UTF-16 and http://encoding.spec.whatwg.org/#utf-16le
// Decoder default can be changed: iconv.decode(buf, 'utf16', {defaultEncoding: 'utf-16be'});
// Encoder uses UTF-16LE and prepends BOM (which can be overridden with addBOM: false).
exports.utf16 = Utf16Codec;
function Utf16Codec(codecOptions, iconv) {
this.iconv = iconv;
}
Utf16Codec.prototype.encoder = Utf16Encoder;
Utf16Codec.prototype.decoder = Utf16Decoder;
// -- Encoding (pass-through)
function Utf16Encoder(options, codec) {
options = options || {};
if (options.addBOM === undefined)
options.addBOM = true;
this.encoder = codec.iconv.getEncoder('utf-16le', options);
}
Utf16Encoder.prototype.write = function(str) {
return this.encoder.write(str);
}
Utf16Encoder.prototype.end = function() {
return this.encoder.end();
}
// -- Decoding
function Utf16Decoder(options, codec) {
this.decoder = null;
this.initialBufs = [];
this.initialBufsLen = 0;
this.options = options || {};
this.iconv = codec.iconv;
}
Utf16Decoder.prototype.write = function(buf) {
if (!this.decoder) {
// Codec is not chosen yet. Accumulate initial bytes.
this.initialBufs.push(buf);
this.initialBufsLen += buf.length;
if (this.initialBufsLen < 16) // We need more bytes to use space heuristic (see below)
return '';
// We have enough bytes -> detect endianness.
var encoding = detectEncoding(this.initialBufs, this.options.defaultEncoding);
this.decoder = this.iconv.getDecoder(encoding, this.options);
var resStr = '';
for (var i = 0; i < this.initialBufs.length; i++)
resStr += this.decoder.write(this.initialBufs[i]);
this.initialBufs.length = this.initialBufsLen = 0;
return resStr;
}
return this.decoder.write(buf);
}
Utf16Decoder.prototype.end = function() {
if (!this.decoder) {
var encoding = detectEncoding(this.initialBufs, this.options.defaultEncoding);
this.decoder = this.iconv.getDecoder(encoding, this.options);
var resStr = '';
for (var i = 0; i < this.initialBufs.length; i++)
resStr += this.decoder.write(this.initialBufs[i]);
var trail = this.decoder.end();
if (trail)
resStr += trail;
this.initialBufs.length = this.initialBufsLen = 0;
return resStr;
}
return this.decoder.end();
}
function detectEncoding(bufs, defaultEncoding) {
var b = [];
var charsProcessed = 0;
var asciiCharsLE = 0, asciiCharsBE = 0; // Number of ASCII chars when decoded as LE or BE.
outer_loop:
for (var i = 0; i < bufs.length; i++) {
var buf = bufs[i];
for (var j = 0; j < buf.length; j++) {
b.push(buf[j]);
if (b.length === 2) {
if (charsProcessed === 0) {
// Check BOM first.
if (b[0] === 0xFF && b[1] === 0xFE) return 'utf-16le';
if (b[0] === 0xFE && b[1] === 0xFF) return 'utf-16be';
}
if (b[0] === 0 && b[1] !== 0) asciiCharsBE++;
if (b[0] !== 0 && b[1] === 0) asciiCharsLE++;
b.length = 0;
charsProcessed++;
if (charsProcessed >= 100) {
break outer_loop;
}
}
}
}
// Make decisions.
// Most of the time, the content has ASCII chars (U+00**), but the opposite (U+**00) is uncommon.
// So, we count ASCII as if it was LE or BE, and decide from that.
if (asciiCharsBE > asciiCharsLE) return 'utf-16be';
if (asciiCharsBE < asciiCharsLE) return 'utf-16le';
// Couldn't decide (likely all zeros or not enough data).
return defaultEncoding || 'utf-16le';
}
|