diff --git a/browser/decode.js b/browser/decode.js
index b826b14..ccf1e01 100644
--- a/browser/decode.js
+++ b/browser/decode.js
@@ -1,5 +1,7 @@
 'use strict';
 
+var utf8 = require('./utf8');
+
 function Decoder(buffer) {
   this.offset = 0;
   if (buffer instanceof ArrayBuffer) {
@@ -13,47 +15,6 @@ function Decoder(buffer) {
   }
 }
 
-function utf8Read(view, offset, length) {
-  var string = '', chr = 0;
-  for (var i = offset, end = offset + length; i < end; i++) {
-    var byte = view.getUint8(i);
-    if ((byte & 0x80) === 0x00) {
-      string += String.fromCharCode(byte);
-      continue;
-    }
-    if ((byte & 0xe0) === 0xc0) {
-      string += String.fromCharCode(
-        ((byte & 0x1f) << 6) |
-        (view.getUint8(++i) & 0x3f)
-      );
-      continue;
-    }
-    if ((byte & 0xf0) === 0xe0) {
-      string += String.fromCharCode(
-        ((byte & 0x0f) << 12) |
-        ((view.getUint8(++i) & 0x3f) << 6) |
-        ((view.getUint8(++i) & 0x3f) << 0)
-      );
-      continue;
-    }
-    if ((byte & 0xf8) === 0xf0) {
-      chr = ((byte & 0x07) << 18) |
-        ((view.getUint8(++i) & 0x3f) << 12) |
-        ((view.getUint8(++i) & 0x3f) << 6) |
-        ((view.getUint8(++i) & 0x3f) << 0);
-      if (chr >= 0x010000) { // surrogate pair
-        chr -= 0x010000;
-        string += String.fromCharCode((chr >>> 10) + 0xD800, (chr & 0x3FF) + 0xDC00);
-      } else {
-        string += String.fromCharCode(chr);
-      }
-      continue;
-    }
-    throw new Error('Invalid byte ' + byte.toString(16));
-  }
-  return string;
-}
-
 Decoder.prototype.array = function (length) {
   var value = new Array(length);
   for (var i = 0; i < length; i++) {
@@ -72,7 +33,7 @@ Decoder.prototype.map = function (length) {
 };
 
 Decoder.prototype.str = function (length) {
-  var value = utf8Read(this.view, this.offset, length);
+  var value = utf8.read(this.view, this.offset, length);
   this.offset += length;
   return value;
 };
diff --git a/browser/encode.js b/browser/encode.js
index 8b6fdd1..6550383 100644
--- a/browser/encode.js
+++ b/browser/encode.js
@@ -1,58 +1,12 @@
 'use strict';
 
-function utf8Write(view, offset, str) {
-  var c = 0;
-  for (var i = 0, l = str.length; i < l; i++) {
-    c = str.charCodeAt(i);
-    if (c < 0x80) {
-      view.setUint8(offset++, c);
-    }
-    else if (c < 0x800) {
-      view.setUint8(offset++, 0xc0 | (c >> 6));
-      view.setUint8(offset++, 0x80 | (c & 0x3f));
-    }
-    else if (c < 0xd800 || c >= 0xe000) {
-      view.setUint8(offset++, 0xe0 | (c >> 12));
-      view.setUint8(offset++, 0x80 | (c >> 6) & 0x3f);
-      view.setUint8(offset++, 0x80 | (c & 0x3f));
-    }
-    else {
-      i++;
-      c = 0x10000 + (((c & 0x3ff) << 10) | (str.charCodeAt(i) & 0x3ff));
-      view.setUint8(offset++, 0xf0 | (c >> 18));
-      view.setUint8(offset++, 0x80 | (c >> 12) & 0x3f);
-      view.setUint8(offset++, 0x80 | (c >> 6) & 0x3f);
-      view.setUint8(offset++, 0x80 | (c & 0x3f));
-    }
-  }
-}
-
-function utf8Length(str) {
-  var c = 0, length = 0;
-  for (var i = 0, l = str.length; i < l; i++) {
-    c = str.charCodeAt(i);
-    if (c < 0x80) {
-      length += 1;
-    }
-    else if (c < 0x800) {
-      length += 2;
-    }
-    else if (c < 0xd800 || c >= 0xe000) {
-      length += 3;
-    }
-    else {
-      i++;
-      length += 4;
-    }
-  }
-  return length;
-}
+var utf8 = require('./utf8');
 
 function _encode(bytes, defers, value) {
   var type = typeof value, i = 0, l = 0, hi = 0, lo = 0, length = 0, size = 0;
 
   if (type === 'string') {
-    length = utf8Length(value);
+    length = utf8.length(value);
 
     // fixstr
     if (length < 0x20) {
@@ -289,7 +243,7 @@ function encode(value) {
         view.setUint8(offset + j, bin[j]);
       }
     } else if (defer.str) {
-      utf8Write(view, offset, defer.str);
+      utf8.write(view, offset, defer.str);
     } else if (defer.float !== undefined) {
       view.setFloat64(offset, defer.float);
     }
diff --git a/browser/utf8.js b/browser/utf8.js
new file mode 100644
index 0000000..bb12da4
--- /dev/null
+++ b/browser/utf8.js
@@ -0,0 +1,133 @@
+'use strict';
+/* globals TextDecoder, TextEncoder */
+
+/**
+ * Pure-JS fallback: decodes `length` UTF-8 bytes of `view`, starting at
+ * `offset`, into a string. Throws if a byte cannot start a UTF-8 sequence.
+ */
+var read = function read(view, offset, length) {
+  var string = '', chr = 0;
+  for (var i = offset, end = offset + length; i < end; i++) {
+    var byte = view.getUint8(i);
+    if ((byte & 0x80) === 0x00) {
+      string += String.fromCharCode(byte);
+      continue;
+    }
+    if ((byte & 0xe0) === 0xc0) {
+      string += String.fromCharCode(
+        ((byte & 0x1f) << 6) |
+        (view.getUint8(++i) & 0x3f)
+      );
+      continue;
+    }
+    if ((byte & 0xf0) === 0xe0) {
+      string += String.fromCharCode(
+        ((byte & 0x0f) << 12) |
+        ((view.getUint8(++i) & 0x3f) << 6) |
+        ((view.getUint8(++i) & 0x3f) << 0)
+      );
+      continue;
+    }
+    if ((byte & 0xf8) === 0xf0) {
+      chr = ((byte & 0x07) << 18) |
+        ((view.getUint8(++i) & 0x3f) << 12) |
+        ((view.getUint8(++i) & 0x3f) << 6) |
+        ((view.getUint8(++i) & 0x3f) << 0);
+      if (chr >= 0x010000) { // surrogate pair
+        chr -= 0x010000;
+        string += String.fromCharCode((chr >>> 10) + 0xD800, (chr & 0x3FF) + 0xDC00);
+      } else {
+        string += String.fromCharCode(chr);
+      }
+      continue;
+    }
+    throw new Error('Invalid byte ' + byte.toString(16));
+  }
+  return string;
+};
+
+if (typeof TextDecoder !== 'undefined') {
+  // ignoreBOM keeps a leading U+FEFF in the result, as the pure-JS decoder does.
+  var decoder = new TextDecoder('utf-8', { ignoreBOM: true });
+  read = function read(view, offset, length) {
+    // `offset` is relative to the view, so add the view's own byteOffset
+    // before indexing into the underlying ArrayBuffer.
+    var base = (view.byteOffset || 0) + offset;
+    var arr = new Uint8Array(view.buffer || view, base, length);
+    return decoder.decode(arr);
+  };
+}
+
+/**
+ * Pure-JS fallback: encodes `str` as UTF-8 into `view`, starting at
+ * `offset`. A surrogate pair is combined into a single 4-byte sequence.
+ */
+var write = function write(view, offset, str) {
+  var c = 0;
+  for (var i = 0, l = str.length; i < l; i++) {
+    c = str.charCodeAt(i);
+    if (c < 0x80) {
+      view.setUint8(offset++, c);
+    }
+    else if (c < 0x800) {
+      view.setUint8(offset++, 0xc0 | (c >> 6));
+      view.setUint8(offset++, 0x80 | (c & 0x3f));
+    }
+    else if (c < 0xd800 || c >= 0xe000) {
+      view.setUint8(offset++, 0xe0 | (c >> 12));
+      view.setUint8(offset++, 0x80 | (c >> 6) & 0x3f);
+      view.setUint8(offset++, 0x80 | (c & 0x3f));
+    }
+    else {
+      i++;
+      c = 0x10000 + (((c & 0x3ff) << 10) | (str.charCodeAt(i) & 0x3ff));
+      view.setUint8(offset++, 0xf0 | (c >> 18));
+      view.setUint8(offset++, 0x80 | (c >> 12) & 0x3f);
+      view.setUint8(offset++, 0x80 | (c >> 6) & 0x3f);
+      view.setUint8(offset++, 0x80 | (c & 0x3f));
+    }
+  }
+};
+
+if (typeof TextEncoder !== 'undefined') {
+  var encoder = new TextEncoder();
+  write = function write(view, offset, str) {
+    // `offset` is relative to the view, so add the view's own byteOffset
+    // before indexing into the underlying ArrayBuffer.
+    var base = (view.byteOffset || 0) + offset;
+    var arr = new Uint8Array(view.buffer || view, base);
+    var encoded = encoder.encode(str);
+    arr.set(encoded);
+  };
+}
+
+/**
+ * Returns the number of bytes `write` needs for `str`: 1-3 bytes per
+ * UTF-16 code unit outside the surrogate range, 4 per surrogate pair.
+ */
+function length(str) {
+  var c = 0, length = 0;
+  for (var i = 0, l = str.length; i < l; i++) {
+    c = str.charCodeAt(i);
+    if (c < 0x80) {
+      length += 1;
+    }
+    else if (c < 0x800) {
+      length += 2;
+    }
+    else if (c < 0xd800 || c >= 0xe000) {
+      length += 3;
+    }
+    else {
+      i++;
+      length += 4;
+    }
+  }
+  return length;
+}
+
+module.exports = {
+  read: read,
+  write: write,
+  length: length
+};